sah.fit

Description:
This module contains functions for fitting and analyzing spectral data.

Index:
- plateau(): Fit the mean value and the error of a plateau
- area_under_peak(): Calculate the area under a given peak
- ratio_areas(): Check the ratio between two areas
- mean(): Get the mean and standard deviation of a list of values
1""" 2# Description 3 4This module contains functions for fitting and analyzing spectral data. 5 6 7# Index 8 9| | | 10| --- | --- | 11| `plateau()` | Fit the mean value and the error of a plateau | 12| `area_under_peak()` | Calculate the area under a given peak | 13| `ratio_areas()` | Check the ratio between two areas | 14| `mean()` | Get the mean and standard deviation of a list of values | 15 16--- 17""" 18 19 20import math 21import scipy 22import numpy as np 23from copy import deepcopy 24from .classes import Spectra 25import aton.alias as alias 26 27 28def plateau( 29 spectra:Spectra, 30 cuts=None, 31 df_index:int=0 32 ) -> tuple: 33 """Fit the mean value and the error of a plateau in a `sah.classes.Spectra` object. 34 35 Use as `sah.fit.plateau(spectra, cuts=[low_cut, high_cut], df_index=0)`. 36 37 If `sah.classes.Spectra.dfs[df_index]` has an 'Error' column, those errors are also taken into account 38 along with the standard deviation of the mean, else only the standard deviation is considered. 39 This is the case if your dataset had a third column with the errors 40 when you imported the `sah.classes.Spectra` object. 41 42 Note that `cuts`, `low_cut` and/or `top_cut` can be set to None. 
43 """ 44 df = deepcopy(spectra.dfs[df_index]) 45 if isinstance(cuts, list): 46 low_cut = cuts[0] 47 if len(cuts) > 1: 48 top_cut = cuts[1] 49 else: 50 top_cut = None 51 elif isinstance(cuts, float): # If cuts is a float, it is considered as low_cut 52 low_cut = cuts 53 top_cut = None 54 elif cuts is None: 55 low_cut = None 56 top_cut = None 57 else: 58 raise ValueError("plateau: cuts must be a float for the low_cut, or a list") 59 if low_cut is not None: 60 df = df[df[df.columns[0]] >= low_cut] 61 if top_cut is not None: 62 df = df[df[df.columns[0]] <= top_cut] 63 mean = df[df.columns[1]].mean() 64 std_mean = df[df.columns[1]].std() 65 error_column = next((col for col in alias.files['error'] if col in df.columns), None) # Get the error column title 66 if error_column: 67 errors = df[error_column].to_numpy() 68 std_data = np.sqrt(np.sum(errors**2)) / len(errors) 69 std = np.sqrt(std_data**2 + std_mean**2) 70 else: 71 std = std_mean 72 return mean, std 73 74 75def area_under_peak( 76 spectra:Spectra, 77 peak:list, 78 df_index:int=0, 79 errors_as_in_baseline:bool=True, 80 min_as_baseline:bool=False 81 ) -> tuple: 82 """Calculate the area under a given peak. 83 84 Peaks must be defined as `peak:list=[xmin, xmax, baseline=0, baseline_error=0]`. 85 If the dataset has no `Error` column, the error for each point is assumed to be the same 86 as the baseline error if `errors_as_in_baseline=True`, otherwise it is assumed to be zero. 87 If `min_as_baseline=True` and `baseline=0`, the baseline is assumed to be the minimum value. 88 Also, if `min_as_baseline=True` and there are negative areas even after applying the baseline, 89 the baseline will be corrected to the minimum value. 
90 """ 91 if len(peak) < 2: 92 raise ValueError("area_under_peak: peak must have at least two values: [xmin, xmax]") 93 xmin = peak[0] 94 xmax = peak[1] 95 baseline = peak[2] if len(peak) >= 3 else 0.0 96 baseline_error = peak[3] if len(peak) >= 4 else 0.0 97 98 df = deepcopy(spectra.dfs[df_index]) 99 df_range = df[(df[df.columns[0]] >= xmin) & (df[df.columns[0]] <= xmax)] 100 x = df_range[df.columns[0]].to_numpy() 101 y = df_range[df.columns[1]].to_numpy() 102 103 min_y = y.min() 104 if min_as_baseline and (baseline == 0 or baseline > min_y): 105 baseline = min_y 106 107 y = y - baseline 108 109 area = scipy.integrate.simpson(y, x=x) 110 111 # Get the error column title 112 error_column = next((col for col in df.columns if col.lower() in alias.files['error']), None) 113 if error_column: 114 point_errors = df_range[error_column].to_numpy() 115 else: # Assume the error in each point is the same as the baseline error 116 if errors_as_in_baseline == True: 117 point_errors = np.full_like(y, baseline_error) 118 else: 119 point_errors = np.zeros_like(y) 120 total_errors = np.sqrt(point_errors**2 + baseline_error**2) 121 area_error = np.sqrt(scipy.integrate.simpson(total_errors**2, x=x)) 122 123 return area, area_error 124 125 126def ratio_areas( 127 area:float, 128 area_total:float, 129 area_error:float=0.0, 130 area_total_error:float=0.0, 131 inverse_ratio:bool=False 132 ) -> tuple: 133 """Check the ratio between two areas, e.g. to estimate deuteration levels from ATR data. 134 135 The ratio is calculated as `area / area_total`. This behavior is modified if `inverse_ratio = True`, 136 so that the ratio is calculated as `(area_total - area) / area_total`. 137 Note that changing the ratio calculation also affects the error propagation. 
138 """ 139 if inverse_ratio: 140 ratio = (area_total - area) / area_total 141 if ratio != 0.0: 142 ratio_error = abs(ratio) * np.sqrt((np.sqrt(area_total_error**2 + area_error**2) / (area_total - area))**2 + (area_total_error / area_total)**2) 143 else: 144 ratio_error = None 145 else: 146 ratio = area / area_total 147 if ratio != 0.0: 148 ratio_error = abs(ratio) * np.sqrt((area_error / area)**2 + (area_total_error / area_total)**2) 149 else: 150 ratio_error = None 151 152 return ratio, ratio_error 153 154 155def mean( 156 array:list, 157 rounded:bool=True, 158 degrees_of_freedom=0 159 ) -> tuple: 160 """Takes an `array` of numerical values and returns the mean and standard deviation. 161 162 It is calculated with numpy as:\n 163 $\\sigma_{x}=\\sqrt{\\frac{\\sum{(x_{i}-{\\overline{x}})^2}}{N-\\text{ddof}}}$\n 164 where ddof are the delta `degrees_of_freedom`, zero by default. 165 Set it to `1` for a corrected sample standard deviation (low N cases), 166 see more details [here](https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation).\n 167 The mean is rounded up to the order of the error by default. To override this behaviour, set `rounded=False`. 168 """ 169 if not all(isinstance(x, (int, float, np.ndarray)) for x in array): 170 raise ValueError("mean_std(list) requires numerical values (int, float, or numpy.ndarray).") 171 data = np.asarray(array) 172 mean = float(data.mean()) 173 error = float(data.std(ddof=degrees_of_freedom)) 174 if not rounded or error == 0: 175 return mean, error 176 exponent = int(math.floor(math.log10(abs(error)))) 177 first_three_digits = int(100*abs(error) / 10**exponent) 178 if 104 < first_three_digits < 195: 179 exponent -= 1 180 rounded_mean = round(mean, -exponent) 181 rounded_error = round(error, -exponent) 182 return rounded_mean, rounded_error
29def plateau( 30 spectra:Spectra, 31 cuts=None, 32 df_index:int=0 33 ) -> tuple: 34 """Fit the mean value and the error of a plateau in a `sah.classes.Spectra` object. 35 36 Use as `sah.fit.plateau(spectra, cuts=[low_cut, high_cut], df_index=0)`. 37 38 If `sah.classes.Spectra.dfs[df_index]` has an 'Error' column, those errors are also taken into account 39 along with the standard deviation of the mean, else only the standard deviation is considered. 40 This is the case if your dataset had a third column with the errors 41 when you imported the `sah.classes.Spectra` object. 42 43 Note that `cuts`, `low_cut` and/or `top_cut` can be set to None. 44 """ 45 df = deepcopy(spectra.dfs[df_index]) 46 if isinstance(cuts, list): 47 low_cut = cuts[0] 48 if len(cuts) > 1: 49 top_cut = cuts[1] 50 else: 51 top_cut = None 52 elif isinstance(cuts, float): # If cuts is a float, it is considered as low_cut 53 low_cut = cuts 54 top_cut = None 55 elif cuts is None: 56 low_cut = None 57 top_cut = None 58 else: 59 raise ValueError("plateau: cuts must be a float for the low_cut, or a list") 60 if low_cut is not None: 61 df = df[df[df.columns[0]] >= low_cut] 62 if top_cut is not None: 63 df = df[df[df.columns[0]] <= top_cut] 64 mean = df[df.columns[1]].mean() 65 std_mean = df[df.columns[1]].std() 66 error_column = next((col for col in alias.files['error'] if col in df.columns), None) # Get the error column title 67 if error_column: 68 errors = df[error_column].to_numpy() 69 std_data = np.sqrt(np.sum(errors**2)) / len(errors) 70 std = np.sqrt(std_data**2 + std_mean**2) 71 else: 72 std = std_mean 73 return mean, std
Fit the mean value and the error of a plateau in a `sah.classes.Spectra` object.
Use as `sah.fit.plateau(spectra, cuts=[low_cut, high_cut], df_index=0)`.
If `sah.classes.Spectra.dfs[df_index]` has an 'Error' column, those errors are also taken into account
along with the standard deviation of the mean, else only the standard deviation is considered.
This is the case if your dataset had a third column with the errors
when you imported the `sah.classes.Spectra` object.
Note that `cuts`, `low_cut` and/or `top_cut` can be set to None.
76def area_under_peak( 77 spectra:Spectra, 78 peak:list, 79 df_index:int=0, 80 errors_as_in_baseline:bool=True, 81 min_as_baseline:bool=False 82 ) -> tuple: 83 """Calculate the area under a given peak. 84 85 Peaks must be defined as `peak:list=[xmin, xmax, baseline=0, baseline_error=0]`. 86 If the dataset has no `Error` column, the error for each point is assumed to be the same 87 as the baseline error if `errors_as_in_baseline=True`, otherwise it is assumed to be zero. 88 If `min_as_baseline=True` and `baseline=0`, the baseline is assumed to be the minimum value. 89 Also, if `min_as_baseline=True` and there are negative areas even after applying the baseline, 90 the baseline will be corrected to the minimum value. 91 """ 92 if len(peak) < 2: 93 raise ValueError("area_under_peak: peak must have at least two values: [xmin, xmax]") 94 xmin = peak[0] 95 xmax = peak[1] 96 baseline = peak[2] if len(peak) >= 3 else 0.0 97 baseline_error = peak[3] if len(peak) >= 4 else 0.0 98 99 df = deepcopy(spectra.dfs[df_index]) 100 df_range = df[(df[df.columns[0]] >= xmin) & (df[df.columns[0]] <= xmax)] 101 x = df_range[df.columns[0]].to_numpy() 102 y = df_range[df.columns[1]].to_numpy() 103 104 min_y = y.min() 105 if min_as_baseline and (baseline == 0 or baseline > min_y): 106 baseline = min_y 107 108 y = y - baseline 109 110 area = scipy.integrate.simpson(y, x=x) 111 112 # Get the error column title 113 error_column = next((col for col in df.columns if col.lower() in alias.files['error']), None) 114 if error_column: 115 point_errors = df_range[error_column].to_numpy() 116 else: # Assume the error in each point is the same as the baseline error 117 if errors_as_in_baseline == True: 118 point_errors = np.full_like(y, baseline_error) 119 else: 120 point_errors = np.zeros_like(y) 121 total_errors = np.sqrt(point_errors**2 + baseline_error**2) 122 area_error = np.sqrt(scipy.integrate.simpson(total_errors**2, x=x)) 123 124 return area, area_error
Calculate the area under a given peak.
Peaks must be defined as `peak:list=[xmin, xmax, baseline=0, baseline_error=0]`.
If the dataset has no `Error` column, the error for each point is assumed to be the same
as the baseline error if `errors_as_in_baseline=True`, otherwise it is assumed to be zero.
If `min_as_baseline=True` and `baseline=0`, the baseline is assumed to be the minimum value.
Also, if `min_as_baseline=True` and there are negative areas even after applying the baseline,
the baseline will be corrected to the minimum value.
127def ratio_areas( 128 area:float, 129 area_total:float, 130 area_error:float=0.0, 131 area_total_error:float=0.0, 132 inverse_ratio:bool=False 133 ) -> tuple: 134 """Check the ratio between two areas, e.g. to estimate deuteration levels from ATR data. 135 136 The ratio is calculated as `area / area_total`. This behavior is modified if `inverse_ratio = True`, 137 so that the ratio is calculated as `(area_total - area) / area_total`. 138 Note that changing the ratio calculation also affects the error propagation. 139 """ 140 if inverse_ratio: 141 ratio = (area_total - area) / area_total 142 if ratio != 0.0: 143 ratio_error = abs(ratio) * np.sqrt((np.sqrt(area_total_error**2 + area_error**2) / (area_total - area))**2 + (area_total_error / area_total)**2) 144 else: 145 ratio_error = None 146 else: 147 ratio = area / area_total 148 if ratio != 0.0: 149 ratio_error = abs(ratio) * np.sqrt((area_error / area)**2 + (area_total_error / area_total)**2) 150 else: 151 ratio_error = None 152 153 return ratio, ratio_error
Check the ratio between two areas, e.g. to estimate deuteration levels from ATR data.
The ratio is calculated as `area / area_total`. This behavior is modified if `inverse_ratio = True`,
so that the ratio is calculated as `(area_total - area) / area_total`.
Note that changing the ratio calculation also affects the error propagation.
156def mean( 157 array:list, 158 rounded:bool=True, 159 degrees_of_freedom=0 160 ) -> tuple: 161 """Takes an `array` of numerical values and returns the mean and standard deviation. 162 163 It is calculated with numpy as:\n 164 $\\sigma_{x}=\\sqrt{\\frac{\\sum{(x_{i}-{\\overline{x}})^2}}{N-\\text{ddof}}}$\n 165 where ddof are the delta `degrees_of_freedom`, zero by default. 166 Set it to `1` for a corrected sample standard deviation (low N cases), 167 see more details [here](https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation).\n 168 The mean is rounded up to the order of the error by default. To override this behaviour, set `rounded=False`. 169 """ 170 if not all(isinstance(x, (int, float, np.ndarray)) for x in array): 171 raise ValueError("mean_std(list) requires numerical values (int, float, or numpy.ndarray).") 172 data = np.asarray(array) 173 mean = float(data.mean()) 174 error = float(data.std(ddof=degrees_of_freedom)) 175 if not rounded or error == 0: 176 return mean, error 177 exponent = int(math.floor(math.log10(abs(error)))) 178 first_three_digits = int(100*abs(error) / 10**exponent) 179 if 104 < first_three_digits < 195: 180 exponent -= 1 181 rounded_mean = round(mean, -exponent) 182 rounded_error = round(error, -exponent) 183 return rounded_mean, rounded_error
Takes an `array` of numerical values and returns the mean and standard deviation.
It is calculated with numpy as:
$\sigma_{x}=\sqrt{\frac{\sum{(x_{i}-\overline{x})^2}}{N-\text{ddof}}}$
where ddof are the delta `degrees_of_freedom`, zero by default.
Set it to `1` for a corrected sample standard deviation (low N cases),
see more details [here](https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation).
The mean is rounded up to the order of the error by default. To override this behaviour, set `rounded=False`.