sah.fit

Description

This module contains functions for fitting and analyzing spectral data.

Index

plateau() Fit the mean value and the error of a plateau
area_under_peak() Calculate the area under a given peak
ratio_areas() Check the ratio between two areas
mean() Get the mean and standard deviation of a list of values

  1"""
  2# Description
  3
  4This module contains functions for fitting and analyzing spectral data.
  5
  6
  7# Index
  8
  9| | |
 10| --- | --- |
 11| `plateau()`         | Fit the mean value and the error of a plateau |
 12| `area_under_peak()` | Calculate the area under a given peak |
 13| `ratio_areas()`     | Check the ratio between two areas |
 14| `mean()`            | Get the mean and standard deviation of a list of values |
 15
 16---
 17"""
 18
 19
import math
from copy import deepcopy

import numpy as np
import scipy
import scipy.integrate  # `import scipy` alone does not guarantee the integrate submodule on older scipy

import aton.alias as alias
from .classes import Spectra
 26
 27
 28def plateau(
 29        spectra:Spectra,
 30        cuts=None,
 31        df_index:int=0
 32    ) -> tuple:
 33    """Fit the mean value and the error of a plateau in a `sah.classes.Spectra` object.
 34
 35    Use as `sah.fit.plateau(spectra, cuts=[low_cut, high_cut], df_index=0)`.
 36
 37    If `sah.classes.Spectra.dfs[df_index]` has an 'Error' column, those errors are also taken into account
 38    along with the standard deviation of the mean, else only the standard deviation is considered.
 39    This is the case if your dataset had a third column with the errors
 40    when you imported the `sah.classes.Spectra` object.
 41
 42    Note that `cuts`, `low_cut` and/or `top_cut` can be set to None.
 43    """
 44    df = deepcopy(spectra.dfs[df_index])
 45    if isinstance(cuts, list):
 46        low_cut = cuts[0]
 47        if len(cuts) > 1:
 48            top_cut = cuts[1]
 49        else:
 50            top_cut = None
 51    elif isinstance(cuts, float):  # If cuts is a float, it is considered as low_cut
 52        low_cut = cuts
 53        top_cut = None
 54    elif cuts is None:
 55        low_cut = None
 56        top_cut = None
 57    else:
 58        raise ValueError("plateau: cuts must be a float for the low_cut, or a list")
 59    if low_cut is not None:
 60        df = df[df[df.columns[0]] >= low_cut]
 61    if top_cut is not None:
 62        df = df[df[df.columns[0]] <= top_cut]
 63    mean = df[df.columns[1]].mean()
 64    std_mean = df[df.columns[1]].std()
 65    error_column = next((col for col in alias.files['error'] if col in df.columns), None)  # Get the error column title
 66    if error_column:
 67        errors = df[error_column].to_numpy()
 68        std_data = np.sqrt(np.sum(errors**2)) / len(errors)
 69        std = np.sqrt(std_data**2 + std_mean**2)
 70    else:
 71        std = std_mean
 72    return mean, std
 73
 74
 75def area_under_peak(
 76        spectra:Spectra,
 77        peak:list,
 78        df_index:int=0,
 79        errors_as_in_baseline:bool=True,
 80        min_as_baseline:bool=False
 81    ) -> tuple:
 82    """Calculate the area under a given peak.
 83
 84    Peaks must be defined as `peak:list=[xmin, xmax, baseline=0, baseline_error=0]`.
 85    If the dataset has no `Error` column, the error for each point is assumed to be the same
 86    as the baseline error if `errors_as_in_baseline=True`, otherwise it is assumed to be zero.
 87    If `min_as_baseline=True` and `baseline=0`, the baseline is assumed to be the minimum value.
 88    Also, if `min_as_baseline=True` and there are negative areas even after applying the baseline,
 89    the baseline will be corrected to the minimum value.
 90    """
 91    if len(peak) < 2:
 92        raise ValueError("area_under_peak: peak must have at least two values: [xmin, xmax]")
 93    xmin = peak[0]
 94    xmax = peak[1]
 95    baseline = peak[2] if len(peak) >= 3 else 0.0
 96    baseline_error = peak[3] if len(peak) >= 4 else 0.0
 97
 98    df = deepcopy(spectra.dfs[df_index])
 99    df_range = df[(df[df.columns[0]] >= xmin) & (df[df.columns[0]] <= xmax)]
100    x = df_range[df.columns[0]].to_numpy()
101    y = df_range[df.columns[1]].to_numpy()
102    
103    min_y = y.min()
104    if min_as_baseline and (baseline == 0 or baseline > min_y):
105        baseline = min_y
106
107    y = y - baseline
108
109    area = scipy.integrate.simpson(y, x=x)
110
111    # Get the error column title
112    error_column = next((col for col in df.columns if col.lower() in alias.files['error']), None)  
113    if error_column:
114        point_errors = df_range[error_column].to_numpy()
115    else: # Assume the error in each point is the same as the baseline error
116        if errors_as_in_baseline == True:
117            point_errors = np.full_like(y, baseline_error)
118        else:
119            point_errors = np.zeros_like(y)
120    total_errors = np.sqrt(point_errors**2 + baseline_error**2)
121    area_error = np.sqrt(scipy.integrate.simpson(total_errors**2, x=x))
122
123    return area, area_error
124
125
def ratio_areas(
        area:float,
        area_total:float,
        area_error:float=0.0,
        area_total_error:float=0.0,
        inverse_ratio:bool=False
    ) -> tuple:
    """Check the ratio between two areas, e.g. to estimate deuteration levels from ATR data.

    The ratio is calculated as `area / area_total`. This behavior is modified if `inverse_ratio = True`,
    so that the ratio is calculated as `(area_total - area) / area_total`.
    Note that changing the ratio calculation also affects the error propagation.

    Returns a `(ratio, ratio_error)` tuple; the error is None for a null ratio.
    """
    # Pick the numerator and its propagated error for the requested mode
    if inverse_ratio:
        numerator = area_total - area
        numerator_error = np.sqrt(area_total_error**2 + area_error**2)
    else:
        numerator = area
        numerator_error = area_error
    ratio = numerator / area_total
    if ratio == 0.0:
        # Relative errors are undefined for a null ratio
        return ratio, None
    # Standard quadrature propagation of the relative errors
    relative_error = np.sqrt((numerator_error / numerator)**2 + (area_total_error / area_total)**2)
    return ratio, abs(ratio) * relative_error
153
154
def mean(
        array:list,
        rounded:bool=True,
        degrees_of_freedom=0
    ) -> tuple:
    """Takes an `array` of numerical values and returns the mean and standard deviation.

    It is calculated with numpy as:\n
    $\\sigma_{x}=\\sqrt{\\frac{\\sum{(x_{i}-{\\overline{x}})^2}}{N-\\text{ddof}}}$\n
    where ddof are the delta `degrees_of_freedom`, zero by default.
    Set it to `1` for a corrected sample standard deviation (low N cases),
    see more details [here](https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation).\n
    The mean is rounded up to the order of the error by default. To override this behaviour, set `rounded=False`.

    Raises ValueError if `array` contains non-numerical values.
    """
    if not all(isinstance(x, (int, float, np.ndarray)) for x in array):
        raise ValueError("mean(list) requires numerical values (int, float, or numpy.ndarray).")
    data = np.asarray(array)
    mean = float(data.mean())
    error = float(data.std(ddof=degrees_of_freedom))
    if not rounded or error == 0:  # Nothing to round against a null error
        return mean, error
    # Order of magnitude of the first significant digit of the error
    exponent = int(math.floor(math.log10(abs(error))))
    # Keep an extra digit when the error starts between ~1.05 and ~1.95,
    # where a single significant digit would be too coarse a rounding
    first_three_digits = int(100*abs(error) / 10**exponent)
    if 104 < first_three_digits < 195:
        exponent -= 1
    rounded_mean = round(mean, -exponent)
    rounded_error = round(error, -exponent)
    return rounded_mean, rounded_error
def plateau(spectra: sah.classes.Spectra, cuts=None, df_index: int = 0) -> tuple:
29def plateau(
30        spectra:Spectra,
31        cuts=None,
32        df_index:int=0
33    ) -> tuple:
34    """Fit the mean value and the error of a plateau in a `sah.classes.Spectra` object.
35
36    Use as `sah.fit.plateau(spectra, cuts=[low_cut, high_cut], df_index=0)`.
37
38    If `sah.classes.Spectra.dfs[df_index]` has an 'Error' column, those errors are also taken into account
39    along with the standard deviation of the mean, else only the standard deviation is considered.
40    This is the case if your dataset had a third column with the errors
41    when you imported the `sah.classes.Spectra` object.
42
43    Note that `cuts`, `low_cut` and/or `top_cut` can be set to None.
44    """
45    df = deepcopy(spectra.dfs[df_index])
46    if isinstance(cuts, list):
47        low_cut = cuts[0]
48        if len(cuts) > 1:
49            top_cut = cuts[1]
50        else:
51            top_cut = None
52    elif isinstance(cuts, float):  # If cuts is a float, it is considered as low_cut
53        low_cut = cuts
54        top_cut = None
55    elif cuts is None:
56        low_cut = None
57        top_cut = None
58    else:
59        raise ValueError("plateau: cuts must be a float for the low_cut, or a list")
60    if low_cut is not None:
61        df = df[df[df.columns[0]] >= low_cut]
62    if top_cut is not None:
63        df = df[df[df.columns[0]] <= top_cut]
64    mean = df[df.columns[1]].mean()
65    std_mean = df[df.columns[1]].std()
66    error_column = next((col for col in alias.files['error'] if col in df.columns), None)  # Get the error column title
67    if error_column:
68        errors = df[error_column].to_numpy()
69        std_data = np.sqrt(np.sum(errors**2)) / len(errors)
70        std = np.sqrt(std_data**2 + std_mean**2)
71    else:
72        std = std_mean
73    return mean, std

Fit the mean value and the error of a plateau in a sah.classes.Spectra object.

Use as sah.fit.plateau(spectra, cuts=[low_cut, top_cut], df_index=0).

If sah.classes.Spectra.dfs[df_index] has an 'Error' column, those errors are also taken into account along with the standard deviation of the mean, else only the standard deviation is considered. This is the case if your dataset had a third column with the errors when you imported the sah.classes.Spectra object.

Note that cuts, low_cut and/or top_cut can be set to None.

def area_under_peak( spectra: sah.classes.Spectra, peak: list, df_index: int = 0, errors_as_in_baseline: bool = True, min_as_baseline: bool = False) -> tuple:
 76def area_under_peak(
 77        spectra:Spectra,
 78        peak:list,
 79        df_index:int=0,
 80        errors_as_in_baseline:bool=True,
 81        min_as_baseline:bool=False
 82    ) -> tuple:
 83    """Calculate the area under a given peak.
 84
 85    Peaks must be defined as `peak:list=[xmin, xmax, baseline=0, baseline_error=0]`.
 86    If the dataset has no `Error` column, the error for each point is assumed to be the same
 87    as the baseline error if `errors_as_in_baseline=True`, otherwise it is assumed to be zero.
 88    If `min_as_baseline=True` and `baseline=0`, the baseline is assumed to be the minimum value.
 89    Also, if `min_as_baseline=True` and there are negative areas even after applying the baseline,
 90    the baseline will be corrected to the minimum value.
 91    """
 92    if len(peak) < 2:
 93        raise ValueError("area_under_peak: peak must have at least two values: [xmin, xmax]")
 94    xmin = peak[0]
 95    xmax = peak[1]
 96    baseline = peak[2] if len(peak) >= 3 else 0.0
 97    baseline_error = peak[3] if len(peak) >= 4 else 0.0
 98
 99    df = deepcopy(spectra.dfs[df_index])
100    df_range = df[(df[df.columns[0]] >= xmin) & (df[df.columns[0]] <= xmax)]
101    x = df_range[df.columns[0]].to_numpy()
102    y = df_range[df.columns[1]].to_numpy()
103    
104    min_y = y.min()
105    if min_as_baseline and (baseline == 0 or baseline > min_y):
106        baseline = min_y
107
108    y = y - baseline
109
110    area = scipy.integrate.simpson(y, x=x)
111
112    # Get the error column title
113    error_column = next((col for col in df.columns if col.lower() in alias.files['error']), None)  
114    if error_column:
115        point_errors = df_range[error_column].to_numpy()
116    else: # Assume the error in each point is the same as the baseline error
117        if errors_as_in_baseline == True:
118            point_errors = np.full_like(y, baseline_error)
119        else:
120            point_errors = np.zeros_like(y)
121    total_errors = np.sqrt(point_errors**2 + baseline_error**2)
122    area_error = np.sqrt(scipy.integrate.simpson(total_errors**2, x=x))
123
124    return area, area_error

Calculate the area under a given peak.

Peaks must be defined as peak:list=[xmin, xmax, baseline=0, baseline_error=0]. If the dataset has no Error column, the error for each point is assumed to be the same as the baseline error if errors_as_in_baseline=True, otherwise it is assumed to be zero. If min_as_baseline=True and baseline=0, the baseline is assumed to be the minimum value. Also, if min_as_baseline=True and there are negative areas even after applying the baseline, the baseline will be corrected to the minimum value.

def ratio_areas( area: float, area_total: float, area_error: float = 0.0, area_total_error: float = 0.0, inverse_ratio: bool = False) -> tuple:
def ratio_areas(
        area:float,
        area_total:float,
        area_error:float=0.0,
        area_total_error:float=0.0,
        inverse_ratio:bool=False
    ) -> tuple:
    """Check the ratio between two areas, e.g. to estimate deuteration levels from ATR data.

    The ratio is calculated as `area / area_total`. This behavior is modified if `inverse_ratio = True`,
    so that the ratio is calculated as `(area_total - area) / area_total`.
    Note that changing the ratio calculation also affects the error propagation.

    Returns a `(ratio, ratio_error)` tuple; the error is None for a null ratio.
    """
    # Pick the numerator and its propagated error for the requested mode
    if inverse_ratio:
        numerator = area_total - area
        numerator_error = np.sqrt(area_total_error**2 + area_error**2)
    else:
        numerator = area
        numerator_error = area_error
    ratio = numerator / area_total
    if ratio == 0.0:
        # Relative errors are undefined for a null ratio
        return ratio, None
    # Standard quadrature propagation of the relative errors
    relative_error = np.sqrt((numerator_error / numerator)**2 + (area_total_error / area_total)**2)
    return ratio, abs(ratio) * relative_error

Check the ratio between two areas, e.g. to estimate deuteration levels from ATR data.

The ratio is calculated as area / area_total. This behavior is modified if inverse_ratio = True, so that the ratio is calculated as (area_total - area) / area_total. Note that changing the ratio calculation also affects the error propagation.

def mean(array: list, rounded: bool = True, degrees_of_freedom=0) -> tuple:
def mean(
        array:list,
        rounded:bool=True,
        degrees_of_freedom=0
    ) -> tuple:
    """Takes an `array` of numerical values and returns the mean and standard deviation.

    It is calculated with numpy as:\n
    $\\sigma_{x}=\\sqrt{\\frac{\\sum{(x_{i}-{\\overline{x}})^2}}{N-\\text{ddof}}}$\n
    where ddof are the delta `degrees_of_freedom`, zero by default.
    Set it to `1` for a corrected sample standard deviation (low N cases),
    see more details [here](https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation).\n
    The mean is rounded up to the order of the error by default. To override this behaviour, set `rounded=False`.

    Raises ValueError if `array` contains non-numerical values.
    """
    if not all(isinstance(x, (int, float, np.ndarray)) for x in array):
        raise ValueError("mean(list) requires numerical values (int, float, or numpy.ndarray).")
    data = np.asarray(array)
    mean = float(data.mean())
    error = float(data.std(ddof=degrees_of_freedom))
    if not rounded or error == 0:  # Nothing to round against a null error
        return mean, error
    # Order of magnitude of the first significant digit of the error
    exponent = int(math.floor(math.log10(abs(error))))
    # Keep an extra digit when the error starts between ~1.05 and ~1.95,
    # where a single significant digit would be too coarse a rounding
    first_three_digits = int(100*abs(error) / 10**exponent)
    if 104 < first_three_digits < 195:
        exponent -= 1
    rounded_mean = round(mean, -exponent)
    rounded_error = round(error, -exponent)
    return rounded_mean, rounded_error

Takes an array of numerical values and returns the mean and standard deviation.

It is calculated with numpy as:

$\sigma_{x}=\sqrt{\frac{\sum{(x_{i}-{\overline{x}})^2}}{N-\text{ddof}}}$

where ddof are the delta degrees_of_freedom, zero by default. Set it to 1 for a corrected sample standard deviation (low N cases), see more details at https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation.

The mean is rounded up to the order of the error by default. To override this behaviour, set rounded=False.