Source code for pvops.timeseries.models.survival

from scipy import stats
from sksurv.nonparametric import kaplan_meier_estimator

[docs] def fit_survival_function(df, col_dict, method): """ Calculate the survival function for different groups in a DataFrame using specified methods. This function computes the survival function for each unique group in the input DataFrame based on the specified method. It supports the Kaplan-Meier estimator and Weibull distribution fitting for survival analysis. The Kaplan-Meier estimator is a non-parametric statistic, while the Weibull distribution is a parametric model. Parameters ---------- df : pandas.DataFrame A DataFrame containing failure data with at least three columns specified in `col_dict`: one for grouping, one for the time to failure, and one indicating whether the failure was observed col_dict : dict of {str : str} A dictionary that contains the column names relevant for survival analysis - **group_by** (*string*), should be assigned to the column to group by - **time_to_fail** (*string*), should be assigned to the column containing the time until failure - **was_observed** (*string*), should be assigned to the column indicating whether the failure was observed method : str The method to use for calculating the survival function. Must be one of: - 'kaplan-meier': Uses the Kaplan-Meier estimator for survival analysis. - 'weibull': Fits a Weibull distribution to the data. Returns ------- dict - If `method` is `'kaplan-meier'`, contains keys `'times'`, `'fail_prob'`, and `'conf_int'`, which denote the times, failure probabilities, and confidence intervals on the failure probabilities. - If `method` is `'weibull'`, contains keys `'shape'`, `'scale'`, and `'distribution'`, which denote the shape parameter, scale parameter, and corresponding fitted `stats.weibull_min` distribution. """ implemented_methods = ['kaplan-meier', 'weibull'] if method not in implemented_methods: raise ValueError(f'method argument must be one of {implemented_methods}, got {method}') df = df.reset_index() group_by = col_dict['group_by'] time_to_fail = col_dict['time_to_fail'] was_observed = col_dict['was_observed'] results = {} unique_group_by = df[group_by].unique() for group in unique_group_by: group_df = df[df[group_by] == group] if method == 'kaplan-meier': km_result = kaplan_meier_estimator(group_df[was_observed], group_df[time_to_fail], conf_type='log-log') group_result = {'times': km_result[0], 'fail_prob': km_result[1], 'conf_int': km_result[2]} elif method == 'weibull': uncensored_times = group_df[group_df[was_observed]][time_to_fail] censored_times = group_df[~group_df[was_observed]][time_to_fail] data = stats.CensoredData(uncensored=uncensored_times, right=censored_times) shape, _, scale = stats.weibull_min.fit(data, floc=0) group_result = {'shape': shape, 'scale': scale, 'distribution': stats.weibull_min(c=shape, scale=scale)} results[group] = group_result return results