Source code for pvops.timeseries.models.AIT

import numpy as np
from numpy.core.fromnumeric import prod
from sklearn.metrics import mean_squared_error, r2_score


class Predictor:
    """Prediction helpers shared by the time-series models in this module."""

    def __init__(self):
        # Cooperative call so that classes combining several templates
        # (multiple inheritance) still initialise every base.
        super(Predictor, self).__init__()

    def apply_additive_polynomial_model(self, model_terms, Xs):
        """Evaluate an additive polynomial model.

        Every term contributes ``coeff * prod(x_i ** n_i)``, where the
        ``x_i`` are the entries of `Xs` and the ``n_i`` the matching
        entries of the term's powers; the contributions are summed.

        Parameters
        ----------
        model_terms : list of tuples
            ``(coefficient, powers)`` pairs. For example,

            .. code-block:: python

                [(0.29359785963294494, [1, 0]),
                 (0.754806343190528, [0, 1]),
                 (0.396833207207238, [1, 1]),
                 (-0.0588375219110795, [0, 0])]

        Xs : sequence of array-like
            Input variables, in the same order as the entries of each
            term's powers. Inputs and powers are paired positionally
            with ``zip``, so surplus entries on either side are ignored.

        Returns
        -------
        Array of predicted values (the sum of all model terms). The
        inputs in `Xs` are never modified.

        Raises
        ------
        ValueError
            If `model_terms` is empty, or a term has no (input, power)
            pair to evaluate.
        """
        energy = None
        for coeff, powers in model_terms:
            term = None
            for x, n in zip(Xs, powers):
                factor = x ** n
                # Out-of-place products/sums let NumPy promote dtypes
                # (e.g. int -> float) instead of failing on an in-place
                # cast.
                term = factor if term is None else term * factor
            if term is None:
                raise ValueError(
                    "Each model term needs at least one input/power pair."
                )
            contribution = coeff * term
            energy = contribution if energy is None else energy + contribution

        if energy is None:
            raise ValueError("model_terms must contain at least one term.")
        return energy

    def evaluate(self, real, pred):
        """Print and return goodness-of-fit metrics for a prediction.

        Parameters
        ----------
        real : array-like
            Observed values.
        pred : array-like
            Predicted values.

        Returns
        -------
        tuple
            ``(logrmse, r2)``: the natural log of the root mean squared
            error and the R-squared score.
        """
        rmse = np.sqrt(mean_squared_error(real, pred))
        logrmse = np.log(rmse)
        r2 = r2_score(real, pred)
        print(f"The fit has an R-squared of {r2} and a log RMSE of {logrmse}")
        return logrmse, r2
class Processer:
    """Data-preparation helpers shared by the time-series models in this module."""

    def __init__(self):
        # Cooperative call so that classes combining several templates
        # (multiple inheritance) still initialise every base.
        super(Processer, self).__init__()
        # Prefix used to name the standardized copy of an input column.
        self._col_scaled_prefix = 'stdscaled_'

    def check_data(self, data, prod_col_dict):
        """Decide whether the fit can be evaluated and store it in ``self.do_eval``.

        Evaluation is enabled only when `prod_col_dict` has an
        ``'energyprod'`` entry and the column it names exists in `data`.
        Otherwise a notice is printed.

        Parameters
        ----------
        data : DataFrame
            Production data.
        prod_col_dict : dict
            Mapping from nicknamed parameters to column names in `data`.
        """
        self.do_eval = (
            'energyprod' in prod_col_dict
            and prod_col_dict['energyprod'] in data.columns.tolist()
        )

        if not self.do_eval:
            print("Because the power production data is not"
                  " passed, the fit will not be evaluated."
                  " Predictions will still be rendered.")

    def _apply_transform(self, data, scaler_info):
        """Standardize `data` as ``(data - mean) / scale``.

        A new object is returned; `data` itself is left untouched, so
        integer inputs are promoted to float instead of failing on an
        in-place cast.
        """
        return (data - scaler_info["mean"]) / scaler_info["scale"]

    def _apply_inverse_transform(self, data, scaler_info):
        """Undo the standardization: ``data * scale + mean``.

        A new object is returned; `data` itself is left untouched.
        """
        return data * scaler_info["scale"] + scaler_info["mean"]

    def _clean_columns(self, scaler, prod_df, prod_col_dict):
        """Delete from `prod_df` the scaled column of every key in `scaler`."""
        for key in scaler:
            del prod_df[self._col_scaled_prefix + prod_col_dict[key]]
# @dev: The 'AIT' class can be one of many models that inherit the
# @dev: Processer and Predictor templates. When adding new models,
# @dev: use the Processer and Predictor classes to hold general
# @dev: functionality while keeping model-specific nuances in the
# @dev: classes below. The above classes may be moved to a different
# @dev: module if that seems fitting.
class AIT(Processer, Predictor):
    """Pre-trained additive polynomial model of energy production.

    Two parameter sets are stored, one for high-capacity and one for
    low-capacity systems; `predict` routes each row to the matching set
    based on its DC size.
    """

    # Rows with a DC size strictly above this value use the high-capacity
    # parameters; all others use the low-capacity parameters.
    # NOTE(review): presumably expressed in the same unit as the dcsize
    # column used for training (kW?) - confirm.
    _HIGHCAP_THRESHOLD = 1000

    def __init__(self):
        super(AIT, self).__init__()
        self._load_params()

    def _load_params(self):
        """Load the standardization constants and model terms of both regimes."""
        self.scaler_highcap = {
            "irradiance": {"mean": 571.45952959,
                           "scale": 324.19905495},
            "dcsize": {"mean": 14916.2339917,
                       "scale": 20030.00088265},
            "energyprod": {"mean": 7449.15184666,
                           "scale": 12054.52533771},
        }
        self.model_terms_highcap = [(0.29359785963294494, [1, 0]),
                                    (0.754806343190528, [0, 1]),
                                    (0.396833207207238, [1, 1]),
                                    (-0.0588375219110795, [0, 0])]
        self.scaler_lowcap = {
            "irradiance": {"mean": 413.53334101,
                           "scale": 286.11031612},
            "dcsize": {"mean": 375.91883522,
                       "scale": 234.15141671},
            "energyprod": {"mean": 119.00787546,
                           "scale": 119.82927847},
        }
        self.model_terms_lowcap = [(0.6866363032474436, [1, 0]),
                                   (0.6473846301807609, [0, 1]),
                                   (0.41926724219597955, [1, 1]),
                                   (0.06624491753542901, [0, 0])]

    def predict_subset(self, prod_df, scaler, model_terms, prod_col_dict):
        """Predict energy for rows that all belong to one capacity regime.

        Parameters
        ----------
        prod_df : DataFrame
            Production data. A standardized copy of every scaled column
            is added to this frame, named with the ``stdscaled_`` prefix.
        scaler : dict
            Per-parameter ``{"mean": ..., "scale": ...}`` constants; must
            contain ``'irradiance'``, ``'dcsize'`` and ``'energyprod'``.
        model_terms : list of tuples
            ``(coefficient, powers)`` pairs, with the powers ordered as
            ``[irradiance, dcsize]``.
        prod_col_dict : dict
            Mapping from nicknamed parameters to column names in
            `prod_df`. ``'irradiance'`` and ``'dcsize'`` are required;
            ``'energyprod'`` is only needed for evaluating the fit.

        Returns
        -------
        Array of predicted energy values, rescaled to the units of the
        ``'energyprod'`` scaler.
        """
        self.check_data(prod_df, prod_col_dict)

        # 1. Standardize the data using the same scales as in training.
        for key, scaler_info in scaler.items():
            # The measured-production column is optional: without it the
            # fit cannot be evaluated, but predictions must still work.
            if key == "energyprod" and not self.do_eval:
                continue
            column = prod_col_dict[key]
            scaled_data = self._apply_transform(prod_df[column].copy(),
                                                scaler_info)
            prod_df[self._col_scaled_prefix + column] = scaled_data

        prod_irr = prod_col_dict["irradiance"]
        prod_dcsize = prod_col_dict["dcsize"]

        irr = prod_df[self._col_scaled_prefix + prod_irr].values
        capacity = prod_df[self._col_scaled_prefix + prod_dcsize].values
        Xs = [irr, capacity]

        # 2. Predict energy (in standardized units).
        predicted_energy = self.apply_additive_polynomial_model(model_terms,
                                                                Xs)

        # 3. Rescale predictions back to energy units.
        predicted_rescaled_energy = self._apply_inverse_transform(
            predicted_energy, scaler['energyprod'])

        # 4. Evaluate against the measured production, when available.
        if self.do_eval:
            self.evaluate(prod_df[prod_col_dict["energyprod"]].values,
                          predicted_rescaled_energy)
        return predicted_rescaled_energy

    def predict(self, prod_df, prod_col_dict):
        """Write predicted energy into the ``'baseline'`` column of `prod_df`.

        Rows are split by DC size and each group is predicted with its
        own parameter set. Rows whose DC size is NaN match neither group
        and therefore receive no prediction.

        Parameters
        ----------
        prod_df : DataFrame
            Production data; modified in place.
        prod_col_dict : dict
            Mapping from nicknamed parameters to column names in
            `prod_df`; must contain ``'irradiance'``, ``'dcsize'`` and
            ``'baseline'``.

        Returns
        -------
        DataFrame
            `prod_df`, with the baseline column filled in.
        """
        dcsize = prod_df[prod_col_dict['dcsize']]
        regimes = (
            # High-capacity systems
            (dcsize > self._HIGHCAP_THRESHOLD,
             self.scaler_highcap, self.model_terms_highcap),
            # Low-capacity systems
            (dcsize <= self._HIGHCAP_THRESHOLD,
             self.scaler_lowcap, self.model_terms_lowcap),
        )

        for mask, scaler, model_terms in regimes:
            if not mask.any():
                continue
            # Work on an explicit copy: predict_subset adds helper columns,
            # which must not be written into a slice of prod_df.
            subset = prod_df.loc[mask, :].copy()
            predicted = self.predict_subset(subset, scaler, model_terms,
                                            prod_col_dict)
            prod_df.loc[mask, prod_col_dict["baseline"]] = predicted
        return prod_df
def AIT_calc(prod_df, prod_col_dict):
    """
    Compute expected energy from measured irradiance with the pre-trained
    AIT regression model (trained on field data). Plane-of-array
    irradiance is recommended as the irradiance input.

    Parameters
    ----------
    prod_df : DataFrame
        Production data. The input frame is not modified; the
        calculation runs on a copy.
    prod_col_dict : dict of {str : str}
        Names of the relevant columns in `prod_df`:

        - **irradiance** (*string*), column holding the irradiance,
          with data in [W/m^2]
        - **dcsize** (*string*), column holding the site capacity
        - **energyprod** (*string*), column holding the power or energy
          production. If this is passed, an evaluation will be provided.
        - **baseline** (*string*), column in which the calculated values
          are stored

    Example
    -------
    .. code-block:: python

        production_col_dict = {'irradiance': 'irrad_poa_Wm2',
                               'dcsize': 'capacity_DC_kW',
                               'energyprod': 'energy_generated_kWh',
                               'baseline': 'predicted'}
        data = AIT_calc(data, production_col_dict)

    Returns
    -------
    DataFrame
        A copy of the production data with a new column, the predicted
        energy
    """
    working_copy = prod_df.copy()
    return AIT().predict(working_copy, prod_col_dict)