Timeseries IV Simulation

[90]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os

from pvops.iv import timeseries_simulator
from pvops.timeseries import preprocess
from pvops.timeseries.models import linear
from pvops.text2time import utils as t2t_utils, preprocess as t2t_preprocess

Next we load in example data. We only simulate for irradiance above 200 select one of the two sites to focus on. Additionally, we drop rows without the data needed for simulation.

[91]:

example_prodpath = os.path.join('example_data', 'example_prod_with_covariates.csv')
env_df = pd.read_csv(example_prodpath)
env_df.index = pd.to_datetime(env_df["date"])
env_df = env_df.sort_index()

# Only simulate where irradiance > 200
env_df = env_df[env_df['irrad_poa_Wm2'] > 200]
# Two sites have data here so we choose one
env_df = env_df[env_df['randid'] == 'R15']
# Remove any NaN environmental specifications
env_df = env_df.dropna(subset=['irrad_poa_Wm2','temp_amb_C'])
env_df.head()

[91]:

	date	randid	generated_kW	expected_kW	irrad_poa_Wm2	temp_amb_C	wind_speed_ms	temp_mod_C
date
2018-04-01 09:00:00	2018-04-01 09:00:00	R15	6616.573	7343.981135	367.9035	20.1680	4.5095	24.5745
2018-04-01 10:00:00	2018-04-01 10:00:00	R15	8847.800	10429.876422	508.2870	21.9870	4.9785	30.7740
2018-04-01 11:00:00	2018-04-01 11:00:00	R15	11607.389	12981.228814	618.7945	23.4170	4.6410	35.8695
2018-04-01 12:00:00	2018-04-01 12:00:00	R15	15860.138	20000.000000	992.8930	24.3280	4.3235	44.8640
2018-04-01 13:00:00	2018-04-01 13:00:00	R15	15672.136	18737.585012	861.8760	25.1885	5.1810	44.3385

[92]:

metadata = pd.DataFrame()
metadata['randid'] = ['R15', 'R10']
metadata.head()

[92]:

	randid
0	R15
1	R10

[93]:

#Format for dictionaries is {pvops variable: user-specific column names}
prod_col_dict = {
    'siteid': 'randid',
    'timestamp': 'date',
    'powerprod': 'generated_kW',
    'irradiance':'irrad_poa_Wm2',
    'temperature':'temp_amb_C', # Optional parameter, used by one of the modeling structures
    'baseline': 'IEC_pstep', #user's name choice for new column (baseline expected energy defined by user or calculated based on IEC)
    'dcsize': 'dcsize', #user's name choice for new column (System DC-size, extracted from meta-data)
    'compared': 'Compared', #user's name choice for new column
    'energy_pstep': 'Energy_pstep' #user's name choice for new column
    }

metad_col_dict = {'siteid': 'randid'}

[94]:

failureA = timeseries_simulator.TimeseriesFailure()
longterm_fcn_dict = {
    'Rs_mult': "degrade"
}
annual_fcn_dict = {
    'Rs_mult': lambda x : ( 0.3 * np.sin(np.pi * x) )
}

failureA.trend(longterm_fcn_dict=longterm_fcn_dict,
               annual_fcn_dict=annual_fcn_dict,
               degradation_rate=1.005)

iv_col_dict = {'irradiance': 'irrad_poa_Wm2',
               'temperature': 'temp_amb_C'
               }

env_df['identifier'] = env_df.index.strftime("%Y-%m-%d %H:%M:%S")

time_simulator = timeseries_simulator.IVTimeseriesGenerator()
condition_dicts = time_simulator.generate(env_df, [failureA], iv_col_dict, 'identifier', plot_trends=False)

[95]:

plt.scatter(time_simulator.specs_df.index, time_simulator.specs_df['Rs_mult'])
plt.ylabel('Rs_mult')

[95]:

Text(0, 0.5, 'Rs_mult')

../../_images/pages_tutorials_tutorial_timeseries_sim_7_1.png

[96]:

time_simulator.add_time_conditions('complete', nmods=12)
time_simulator.simulate()

Simulating cells:   0%|          | 0/3307 [00:00<?, ?it/s]/home/klbonne/.local/bin/anaconda3/envs/pvops_dev/lib/python3.8/site-packages/scipy/optimize/_zeros_py.py:466: RuntimeWarning: some failed to converge after 100 iterations
  warnings.warn(msg, RuntimeWarning)
Simulating cells: 100%|██████████| 3307/3307 [01:17<00:00, 42.75it/s]
Adding up simulations: 100%|██████████| 3306/3306 [00:53<00:00, 61.60it/s]
Adding up other definitions: 100%|██████████| 3307/3307 [00:00<00:00, 29882.31it/s]

[97]:

sims_df = time_simulator.sims_to_df(focus=['string'], cutoff=True)

[98]:

sims_df.head()

[98]:

	current	voltage	E	T	mode	level
0	[3.3373719452150903, 3.3359903398092783, 3.334...	[3.836930773104541e-12, 11.97867132786855, 23....	367.9035	20.1680	str_2018-04-01 09:00:00	string
1	[4.614599256674858, 4.612678148222023, 4.61080...	[3.836930773104541e-12, 12.058127583686556, 23...	508.2870	21.9870	str_2018-04-01 10:00:00	string
2	[5.621465807867254, 5.6191204837065545, 5.6168...	[3.8331560148208155e-12, 12.093588347866122, 2...	618.7945	23.4170	str_2018-04-01 11:00:00	string
3	[9.020136734061941, 9.016314598640065, 9.01258...	[3.836930773104541e-12, 12.288852807799348, 24...	992.8930	24.3280	str_2018-04-01 12:00:00	string
4	[7.834888228861011, 7.831598650286882, 7.82838...	[3.836930773104541e-12, 12.182321719206548, 24...	861.8760	25.1885	str_2018-04-01 13:00:00	string

[99]:

pmaxs = np.array([max(np.array(row['current']) * np.array(row['voltage']))
            for ind,row in sims_df.iterrows()])

env_df["simulated_power"] = pmaxs

[100]:

def plot(input_df, npts_viz=50):
    df = input_df.copy()
    df['generated_kW'] = df['generated_kW'] / df['generated_kW'].max()
    df['expected_kW'] = df['expected_kW'] / df['expected_kW'].max()
    df['simulated_power'] = df['simulated_power'] / df['simulated_power'].max()

    plt.figure(figsize=(12,6))
    plt.plot(df.index[0:npts_viz], df['generated_kW'].iloc[0:npts_viz], label='measured')
    plt.plot(df.index[0:npts_viz], df['expected_kW'].iloc[0:npts_viz], label='company expected')
    plt.plot(df.index[0:npts_viz], df['simulated_power'].iloc[0:npts_viz], label='simulated')
    plt.legend()
    plt.xlabel("Time")
    plt.ylabel("Normalized power production (Power / Pmax)")
    plt.show()

plot(env_df)

../../_images/pages_tutorials_tutorial_timeseries_sim_12_0.png

Timeseries simulations

Using the environmental conditions and the physics-based estimation as input, we inference an output.

[101]:

prod_data_converted = t2t_preprocess.prod_date_convert(env_df, prod_col_dict)
prod_data_datena_d, _ = t2t_preprocess.prod_nadate_process(prod_data_converted, prod_col_dict, pnadrop=True)
prod_data_datena_d.index = prod_data_datena_d[prod_col_dict['timestamp']]

[102]:

masked_prod_data = preprocess.prod_inverter_clipping_filter(prod_data_datena_d, prod_col_dict, metadata, metad_col_dict, 'threshold', freq=60)

filtered_prod_data = masked_prod_data.loc[masked_prod_data['mask'] == False,:]

/home/klbonne/Documents/GitHub/pvOps/pvops/timeseries/preprocess.py:322: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`
  prod_df.loc[site_prod_mask, "mask"] = pvanalytics.features.clipping.threshold(

[103]:

model_prod_data = filtered_prod_data.dropna(subset=['irrad_poa_Wm2', 'temp_amb_C', 'wind_speed_ms']+[prod_col_dict['powerprod']])

[104]:

from sklearn.metrics import mean_squared_error, r2_score

def plot_inference(model, prod_col_dict, data_split='test', npts=50):

    def print_info(real,pred,name):
        mse = mean_squared_error(real, pred)
        r2 = r2_score(real, pred)
        print(f'[{name}] Mean squared error: %.2f'
                % mse)
        print(f'[{name}] Coefficient of determination: %.2f'
                % r2)

    fig,(ax) = plt.subplots(figsize=(14,8))

    if data_split == 'test':
        df = test_df
    elif data_split == 'train':
        df = train_df

    measured = model.estimators['OLS'][f'{data_split}_y'][:npts]

    ax2 = ax.twinx()
    ax2.plot(model.estimators['OLS'][f'{data_split}_index'][:npts], df[prod_col_dict['irradiance']].values[:npts], 'k', label='irradiance')

    ax.plot(model.estimators['OLS'][f'{data_split}_index'][:npts], df['expected_kW'].values[:npts], label='partner_expected')
    print_info(measured, df['expected_kW'].values[:npts], 'partner_expected')

    ax.plot(model.estimators['OLS'][f'{data_split}_index'][:npts], measured, label='measured')
    for name, info in model.estimators.items():
        predicted = model.estimators[name][f'{data_split}_prediction'][:npts]
        ax.plot(model.estimators[name][f'{data_split}_index'][:npts], predicted, label=name)
        print_info(measured, predicted, name)

    ax2.set_ylabel("Irradiance (W/m2)")
    ax.set_ylabel("Power (W)")
    ax.set_xlabel('Time')
    handles, labels = [(a+b) for a, b in zip(ax.get_legend_handles_labels(), ax2.get_legend_handles_labels())]
    ax.legend(handles, labels, loc='best')
    plt.show()

Power modeling using environmental conditions

[105]:

# Make sure to only pass data for one site! If sites are very similar, you can consider providing both sites.
model, train_df, test_df = linear.modeller(
                                            prod_col_dict,
                                            kernel_type='default',
                                            time_weighted='month',
                                            X_parameters=['irrad_poa_Wm2', 'temp_amb_C'],
                                            Y_parameter='generated_kW',
                                            prod_df=model_prod_data,
                                            test_split=0.05,
                                            degree=3,
                                            verbose=1)

train {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
test {3}

Begin training.
[OLS] Mean squared error: 933524.95
[OLS] Coefficient of determination: 0.96
[OLS] 24 coefficient trained.
[RANSAC] Mean squared error: 1052555.85
[RANSAC] Coefficient of determination: 0.95

Begin testing.
[OLS] Mean squared error: 510178.49
[OLS] Coefficient of determination: 0.98
[OLS] 24 coefficient trained.
[RANSAC] Mean squared error: 1743431.22
[RANSAC] Coefficient of determination: 0.93

/home/klbonne/.local/bin/anaconda3/envs/pvops_dev/lib/python3.8/site-packages/statsmodels/regression/linear_model.py:1934: RuntimeWarning: divide by zero encountered in double_scalars
  return np.sqrt(eigvals[0]/eigvals[-1])

[106]:

plot_inference(model, prod_col_dict, data_split='train', npts=40)

[partner_expected] Mean squared error: 3433925.51
[partner_expected] Coefficient of determination: 0.85
[OLS] Mean squared error: 270424.03
[OLS] Coefficient of determination: 0.99
[RANSAC] Mean squared error: 268835.82
[RANSAC] Coefficient of determination: 0.99

../../_images/pages_tutorials_tutorial_timeseries_sim_20_1.png

[107]:

plot_inference(model, prod_col_dict, data_split='test', npts=40)

[partner_expected] Mean squared error: 180731.98
[partner_expected] Coefficient of determination: 0.99
[OLS] Mean squared error: 275562.47
[OLS] Coefficient of determination: 0.99
[RANSAC] Mean squared error: 2293599.51
[RANSAC] Coefficient of determination: 0.90

../../_images/pages_tutorials_tutorial_timeseries_sim_21_1.png

Power modeling using physics simulations as input

[108]:

# Make sure to only pass data for one site! If sites are very similar, you can consider providing both sites.
model, train_df, test_df = linear.modeller(
                                            prod_col_dict,
                                            kernel_type='default',
                                            time_weighted='month',
                                            X_parameters=['irrad_poa_Wm2', 'temp_amb_C', 'simulated_power'],
                                            Y_parameter='generated_kW',
                                            prod_df=model_prod_data,
                                            test_split=0.05,
                                            degree=3,
                                            verbose=1)

train {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
test {3}

Begin training.
[OLS] Mean squared error: 754429.15
[OLS] Coefficient of determination: 0.97
[OLS] 36 coefficient trained.
[RANSAC] Mean squared error: 788074.79
[RANSAC] Coefficient of determination: 0.97

Begin testing.
[OLS] Mean squared error: 482864.39
[OLS] Coefficient of determination: 0.98
[OLS] 36 coefficient trained.
[RANSAC] Mean squared error: 1005442.42
[RANSAC] Coefficient of determination: 0.96

[109]:

plot_inference(model, prod_col_dict, data_split='train', npts=40)

[partner_expected] Mean squared error: 3433925.51
[partner_expected] Coefficient of determination: 0.85
[OLS] Mean squared error: 241325.46
[OLS] Coefficient of determination: 0.99
[RANSAC] Mean squared error: 232528.26
[RANSAC] Coefficient of determination: 0.99

../../_images/pages_tutorials_tutorial_timeseries_sim_24_1.png

[110]:

plot_inference(model, prod_col_dict, data_split='test', npts=40)

[partner_expected] Mean squared error: 180731.98
[partner_expected] Coefficient of determination: 0.99
[OLS] Mean squared error: 355754.91
[OLS] Coefficient of determination: 0.98
[RANSAC] Mean squared error: 566091.79
[RANSAC] Coefficient of determination: 0.98

../../_images/pages_tutorials_tutorial_timeseries_sim_25_1.png