Skip to content

Christmas

#exports
import numpy as np
import pandas as pd

import os
from sklearn.ensemble import RandomForestRegressor

from batopt import clean, discharge, charge, constraints, pv
import FEAutils as hlp
import matplotlib.pyplot as plt


User Inputs

# Data directories, given relative to the working directory the notebook is
# run from. `raw_data_dir` is passed to the charge/discharge optimisers below.
raw_data_dir = '../data/raw'
# `intermediate_data_dir` is passed to the dataset-loading helpers
# (`clean.combine_training_datasets`, `pv.prepare_training_input_data`) as
# well as to the optimisers.
intermediate_data_dir = '../data/intermediate'


Christmas Model EDA

We'll start by loading in the combined training dataset

# Load the combined training data, then linearly interpolate gaps, filling
# at most one consecutive missing value per gap (`limit=1`)
df_combined = clean.combine_training_datasets(intermediate_data_dir)
df = df_combined.interpolate(limit=1)

df.head()
demand pv weather demand_MW irradiance_Wm-2 panel_temp_C pv_power_mw solar_location1 solar_location2 solar_location3 solar_location4 solar_location5 solar_location6 temp_location1 temp_location2 temp_location3 temp_location4 temp_location5 temp_location6 holidays
2015-01-01 00:00:00+00:00 nan nan nan nan nan nan nan 0 0 0 0 0 0 9.75 9.65 8.83 7.58 11.62 11.22 nan
2015-01-01 00:30:00+00:00 nan nan nan nan nan nan nan 0 0 0 0 0 0 9.83 9.705 8.865 7.6 11.635 11.27 nan
2015-01-01 01:00:00+00:00 nan nan nan nan nan nan nan 0 0 0 0 0 0 9.91 9.76 8.9 7.62 11.65 11.32 nan
2015-01-01 01:30:00+00:00 nan nan nan nan nan nan nan 0 0 0 0 0 0 9.95 9.78 9 7.615 11.65 11.31 nan
2015-01-01 02:00:00+00:00 nan nan nan nan nan nan nan 0 0 0 0 0 0 9.99 9.8 9.1 7.61 11.65 11.3 nan


We'll now create our charge/discharge baseline for 2018

# Week used as the 2018 baseline: it is removed from the training data below
# and handed to the charge/discharge optimisers as the test window
test_start_date = '2018-12-18'
test_end_date = '2018-12-24 23:59'

# Model file paths (the PV path is passed to `pv.fit_and_save_pv_model` below)
discharge_opt_model_fp = '../models/discharge_opt.sav'
pv_model_fp = '../models/pv_model.sav'

# Keyword arguments forwarded to the model fit together with
# `model_class=RandomForestRegressor`.
# NOTE(review): 'mse' and 'auto' are the spellings accepted by older
# scikit-learn releases - confirm against the pinned version before upgrading.
model_params = dict(
    criterion='mse',
    bootstrap=True,
    max_depth=32,
    max_features='auto',
    min_samples_leaf=1,
    min_samples_split=4,
    n_estimators=74,
)

X, y = pv.prepare_training_input_data(intermediate_data_dir)

# Exclude the test window from the training set, but only when both bounds are set
if not (test_start_date is None or test_end_date is None):
    pred_index = X[test_start_date:test_end_date].index
    X, y = X.drop(pred_index), y.drop(pred_index)

pv.fit_and_save_pv_model(X, y, pv_model_fp, model_class=RandomForestRegressor, **model_params)

# Both optimisers receive the same test window
test_window = dict(test_start_date=test_start_date, test_end_date=test_end_date)
s_charge_profile = pv.optimise_test_charge_profile(raw_data_dir, intermediate_data_dir, pv_model_fp, **test_window)
s_discharge_profile = discharge.optimise_test_discharge_profile(raw_data_dir, intermediate_data_dir, discharge_opt_model_fp, **test_window)

# Sum the two profiles; entries that are NaN after the addition become 0
s_battery_profile = (s_charge_profile + s_discharge_profile).fillna(0)
s_battery_profile.name = 'charge_MW'

s_battery_profile.plot()
<AxesSubplot:>

png


We'll then repeat the same process for the current year we're meant to be forecasting

# No explicit test window: with both bounds left as None the training data is
# used in full and the optimisers are called without a date range
test_start_date = None
test_end_date = None

# Model file paths (the PV path is passed to `pv.fit_and_save_pv_model` below)
discharge_opt_model_fp = '../models/discharge_opt.sav'
pv_model_fp = '../models/pv_model.sav'

# Keyword arguments forwarded to the model fit together with
# `model_class=RandomForestRegressor`.
# NOTE(review): 'mse' and 'auto' are the spellings accepted by older
# scikit-learn releases - confirm against the pinned version before upgrading.
model_params = dict(
    criterion='mse',
    bootstrap=True,
    max_depth=32,
    max_features='auto',
    min_samples_leaf=1,
    min_samples_split=4,
    n_estimators=74,
)

X, y = pv.prepare_training_input_data(intermediate_data_dir)

# Only takes effect when both bounds are set (skipped with the None values above)
if not (test_start_date is None or test_end_date is None):
    pred_index = X[test_start_date:test_end_date].index
    X, y = X.drop(pred_index), y.drop(pred_index)

pv.fit_and_save_pv_model(X, y, pv_model_fp, model_class=RandomForestRegressor, **model_params)

# NOTE: in the recorded run the charge-profile call below raised a KeyError
# inside `charge.prepare_test_feature_data`, because none of the
# 2020-07-03 to 2020-07-09 datetimes it looked up were in the feature index
test_window = dict(test_start_date=test_start_date, test_end_date=test_end_date)
s_charge_profile = pv.optimise_test_charge_profile(raw_data_dir, intermediate_data_dir, pv_model_fp, **test_window)
s_discharge_profile = discharge.optimise_test_discharge_profile(raw_data_dir, intermediate_data_dir, discharge_opt_model_fp, **test_window)

# Sum the two profiles; entries that are NaN after the addition become 0
s_battery_profile = (s_charge_profile + s_discharge_profile).fillna(0)
s_battery_profile.name = 'charge_MW'

s_battery_profile.plot()
---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

<ipython-input-9-81823d82d537> in <module>
     24 pv.fit_and_save_pv_model(X, y, pv_model_fp, model_class=RandomForestRegressor, **model_params)
     25 
---> 26 s_charge_profile = pv.optimise_test_charge_profile(raw_data_dir, intermediate_data_dir, pv_model_fp, test_start_date=test_start_date, test_end_date=test_end_date)
     27 s_discharge_profile = discharge.optimise_test_discharge_profile(raw_data_dir, intermediate_data_dir, discharge_opt_model_fp, test_start_date=test_start_date, test_end_date=test_end_date)
     28


c:\users\ayrto\desktop\hackathons\wpd-ds-challenge\batopt\pv.py in optimise_test_charge_profile(raw_data_dir, intermediate_data_dir, pv_model_fp, test_start_date, test_end_date, start_time, end_time)
    142 # Cell
    143 def optimise_test_charge_profile(raw_data_dir, intermediate_data_dir, pv_model_fp, test_start_date=None, test_end_date=None, start_time='08:00', end_time='23:59'):
--> 144     df_features = charge.prepare_test_feature_data(raw_data_dir, intermediate_data_dir, test_start_date=test_start_date, test_end_date=test_end_date, start_time=start_time, end_time=end_time)
    145     charging_datetimes = charge.extract_charging_datetimes(df_features)
    146     X_test = df_features.loc[charging_datetimes]


c:\users\ayrto\desktop\hackathons\wpd-ds-challenge\batopt\charge.py in prepare_test_feature_data(raw_data_dir, intermediate_data_dir, test_start_date, test_end_date, start_time, end_time)
    280 
    281     # Filtering feature data on submission datetimes
--> 282     df_features = df_features.loc[index].between_time(start_time, end_time)
    283 
    284     return df_features


~\anaconda3\envs\batopt\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
    892 
    893             maybe_callable = com.apply_if_callable(key, self.obj)
--> 894             return self._getitem_axis(maybe_callable, axis=axis)
    895 
    896     def _is_scalar_access(self, key: Tuple):


~\anaconda3\envs\batopt\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
   1110                     raise ValueError("Cannot index with multidimensional key")
   1111 
-> 1112                 return self._getitem_iterable(key, axis=axis)
   1113 
   1114             # nested tuple slicing


~\anaconda3\envs\batopt\lib\site-packages\pandas\core\indexing.py in _getitem_iterable(self, key, axis)
   1050 
   1051         # A collection of keys
-> 1052         keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
   1053         return self.obj._reindex_with_indexers(
   1054             {axis: [keyarr, indexer]}, copy=True, allow_dups=True


~\anaconda3\envs\batopt\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1263             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1264 
-> 1265         self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
   1266         return keyarr, indexer
   1267


~\anaconda3\envs\batopt\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1305             if missing == len(indexer):
   1306                 axis_name = self.obj._get_axis_name(axis)
-> 1307                 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   1308 
   1309             ax = self.obj._get_axis(axis)


KeyError: "None of [DatetimeIndex(['2020-07-03 00:00:00+00:00', '2020-07-03 00:30:00+00:00',\n               '2020-07-03 01:00:00+00:00', '2020-07-03 01:30:00+00:00',\n               '2020-07-03 02:00:00+00:00', '2020-07-03 02:30:00+00:00',\n               '2020-07-03 03:00:00+00:00', '2020-07-03 03:30:00+00:00',\n               '2020-07-03 04:00:00+00:00', '2020-07-03 04:30:00+00:00',\n               ...\n               '2020-07-09 19:00:00+00:00', '2020-07-09 19:30:00+00:00',\n               '2020-07-09 20:00:00+00:00', '2020-07-09 20:30:00+00:00',\n               '2020-07-09 21:00:00+00:00', '2020-07-09 21:30:00+00:00',\n               '2020-07-09 22:00:00+00:00', '2020-07-09 22:30:00+00:00',\n               '2020-07-09 23:00:00+00:00', '2020-07-09 23:30:00+00:00'],\n              dtype='datetime64[ns, UTC]', name='datetime', length=336, freq=None)] are in the [index]"
# Overlay the discharge series built from the 18-24 December demand of 2017
# and 2018 with the optimised discharge profile (dashed)
fig, ax = plt.subplots(dpi=150)

# 48 half-hourly periods per day x 7 days: the leading values drawn per series
n_periods = 48 * 7

for year in (2017, 2018):
    start_date = f'{year}-12-18'
    end_date = f'{year}-12-24 23:59'

    s_demand = df.loc[start_date:end_date, 'demand_MW']
    s_discharge = discharge.construct_discharge_s(s_demand)
    # Plot raw values so each year is drawn against position rather than date
    ax.plot(s_discharge.iloc[:n_periods].values, label=f'{year}')

ax.plot(s_discharge_profile.iloc[:n_periods].values, linestyle='--', label='2019 Prediction')

ax.legend(frameon=False, bbox_to_anchor=(1, 1))
hlp.hide_spines(ax)
# Draw the 18-24 December discharge series for 2017 and 2018 again, this time
# without legend labels, plotting values by position rather than by date
for year in (2017, 2018):
    start_date, end_date = f'{year}-12-18', f'{year}-12-24 23:59'

    s_demand = df.loc[start_date:end_date, 'demand_MW']
    s_discharge = discharge.construct_discharge_s(s_demand)
    plt.plot(s_discharge.iloc[:48*7].values)