Prediction & Confidence Intervals¶
This notebook outlines the calculation of the prediction and confidence intervals for the GB and DE price MOE models
Imports¶
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from moepy import lowess, eda
from moepy.surface import PicklableFunction
from ipypb import track
Great Britain¶
We'll start by loading and cleaning the data for GB
df_EI = eda.load_EI_df('../data/raw/electric_insights.csv')
df_EI_model = df_EI[['day_ahead_price', 'demand', 'solar', 'wind']].dropna()
s_price = df_EI_model['day_ahead_price']
s_dispatchable = df_EI_model['demand'] - df_EI_model[['solar', 'wind']].sum(axis=1)
We'll then calculate the estimate for the 68% prediction interval
def get_pred_intvl(low_q_fp, high_q_fp):
"""Calculates the prediction interval between the low and high quantile models specified"""
smooth_dates_low = pickle.load(open(low_q_fp, 'rb'))
smooth_dates_high = pickle.load(open(high_q_fp, 'rb'))
x_pred = np.linspace(3, 61, 581)
dt_pred = pd.date_range('2009-01-01', '2020-12-31', freq='1D')
df_pred_low = smooth_dates_low.predict(x_pred=x_pred, dt_pred=dt_pred)
df_pred_low.index = np.round(df_pred_low.index, 1)
df_pred_high = smooth_dates_high.predict(x_pred=x_pred, dt_pred=dt_pred)
df_pred_high.index = np.round(df_pred_high.index, 1)
df_pred_intvl = df_pred_high - df_pred_low
return df_pred_intvl
%%time
df_pred_68pct_intvl_GB = get_pred_intvl('../data/models/DAM_price_GB_p16.pkl', '../data/models/DAM_price_GB_p84.pkl')
df_pred_68pct_intvl_GB.head()
Wall time: 11.4 s
Unnamed: 0 | 2009-01-01 | 2009-01-02 | 2009-01-03 | 2009-01-04 | 2009-01-05 | 2009-01-06 | 2009-01-07 | 2009-01-08 | 2009-01-09 | 2009-01-10 | ... | 2020-12-22 | 2020-12-23 | 2020-12-24 | 2020-12-25 | 2020-12-26 | 2020-12-27 | 2020-12-28 | 2020-12-29 | 2020-12-30 | 2020-12-31 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | -4.77878 | -4.80147 | -4.82393 | -4.84614 | -4.86811 | -4.88982 | -4.91126 | -4.93241 | -4.95325 | -4.97378 | ... | 41.4778 | 41.4841 | 41.4904 | 41.4967 | 41.503 | 41.5093 | 41.5157 | 41.522 | 41.5284 | 41.5348 |
3.1 | -4.73778 | -4.76051 | -4.78301 | -4.80526 | -4.82727 | -4.84902 | -4.8705 | -4.89169 | -4.91257 | -4.93314 | ... | 41.3044 | 41.3107 | 41.317 | 41.3233 | 41.3296 | 41.3359 | 41.3422 | 41.3486 | 41.3549 | 41.3613 |
3.2 | -4.69656 | -4.71933 | -4.74186 | -4.76415 | -4.7862 | -4.80799 | -4.82951 | -4.85074 | -4.87167 | -4.89228 | ... | 41.1312 | 41.1375 | 41.1437 | 41.15 | 41.1563 | 41.1626 | 41.169 | 41.1753 | 41.1816 | 41.188 |
3.3 | -4.65507 | -4.67787 | -4.70044 | -4.72276 | -4.74485 | -4.76668 | -4.78824 | -4.80951 | -4.83047 | -4.85113 | ... | 40.9582 | 40.9645 | 40.9707 | 40.977 | 40.9833 | 40.9896 | 40.9959 | 41.0023 | 41.0086 | 41.0149 |
3.4 | -4.61326 | -4.63609 | -4.65869 | -4.68105 | -4.70317 | -4.72504 | -4.74664 | -4.76794 | -4.78895 | -4.80964 | ... | 40.7855 | 40.7918 | 40.798 | 40.8043 | 40.8106 | 40.8169 | 40.8232 | 40.8295 | 40.8358 | 40.8421 |
We can see that we get some quantile crossing at the extreme ends of the dispatch curve which is why some of our 68% interval values are negative, to counter this we'll weight our prediction interval by how often that part of the dispatch curve is where the price clears at.
s_pred_idx_weight = s_dispatchable.round(1).value_counts().sort_index()
dispatchable_gen_idxs = sorted(list(set(s_pred_idx_weight.index).intersection(df_pred_68pct_intvl_GB.index)))
pred_68pct_intvl = np.average(df_pred_68pct_intvl_GB.mean(axis=1).loc[dispatchable_gen_idxs], weights=s_pred_idx_weight.loc[dispatchable_gen_idxs])
print(f'The 68% prediction interval for GB is {round(pred_68pct_intvl, 2)} £/MWh')
The 68% prediction interval for GB is 16.32 £/MWh
We'll use our bootstrapping helper function to calculate the confidence interval of the GB model
%%capture
center_dts = pd.date_range(s_price.index.min(), s_price.index.max(), freq='3MS') + pd.Timedelta(days=45)
all_conf_intvl_95pct = []
for center_dt in track(center_dts):
s_price_subset = s_price[center_dt-pd.Timedelta(days=45):center_dt+pd.Timedelta(days=45)]
s_dispatchable_subset = s_dispatchable[center_dt-pd.Timedelta(days=45):center_dt+pd.Timedelta(days=45)]
df_bootstrap = lowess.bootstrap_model(s_price_subset.values, s_dispatchable_subset.values, num_runs=100, frac=0.3, num_fits=10)
conf_intvl_95pct = df_bootstrap.replace(0, np.nan).quantile([0.025, 0.975], axis=1).diff().dropna(how='all').mean(axis=1).iloc[0]
all_conf_intvl_95pct += [conf_intvl_95pct]
conf_intvl_95pct_GB = np.array(all_conf_intvl_95pct).mean()
print(f'The 95% confidence interval for GB is {round(conf_intvl_95pct_GB, 2)} £/MWh')
The 95% confidence interval for GB is 1.03 £/MWh
Germany¶
We'll start by loading and cleaning the data for DE
%%time
df_DE = eda.load_DE_df('../data/raw/energy_charts.csv', '../data/raw/ENTSOE_DE_price.csv')
df_DE_model = df_DE[['price', 'demand', 'Solar', 'Wind']].dropna()
s_DE_price = df_DE_model['price']
s_DE_demand = df_DE_model['demand']
s_DE_dispatchable = df_DE_model['demand'] - df_DE_model[['Solar', 'Wind']].sum(axis=1)
Wall time: 1.72 s
We'll then calculate the estimate for the 68% prediction interval
%%time
df_pred_68pct_intvl_DE = get_pred_intvl('../data/models/DAM_price_DE_p16.pkl', '../data/models/DAM_price_DE_p84.pkl')
s_pred_idx_weight = s_DE_dispatchable.round(1).value_counts().sort_index()
dispatchable_gen_idxs = sorted(list(set(s_pred_idx_weight.index).intersection(df_pred_68pct_intvl_DE.index)))
pred_68pct_intvl = np.average(df_pred_68pct_intvl_DE.mean(axis=1).loc[dispatchable_gen_idxs], weights=s_pred_idx_weight.loc[dispatchable_gen_idxs])
print(f'The 68% prediction interval for DE is {round(pred_68pct_intvl, 2)} EUR/MWh')
The 68% prediction interval for DE is 13.79 EUR/MWh
Wall time: 1.5 s
We'll use our bootstrapping helper function to calculate the confidence interval of the GB model
%%capture
center_dts = pd.date_range(s_DE_price.index.min(), s_DE_price.index.max(), freq='3MS') + pd.Timedelta(days=45)
all_conf_intvl_95pct = []
for center_dt in track(center_dts):
s_price_subset = s_DE_price[center_dt-pd.Timedelta(days=45):center_dt+pd.Timedelta(days=45)]
s_dispatchable_subset = s_DE_dispatchable[center_dt-pd.Timedelta(days=45):center_dt+pd.Timedelta(days=45)]
df_bootstrap = lowess.bootstrap_model(s_price_subset.values, s_dispatchable_subset.values, num_runs=100, frac=0.3, num_fits=10)
conf_intvl_95pct = df_bootstrap.replace(0, np.nan).quantile([0.025, 0.975], axis=1).diff().dropna(how='all').mean(axis=1).iloc[0]
all_conf_intvl_95pct += [conf_intvl_95pct]
conf_intvl_95pct_DE = np.array(all_conf_intvl_95pct).mean()
print(f'The 95% confidence interval for DE is {round(conf_intvl_95pct_DE, 2)} EUR/MWh')
The 95% confidence interval for DE is 1.69 EUR/MWh