Estimation of Price Surfaces¶

This notebook outlines how to specify different variants the model, then proceeds to fit them.

Imports¶

#exports
import pandas as pd
import numpy as np

import os
import pickle
from tqdm import tqdm

from moepy import lowess, eda

import seaborn as sns
import matplotlib.pyplot as plt

User Inputs¶

models_dir = '../data/models'
load_existing_model = True

Loading & Cleaning Data¶

We'll start by loading in ...

%%time

df_EI = eda.load_EI_df('../data/raw/electric_insights.csv')

df_EI.head()

Wall time: 1.69 s

local_datetime	day_ahead_price	SP	imbalance_price	valueSum	temperature	TCO2_per_h	gCO2_per_kWh	nuclear	coal	...	demand	pumped_storage	wind_onshore	wind_offshore	french	irish
2009-01-01 00:00:00+00:00	58.05	1	74.74	74.74	-0.6	21278	555	6.973	17.65	...	38.329	-0.404	nan	nan	1.977	-0.161
2009-01-01 00:30:00+00:00	56.33	2	74.89	74.89	-0.6	21442	558	6.968	17.77	...	38.461	-0.527	nan	nan	1.977	-0.16
2009-01-01 01:00:00+00:00	52.98	3	76.41	76.41	-0.6	21614	569	6.97	18.07	...	37.986	-1.018	nan	nan	1.977	-0.16
2009-01-01 01:30:00+00:00	50.39	4	37.73	37.73	-0.6	21320	578	6.969	18.022	...	36.864	-1.269	nan	nan	1.746	-0.16
2009-01-01 02:00:00+00:00	48.7	5	59	59	-0.6	21160	585	6.96	17.998	...	36.18	-1.566	nan	nan	1.73	-0.16

... and cleaning the GB data

df_EI_model = df_EI[['day_ahead_price', 'demand', 'solar', 'wind']].dropna()

s_demand = df_EI_model['demand']
s_price = df_EI_model['day_ahead_price']
s_dispatchable = df_EI_model['demand'] - df_EI_model[['solar', 'wind']].sum(axis=1)

# Plotting
fig, ax = plt.subplots(dpi=150)

ax.scatter(s_dispatchable['2010-09':'2011-03'], s_price['2010-09':'2011-03'], s=1)
ax.scatter(s_dispatchable['2020-03':'2020-09'], s_price['2020-03':'2020-09'], s=1)

eda.hide_spines(ax)
ax.set_xlim(8, 60)
ax.set_ylim(-25, 100)
ax.set_xlabel('Demand - [Wind + Solar] (MW)')
ax.set_ylabel('Price (Â£/MWh)')

Text(0, 0.5, 'Price (Â£/MWh)')

png

As well as the DE data

df_DE = eda.load_DE_df('../data/raw/energy_charts.csv', '../data/raw/ENTSOE_DE_price.csv')

df_DE_model = df_DE[['price', 'demand', 'Solar', 'Wind']].dropna()

s_DE_demand = df_DE_model['demand']
s_DE_price = df_DE_model['price']
s_DE_dispatchable = df_DE_model['demand'] - df_DE_model[['Solar', 'Wind']].sum(axis=1)

# Plotting
fig, ax = plt.subplots(dpi=150)

ax.scatter(s_DE_dispatchable['2015-09':'2016-03'], s_DE_price['2015-09':'2016-03'], s=1)
ax.scatter(s_DE_dispatchable['2020-03':'2020-09'], s_DE_price['2020-03':'2020-09'], s=1)

eda.hide_spines(ax)
ax.set_xlim(8, 75)
ax.set_ylim(-25, 100)
ax.set_xlabel('Demand - [Wind + Solar] (MW)')
ax.set_ylabel('Price (Â£/MWh)')

Text(0, 0.5, 'Price (Â£/MWh)')

png

Results Wrapper¶

We'll start defining each of the price models that we'll fit, using the PicklableFunction class to ensure that all of our models can be saved for later use.

#exports
import copy
import types
import marshal

class PicklableFunction:
    """Provides a wrapper to ensure functions can be pickled"""
    def __init__(self, fun):
        self._fun = fun

    def __call__(self, *args, **kwargs):
        return self._fun(*args, **kwargs)

    def __getstate__(self):
        try:
            return pickle.dumps(self._fun)
        except Exception:
            return marshal.dumps((self._fun.__code__, self._fun.__name__))

    def __setstate__(self, state):
        try:
            self._fun = pickle.loads(state)
        except Exception:
            code, name = marshal.loads(state)
            self._fun = types.FunctionType(code, {}, name)

        return

def get_fit_kwarg_sets(qs=np.linspace(0.1, 0.9, 9)):
    """Helper to generate kwargs for the `fit` method of `Lowess`"""
    fit_kwarg_sets = [
        # quantile lowess
        { 
            'name': f'p{int(q*100)}',
            'lowess_kwargs': {'reg_func': PicklableFunction(lowess.calc_quant_reg_betas)},
            'q': q,
        }
        for q in qs

        # standard lowess
    ] + [{'name': 'average'}] 

    return fit_kwarg_sets

model_definitions = {
    'DAM_price_GB': {
        'dt_idx': s_dispatchable.index,
        'x': s_dispatchable.values,
        'y': s_price.values,
        'reg_dates_start': '2009-01-01',
        'reg_dates_end': '2021-01-01',
        'reg_dates_freq': '13W', # 13 
        'frac': 0.3, 
        'num_fits': 31, # 31
        'dates_smoothing_value': 26, # 26
        'dates_smoothing_units': 'W',
        'fit_kwarg_sets': get_fit_kwarg_sets(qs=[0.16, 0.5, 0.84])
    },
    'DAM_price_demand_GB': {
        'dt_idx': s_demand.index,
        'x': s_demand.values,
        'y': s_price.values,
        'reg_dates_start': '2009-01-01',
        'reg_dates_end': '2021-01-01',
        'reg_dates_freq': '13W', # 13 
        'frac': 0.3, 
        'num_fits': 31, # 31
        'dates_smoothing_value': 26, # 26
        'dates_smoothing_units': 'W',
        'fit_kwarg_sets': get_fit_kwarg_sets(qs=[0.5])
    },
    'DAM_price_DE': {
        'dt_idx': s_DE_dispatchable.index,
        'x': s_DE_dispatchable.values,
        'y': s_DE_price.values,
        'reg_dates_start': '2015-01-04',
        'reg_dates_end': '2021-01-01',
        'reg_dates_freq': '13W', # 13 
        'frac': 0.3, 
        'num_fits': 31, # 31
        'dates_smoothing_value': 26, # 26
        'dates_smoothing_units': 'W',
        'fit_kwarg_sets': get_fit_kwarg_sets(qs=[0.16, 0.5, 0.84])
    },
    'DAM_price_demand_DE': {
        'dt_idx': s_DE_dispatchable.index,
        'x': s_DE_demand.values,
        'y': s_DE_price.values,
        'reg_dates_start': '2015-01-04',
        'reg_dates_end': '2021-01-01',
        'reg_dates_freq': '13W', # 13 
        'frac': 0.3, 
        'num_fits': 31, # 31
        'dates_smoothing_value': 26, # 26
        'dates_smoothing_units': 'W',
        'fit_kwarg_sets': get_fit_kwarg_sets(qs=[0.5])
    }
}

We'll now take these model definitions to fit and save them

#exports
def fit_models(model_definitions, models_dir):
    """Fits LOWESS variants using the specified model definitions"""
    for model_parent_name, model_spec in model_definitions.items():
        for fit_kwarg_set in tqdm(model_spec['fit_kwarg_sets'], desc=model_parent_name):
            run_name = fit_kwarg_set.pop('name')
            model_name = f'{model_parent_name}_{run_name}'

            if f'{model_name}.pkl' not in os.listdir(models_dir):
                smooth_dates = lowess.SmoothDates()

                reg_dates = pd.date_range(
                    model_spec['reg_dates_start'], 
                    model_spec['reg_dates_end'], 
                    freq=model_spec['reg_dates_freq']
                )

                smooth_dates.fit(
                    model_spec['x'], 
                    model_spec['y'], 
                    dt_idx=model_spec['dt_idx'], 
                    reg_dates=reg_dates, 
                    frac=model_spec['frac'], 
                    threshold_value=model_spec['dates_smoothing_value'], 
                    threshold_units=model_spec['dates_smoothing_units'],
                    num_fits=model_spec['num_fits'], 
                    **fit_kwarg_set
                )

                model_fp = f'{models_dir}/{model_name}.pkl'
                pickle.dump(smooth_dates, open(model_fp, 'wb'))

                del smooth_dates

fit_models(model_definitions, models_dir)

DAM_price_GB 100% 4/4 [00:00<00:00, 0.00s/it]

DAM_price_demand_GB 100% 2/2 [00:00<00:00, 0.00s/it]

DAM_price_DE 100% 4/4 [00:00<00:00, 0.00s/it]

DAM_price_demand_DE 100% 2/2 [00:00<00:00, 0.00s/it]

We'll load one of the models in

%%time

if load_existing_model == True:
    smooth_dates = pickle.load(open(f'{models_dir}/DAM_price_GB_p50.pkl', 'rb'))
else:
    lowess_kwargs = {}
    reg_dates = pd.date_range('2009-01-01', '2021-01-01', freq='13W')

    smooth_dates = lowess.SmoothDates()
    smooth_dates.fit(s_dispatchable.values, s_price.values, dt_idx=s_dispatchable.index, 
                     reg_dates=reg_dates, frac=0.3, num_fits=31, threshold_value=26, lowess_kwargs=lowess_kwargs)

Wall time: 2.7 s

And create a prediction surface using it

%%time

x_pred = np.linspace(8, 60, 521)
dt_pred = pd.date_range('2009-01-01', '2021-01-01', freq='1W')

df_pred = smooth_dates.predict(x_pred=x_pred, dt_pred=dt_pred)

df_pred.head()

Wall time: 346 ms

Unnamed: 0	2009-01-04	2009-01-11	2009-01-18	2009-01-25	2009-02-01	2009-02-08	2009-02-15	2009-02-22	2009-03-01	2009-03-08	...	2020-10-25	2020-11-01	2020-11-08	2020-11-15	2020-11-22	2020-11-29	2020-12-06	2020-12-13	2020-12-20	2020-12-27
8	-7.66001	-7.78927	-7.91081	-8.02572	-8.13481	-8.23875	-8.33813	-8.43345	-8.52519	-8.61382	...	10.2354	10.292	10.3476	10.4021	10.4557	10.5085	10.5611	10.6143	10.6691	10.7271
8.1	-7.46772	-7.59637	-7.71734	-7.83171	-7.94028	-8.04374	-8.14266	-8.23754	-8.32887	-8.41709	...	10.4429	10.4994	10.5548	10.6092	10.6627	10.7154	10.7679	10.821	10.8758	10.9336
8.2	-7.27561	-7.40364	-7.52404	-7.63785	-7.74592	-7.84889	-7.94734	-8.04178	-8.13268	-8.22049	...	10.6503	10.7066	10.7619	10.8162	10.8695	10.9222	10.9746	11.0276	11.0823	11.1401
8.3	-7.08366	-7.21108	-7.33089	-7.44416	-7.5517	-7.65418	-7.75217	-7.84616	-7.93663	-8.02403	...	10.8576	10.9138	10.9689	11.023	11.0763	11.1288	11.1812	11.2341	11.2888	11.3464
8.4	-6.89188	-7.01867	-7.13789	-7.25061	-7.35763	-7.45962	-7.55713	-7.65067	-7.74071	-7.82769	...	11.0648	11.1208	11.1757	11.2298	11.2829	11.3353	11.3876	11.4405	11.4951	11.5527