Skip to content

Exploratory Data Analysis

Binder

This notebook includes some visualisation and exploration of the price and fuel data for Germany and Great Britain


Imports

#exports
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.transforms as mtf
import seaborn as sns


Loading Data

#exports
def load_EI_df(EI_fp):
    """Loads the electric insights data and returns a DataFrame"""
    df = pd.read_csv(EI_fp)

    df['local_datetime'] = pd.to_datetime(df['local_datetime'], utc=True)
    df = df.set_index('local_datetime')

    return df
%%time

df = load_EI_df('../data/raw/electric_insights.csv')

df.head()
Wall time: 2.29 s
local_datetime day_ahead_price SP imbalance_price valueSum temperature TCO2_per_h gCO2_per_kWh nuclear biomass coal ... demand pumped_storage wind_onshore wind_offshore belgian dutch french ireland northern_ireland irish
2009-01-01 00:00:00+00:00 58.05 1 74.74 74.74 -0.6 21278 555 6.973 0 17.65 ... 38.329 -0.404 nan nan 0 0 1.977 0 0 -0.161
2009-01-01 00:30:00+00:00 56.33 2 74.89 74.89 -0.6 21442 558 6.968 0 17.77 ... 38.461 -0.527 nan nan 0 0 1.977 0 0 -0.16
2009-01-01 01:00:00+00:00 52.98 3 76.41 76.41 -0.6 21614 569 6.97 0 18.07 ... 37.986 -1.018 nan nan 0 0 1.977 0 0 -0.16
2009-01-01 01:30:00+00:00 50.39 4 37.73 37.73 -0.6 21320 578 6.969 0 18.022 ... 36.864 -1.269 nan nan 0 0 1.746 0 0 -0.16
2009-01-01 02:00:00+00:00 48.7 5 59 59 -0.6 21160 585 6.96 0 17.998 ... 36.18 -1.566 nan nan 0 0 1.73 0 0 -0.16


We'll do the same for the German Energy-Charts and ENTSOE data

#exports
def load_DE_df(EC_fp, ENTSOE_fp):
    """Loads the energy-charts and ENTSOE data and returns a DataFrame"""
    # Energy-Charts
    df_DE = pd.read_csv(EC_fp)

    df_DE['local_datetime'] = pd.to_datetime(df_DE['local_datetime'], utc=True)
    df_DE = df_DE.set_index('local_datetime')

    # ENTSOE
    df_ENTSOE = pd.read_csv(ENTSOE_fp)

    df_ENTSOE['local_datetime'] = pd.to_datetime(df_ENTSOE['local_datetime'], utc=True)
    df_ENTSOE = df_ENTSOE.set_index('local_datetime')

    # Combining data
    df_DE['demand'] = df_DE.sum(axis=1)

    s_price = df_ENTSOE['DE_price']
    df_DE['price'] = s_price[~s_price.index.duplicated(keep='first')]

    return df_DE
df_DE = load_DE_df('../data/raw/energy_charts.csv', '../data/raw/ENTSOE_DE_price.csv')

df_DE.head()
local_datetime Biomass Brown Coal Gas Hard Coal Hydro Power Oil Others Pumped Storage Seasonal Storage Solar Uranium Wind Net Balance demand price
2010-01-03 23:00:00+00:00 3.637 16.533 4.726 10.078 2.331 0 0 0.052 0.068 0 16.826 0.635 -1.229 53.657 nan
2010-01-04 00:00:00+00:00 3.637 16.544 4.856 8.816 2.293 0 0 0.038 0.003 0 16.841 0.528 -1.593 51.963 nan
2010-01-04 01:00:00+00:00 3.637 16.368 5.275 7.954 2.299 0 0 0.032 0 0 16.846 0.616 -1.378 51.649 nan
2010-01-04 02:00:00+00:00 3.637 15.837 5.354 7.681 2.299 0 0 0.027 0 0 16.699 0.63 -1.624 50.54 nan
2010-01-04 03:00:00+00:00 3.637 15.452 5.918 7.498 2.301 0.003 0 0.02 0 0 16.635 0.713 -0.731 51.446 nan


Stacked-Fuels Time-Series

We'll create a stacked plot of the different generation types over time. We'll begin by cleaning the dataframe and merging columns so that it's ready for plotting, we'll also take the 7-day rolling average to make long-term trends clearer.

#exports
def clean_EI_df_for_plot(df, freq='7D'):
    """Cleans the electric insights dataframe for plotting"""
    fuel_order = ['Imports & Storage', 'nuclear', 'biomass', 'gas', 'coal', 'hydro', 'wind', 'solar']
    interconnectors = ['french', 'irish', 'dutch', 'belgian', 'ireland', 'northern_ireland']

    df = (df
          .copy()
          .assign(imports_storage=df[interconnectors+['pumped_storage']].sum(axis=1))
          .rename(columns={'imports_storage':'Imports & Storage'})
          .drop(columns=interconnectors+['demand', 'pumped_storage'])
          [fuel_order]
         )

    df_resampled = df.astype('float').resample(freq).mean()
    return df_resampled
df_plot = clean_EI_df_for_plot(df)

df_plot.head()
local_datetime Imports & Storage nuclear biomass gas coal hydro wind solar
2009-01-01 00:00:00+00:00 -0.039018 5.76854 0 16.2951 20.1324 0.35589 0.390015 0
2009-01-08 00:00:00+00:00 -0.921768 5.5829 0 16.3811 21.6997 0.551753 1.15155 0
2009-01-15 00:00:00+00:00 -0.024241 5.55999 0 14.84 20.4463 0.704382 1.483 0
2009-01-22 00:00:00+00:00 0.18283 6.22841 0 14.4678 20.5907 0.562277 0.938827 0
2009-01-29 00:00:00+00:00 0.120204 6.79959 0 13.9657 21.3497 0.519632 1.36261 0


We'll also define the colours we'll use for each fuel-type

N.b. the colour palette used is from this paper

fuel_colour_dict_rgb = {
    'Imports & Storage' : (121,68,149), 
    'nuclear' : (77,157,87), 
    'biomass' : (168,125,81), 
    'gas' : (254,156,66), 
    'coal' : (122,122,122), 
    'hydro' : (50,120,196), 
    'wind' : (72,194,227), 
    'solar' : (255,219,65),
}


However we need to convert from rgb to matplotlib plotting colours (0-1 not 0-255)

#exports
def rgb_2_plt_tuple(r, g, b):
    """converts a standard rgb set from a 0-255 range to 0-1"""
    plt_tuple = tuple([x/255 for x in (r, g, b)])
    return plt_tuple

def convert_fuel_colour_dict_to_plt_tuple(fuel_colour_dict_rgb):
    """Converts a dictionary of fuel colours to matplotlib colour values"""
    fuel_colour_dict_plt = fuel_colour_dict_rgb.copy()

    fuel_colour_dict_plt = {
        fuel: rgb_2_plt_tuple(*rgb_tuple) 
        for fuel, rgb_tuple 
        in fuel_colour_dict_plt.items()
    }

    return fuel_colour_dict_plt
fuel_colour_dict = convert_fuel_colour_dict_to_plt_tuple(fuel_colour_dict_rgb)

sns.palplot(fuel_colour_dict.values())

png


Finally we can plot the stacked fuel plot itself

#exports
def hide_spines(ax, positions=["top", "right"]):
    """
    Pass a matplotlib axis and list of positions with spines to be removed

    Parameters:
        ax:          Matplotlib axis object
        positions:   Python list e.g. ['top', 'bottom']
    """
    assert isinstance(positions, list), "Position must be passed as a list "

    for position in positions:
        ax.spines[position].set_visible(False)

def stacked_fuel_plot(
    df, 
    ax=None, 
    save_path=None, 
    dpi=150,
    fuel_colour_dict = {
    'Imports & Storage' : rgb_2_plt_tuple(121,68,149), 
    'nuclear' : rgb_2_plt_tuple(77,157,87), 
    'biomass' : rgb_2_plt_tuple(168,125,81), 
    'gas' : rgb_2_plt_tuple(254,156,66), 
    'coal' : rgb_2_plt_tuple(122,122,122), 
    'hydro' : rgb_2_plt_tuple(50,120,196), 
    'wind' : rgb_2_plt_tuple(72,194,227), 
    'solar' : rgb_2_plt_tuple(255,219,65),
}
):
    """Plots the electric insights fuel data as a stacked area graph"""
    df = df[fuel_colour_dict.keys()]

    if ax == None:
        fig = plt.figure(figsize=(10, 5), dpi=dpi)
        ax = plt.subplot()

    ax.stackplot(df.index.values, df.values.T, labels=df.columns.str.capitalize(), linewidth=0.25, edgecolor='white', colors=list(fuel_colour_dict.values()))

    plt.rcParams['axes.ymargin'] = 0
    ax.spines['bottom'].set_position('zero')
    hide_spines(ax)

    ax.set_xlim(df.index.min(), df.index.max())
    ax.legend(ncol=4, bbox_to_anchor=(0.85, 1.15), frameon=False)
    ax.set_ylabel('Generation (GW)')

    if save_path:
        fig.savefig(save_path)

    return ax
stacked_fuel_plot(df_plot, dpi=250)
<AxesSubplot:ylabel='Generation (GW)'>

png

#exports
def clean_EC_df_for_plot(
    df_EC, 
    freq='7D', 
    fuel_order=['Imports & Storage', 'nuclear', 'biomass', 
                'gas', 'coal', 'hydro', 'wind', 'solar']
):
    """Cleans the electric insights dataframe for plotting"""

    df_EC_clean = (pd
                   .DataFrame(index=df_EC.index)
                   .assign(nuclear=df_EC['Uranium'])
                   .assign(biomass=df_EC['Biomass'])
                   .assign(gas=df_EC['Gas'])
                   .assign(coal=df_EC['Brown Coal']+df_EC['Hard Coal'])
                   .assign(hydro=df_EC['Hydro Power'])
                   .assign(wind=df_EC['Wind'])
                   .assign(solar=df_EC['Solar'])
                  )

    df_EC_clean['Imports & Storage'] = df_EC['Pumped Storage'] + df_EC['Seasonal Storage'] + df_EC['Net Balance']
    df_EC_clean = df_EC_clean[fuel_order].interpolate()

    df_EC_resampled = df_EC_clean.astype('float').resample(freq).mean()

    return df_EC_resampled
df_DE_plot = clean_EC_df_for_plot(df_DE)
stacked_fuel_plot(df_DE_plot, dpi=250)
<AxesSubplot:ylabel='Generation (GW)'>

png