Merge pull request #197 from OSeMOSYS/demand-refactor

Demand module refactor
OSeMOSYS · Sep 21, 2024 · 2d745c4 · 2d745c4
2 parents 5b1ca0b + efa9f5b
commit 2d745c4
Show file tree

Hide file tree

Showing 13 changed files with 1,276 additions and 776 deletions.
diff --git a/workflow/rules/preprocess.smk b/workflow/rules/preprocess.smk
@@ -164,27 +164,53 @@ rule variable_costs:
     shell:
         'python workflow/scripts/osemosys_global/variablecosts.py 2> {log}'
 
+def demand_custom_csv() -> "str | list[str]":
+    """Return the optional custom-node demand input for ``demand_projections``.
+
+    The annotation is quoted so it is never evaluated at definition time.
+
+    Returns:
+        The path to the user-supplied annual demand CSV when
+        ``config["nodes_to_add"]`` is truthy, otherwise an empty list.
+    """
+    if config["nodes_to_add"]:
+        return "resources/data/custom_nodes/specified_annual_demand.csv"
+    return []
+
 rule demand_projections:
     message:
-        'Generating demand data...'
+        "Generating demand data..."
     input:
-        'resources/data/PLEXOS_World_2015_Gold_V1.1.xlsx',
-        'resources/data/iamc_db_GDPppp_Countries.xlsx',
-        'resources/data/iamc_db_POP_Countries.xlsx',
-        'resources/data/iamc_db_URB_Countries.xlsx',
-        'resources/data/iamc_db_POP_GDPppp_URB_Countries_Missing.xlsx',
-        'resources/data/T&D Losses.xlsx',
-        'resources/data/ember_yearly_electricity_data.csv'
+        plexos = "resources/data/PLEXOS_World_2015_Gold_V1.1.xlsx",
+        plexos_demand = "resources/data/All_Demand_UTC_2015.csv",
+        iamc_gdp ="resources/data/iamc_db_GDPppp_Countries.xlsx",
+        iamc_pop = "resources/data/iamc_db_POP_Countries.xlsx",
+        iamc_urb = "resources/data/iamc_db_URB_Countries.xlsx",
+        iamc_missing = "resources/data/iamc_db_POP_GDPppp_URB_Countries_Missing.xlsx",
+        td_losses = "resources/data/T&D Losses.xlsx",
+        ember = "resources/data/ember_yearly_electricity_data.csv",
+        # Custom-node demand CSV when config["nodes_to_add"] is set; otherwise an empty list.
+        custom_nodes = demand_custom_csv()
     params:
         start_year = config['startYear'],
         end_year = config['endYear'],
+        custom_nodes = config["nodes_to_add"]
     output:
-        csv_files = expand('results/data/{output_file}', output_file = demand_files),
-        figures = expand('results/data/../figs/Demand projection {demand_figure}.jpg', demand_figure = demand_figures),
+        csv_files = 'results/data/SpecifiedAnnualDemand.csv',
     log:
         log = 'results/logs/demand_projections.log'
-    shell:
-        'python workflow/scripts/osemosys_global/demand_projection.py 2> {log}'
+    # NOTE(review): the removed shell command redirected stderr into {log}; with
+    # `script:` the script presumably has to write to the log itself - confirm.
+    script:
+        "../scripts/osemosys_global/demand/main.py"
+
+rule demand_projection_figures:
+    # Produces the regression and projection figures from the raw demand inputs.
+    message:
+        "Generating demand figures..."
+    input:
+        # Same PLEXOS, IAMC and EMBER sources that rule demand_projections reads.
+        plexos = "resources/data/PLEXOS_World_2015_Gold_V1.1.xlsx",
+        iamc_gdp = "resources/data/iamc_db_GDPppp_Countries.xlsx",
+        iamc_pop = "resources/data/iamc_db_POP_Countries.xlsx",
+        iamc_urb = "resources/data/iamc_db_URB_Countries.xlsx",
+        iamc_missing = "resources/data/iamc_db_POP_GDPppp_URB_Countries_Missing.xlsx",
+        ember = "resources/data/ember_yearly_electricity_data.csv"
+    output:
+        regression = "results/figs/regression.png",
+        projection = "results/figs/projection.png"
+    log:
+        log = "results/logs/demand_projection_plot.log"
+    script:
+        "../scripts/osemosys_global/demand/figures.py"
 
 rule emissions:
     message:
@@ -208,7 +234,8 @@ rule max_capacity:
         'Generating capacity limits...'
     input:
         'resources/data/PLEXOS_World_MESSAGEix_GLOBIOM_Softlink.xlsx',
-        'results/data/ResidualCapacity.csv'
+        'results/data/ResidualCapacity.csv',
+        'results/data/SpecifiedAnnualDemand.csv'
     params:
         start_year = config['startYear'],
         end_year = config['endYear'],

diff --git a/workflow/scripts/osemosys_global/demand/__init__.py b/workflow/scripts/osemosys_global/demand/__init__.py
diff --git a/workflow/scripts/osemosys_global/demand/constants.py b/workflow/scripts/osemosys_global/demand/constants.py
@@ -0,0 +1,21 @@
+"""Constants for the demand module"""
+
+# Sets the spatial resolution for the regression. Right now it can only be applied at the continental level by setting 'child_object'.
+# Note that not all countries have historical data available, so country-level regression can't be applied for all of them.
+SPATIAL_RESOLUTION = "child_object"
+
+# Include urbanization as part of the regression? 'Yes' or 'No'
+URBANIZATION = "Yes"
+
+# Set which SSP pathway and data sources are to be used for the country-level GDP|PPP and population projections.
+PATHWAY = "SSP2"
+POP_COUNTRIES_SOURCE = "IIASA-WiC POP"  # Options are 'IIASA-WiC POP' and 'NCAR'
+GDP_PPP_COUNTRIES_SOURCE = "OECD Env-Growth"  # Options are 'IIASA GDP' and 'OECD Env-Growth'. 'OECD Env-Growth' has more entries.
+URB_COUNTRIES_SOURCE = "NCAR"  # 'NCAR' is the only option.
+
+# Peak to total demand ratio
+PEAK_RATIO_FACTOR = 1
+
+# Projection range (first and last year; format_for_writing in data.py treats both as inclusive)
+START_YEAR = 2010
+END_YEAR = 2100
diff --git a/workflow/scripts/osemosys_global/demand/custom.py b/workflow/scripts/osemosys_global/demand/custom.py
@@ -0,0 +1,48 @@
+"""Custom Nodes logic"""
+
+import pandas as pd
+import itertools
+
+
+def _get_custom_demand_expected(
+    nodes: list[str], start_year: int, end_year: int
+) -> pd.DataFrame:
+    """Build the node/year skeleton that custom demand data is merged onto.
+
+    Args:
+        nodes: Custom node codes.
+        start_year: First year to include.
+        end_year: Last year to include (inclusive).
+
+    Returns:
+        One row per (node, year) pair, ordered node by node, with columns
+        ``CUSTOM_NODE``, ``YEAR``, ``REGION`` (always ``"GLOBAL"``) and
+        ``FUEL`` (``"ELC" + node + "02"``).
+    """
+
+    pairs = [
+        (node, year)
+        for node in nodes
+        for year in range(start_year, end_year + 1)
+    ]
+    skeleton = pd.DataFrame(pairs, columns=["CUSTOM_NODE", "YEAR"])
+
+    # assign() appends the new columns in keyword order: REGION, then FUEL.
+    return skeleton.assign(
+        REGION="GLOBAL",
+        FUEL=lambda frame: "ELC" + frame["CUSTOM_NODE"] + "02",
+    )
+
+
+def import_custom_demand_data(csv: str) -> pd.DataFrame:
+    """Read the custom demand CSV into a dataframe.
+
+    Args:
+        csv: Location of the CSV, passed straight to ``pandas.read_csv``.
+
+    Returns:
+        The parsed table, unfiltered.
+    """
+    custom_demand = pd.read_csv(csv)
+    return custom_demand
+
+
+def get_custom_demand_data(
+    all_custom: pd.DataFrame, nodes: list[str], start_year: int, end_year: int
+) -> pd.DataFrame:
+    """Align user-supplied custom demand with the expected node/year grid.
+
+    Args:
+        all_custom: Custom demand table; must provide ``CUSTOM_NODE``,
+            ``YEAR`` and ``VALUE`` columns.
+        nodes: Custom node codes to keep.
+        start_year: First year to include.
+        end_year: Last year to include (inclusive).
+
+    Returns:
+        Columns ``REGION``, ``FUEL``, ``YEAR``, ``VALUE`` with one row per
+        expected (node, year) pair. ``VALUE`` is NaN where ``all_custom``
+        has no matching row, because the merge is a left join.
+    """
+
+    skeleton = _get_custom_demand_expected(nodes, start_year, end_year)
+    aligned = skeleton.merge(all_custom, how="left", on=["CUSTOM_NODE", "YEAR"])
+
+    return aligned[["REGION", "FUEL", "YEAR", "VALUE"]]
+
+
+def merge_default_custom_data(
+    default: pd.DataFrame, custom: pd.DataFrame
+) -> pd.DataFrame:
+    """Stack default and custom demand, round values and drop duplicate keys.
+
+    Args:
+        default: Default demand data.
+        custom: Custom demand data with exactly the same columns, in the
+            same order, as ``default``.
+
+    Returns:
+        The concatenated data with ``VALUE`` rounded to two decimals and at
+        most one row per (``REGION``, ``FUEL``, ``YEAR``).
+
+    Raises:
+        AssertionError: If the two frames do not share identical columns.
+    """
+    # Raised explicitly rather than via `assert` so the check survives
+    # `python -O`, while keeping the exception type of the original check.
+    if not default.columns.equals(custom.columns):
+        raise AssertionError(
+            "default and custom demand data must have identical columns; "
+            f"got {list(default.columns)} and {list(custom.columns)}"
+        )
+
+    df = pd.concat([default, custom], ignore_index=True)
+    df["VALUE"] = df["VALUE"].round(2)
+
+    # Default rows come first in the concat, so keep="first" lets the default
+    # value win over a custom one for the same key.
+    # NOTE(review): confirm that default-over-custom precedence is intended.
+    return df.drop_duplicates(keep="first", subset=["REGION", "FUEL", "YEAR"])
diff --git a/workflow/scripts/osemosys_global/demand/data.py b/workflow/scripts/osemosys_global/demand/data.py
@@ -0,0 +1,211 @@
+"""Functions to extract relevant data"""
+
+import pandas as pd
+import wbgapi as wb
+from datetime import datetime
+
+from spatial import get_spatial_mapping_country
+from constants import (
+    POP_COUNTRIES_SOURCE,
+    GDP_PPP_COUNTRIES_SOURCE,
+    URB_COUNTRIES_SOURCE,
+    PATHWAY,
+    SPATIAL_RESOLUTION,
+    START_YEAR,
+    END_YEAR,
+)
+
+
+def get_nodal_plexos_demand(plexos: pd.DataFrame) -> pd.DataFrame:
+    """Gets historical nodal demand and each node's share of its country's demand.
+
+    Determines relative 2015 share of demand per sub-country node from
+    'All_Demand_UTC_2015.csv'.
+
+    Args:
+        plexos: Wide demand table with a "Datetime" column and one column
+            per PLEXOS node (e.g. "NA-USA" or "NA-USA-CA"). Not modified.
+
+    Returns:
+        One row per node with columns "Country", "PLEXOS_Countries",
+        "PLEXOS_Nodes", "Node_Demand_2015", "Country_Demand_2015" and
+        "Share_%_Country_Demand" (node total divided by country total, so a
+        fraction rather than a percentage).
+    """
+
+    # Sums the hourly demand as retrieved from the PLEXOS-World dataset to a
+    # year total (in MWh) per node. Summing the columns directly avoids
+    # relying on the input having exactly 8760 hourly rows.
+    totals = plexos.drop(columns=["Datetime"]).sum()
+
+    # One row per node, with the original column headers as "PLEXOS_Nodes".
+    df = totals.rename_axis("PLEXOS_Nodes").reset_index(name="Node_Demand_2015")
+
+    # Adds the country code, i.e. the second "-"-separated token
+    # (e.g. node NA-USA-CA gets "USA").
+    df.insert(
+        loc=0,
+        column="Country",
+        value=df["PLEXOS_Nodes"].str.split("-", expand=True)[1],
+    )
+
+    # Adds the country-level node name, i.e. the first six characters
+    # (e.g. node NA-USA-CA gets "NA-USA").
+    df.insert(loc=1, column="PLEXOS_Countries", value=df["PLEXOS_Nodes"].str[:6])
+
+    # Country-level rows only: the node name equals its six-character prefix.
+    is_country = df["PLEXOS_Countries"] == df["PLEXOS_Nodes"]
+    country = df.loc[is_country, ["PLEXOS_Countries", "Node_Demand_2015"]].rename(
+        columns={"Node_Demand_2015": "Country_Demand_2015"}
+    )
+
+    # Adds country-level 2015 demand next to the node-level 2015 demand. The
+    # left join leaves it NaN for nodes without a country-level column.
+    nodes = pd.merge(df, country, on="PLEXOS_Countries", how="left")
+
+    nodes["Share_%_Country_Demand"] = (
+        nodes["Node_Demand_2015"] / nodes["Country_Demand_2015"]
+    )
+
+    return nodes
+
+
+def get_historical_gdp_ppp_wb(long: bool = True) -> pd.DataFrame:
+    """Fetch historical GDPppp per capita from the World Bank API.
+
+    Args:
+        long: When true, return the long format with the value column named
+            "WB_GDPppp"; otherwise return the table as extracted.
+    """
+    wide = _extract_wb("NY.GDP.PCAP.PP.KD")
+    return _longify_wb(wide, "WB_GDPppp") if long else wide
+
+
+def get_historical_urban_pop_wb(long: bool = True) -> pd.DataFrame:
+    """Fetch historical urban population (% of total population) from the World Bank API.
+
+    Args:
+        long: When true, return the long format with the value column named
+            "WB_Urb"; otherwise return the table as extracted.
+    """
+    wide = _extract_wb("SP.URB.TOTL.IN.ZS")
+    return _longify_wb(wide, "WB_Urb") if long else wide
+
+
+def _extract_wb(column: str) -> pd.DataFrame:
+    """Download one World Bank series, indexed by "Country".
+
+    2000 is used as the first year because the EMBER dataset doesn't go back
+    further, so the number of most recent values requested is the current
+    year minus 2000.
+
+    Args:
+        column: World Bank series identifier to request.
+
+    Returns:
+        The downloaded table with its "economy" field renamed to "Country"
+        and used as the index.
+    """
+    n_recent = datetime.now().year - 2000
+    raw = wb.data.DataFrame([column], mrv=n_recent)
+    renamed = raw.reset_index().rename(columns={"economy": "Country"})
+    return renamed.set_index("Country")
+
+
+def _longify_wb(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
+    """Stack wide World Bank data (one column per year) into long format.
+
+    Args:
+        df: Wide table whose column labels are year strings such as "YR2000".
+        column_name: Name given to the value column in the result.
+
+    Returns:
+        A frame with columns ``column_name`` and "Year" (the column label
+        with "YR" removed, kept as a string). The input index is repeated
+        once per year column, in column order.
+
+    TODO: Change this to a melt function
+    """
+
+    per_year = [
+        df[[label]]
+        .rename(columns={label: column_name})
+        .assign(Year=label.replace("YR", ""))
+        for label in df.columns
+    ]
+
+    return pd.concat(per_year)
+
+
+def get_historical_ember_demand(ember: pd.DataFrame) -> pd.DataFrame:
+    """Extract historical per-capita electricity demand from the EMBER data.
+
+    Args:
+        ember: EMBER table with at least "Variable", "ember_Elec" and "Year"
+            columns. Not modified.
+
+    Returns:
+        The "Demand per capita" rows without the "Variable" column, with
+        "ember_Elec" multiplied by 1000 and "Year" cast to string.
+    """
+
+    # Electricity demand per capita only
+    per_capita = ember.loc[ember["Variable"] == "Demand per capita"]
+    per_capita = per_capita.drop(columns=["Variable"])
+
+    return per_capita.assign(
+        # Conversion to kWh
+        ember_Elec=per_capita["ember_Elec"] * 1000,
+        # String years match the "Year" labels produced by _longify_wb.
+        # NOTE(review): presumably so the two tables can be joined on Year - confirm.
+        Year=per_capita["Year"].astype("str"),
+    )
+
+
+def _iamc_data_available(
+    iamc: pd.DataFrame, iamc_missing: pd.DataFrame, spatial_mapping: pd.DataFrame
+) -> bool:
+    """Check whether pathway data is available for all included countries.
+
+    Countries in the index of ``spatial_mapping`` that are absent from the
+    index of ``iamc`` must be present in the index of ``iamc_missing`` (the
+    manually maintained custom dataset).
+
+    Returns:
+        True when every such country is covered, False as soon as one is not.
+    """
+
+    countries = spatial_mapping.index
+    not_in_iamc = countries[~countries.isin(iamc.index)]
+
+    return all(country in iamc_missing.index for country in not_in_iamc)
+
+
+def get_iamc_data(
+    plexos: pd.DataFrame, iamc: pd.DataFrame, iamc_missing: pd.DataFrame, metric: str
+) -> pd.DataFrame:
+    """Gets full iamc data for one metric, limited to the modelled countries.
+
+    Args:
+        plexos: PLEXOS data, passed to ``get_spatial_mapping_country``.
+        iamc: IAMC table with "Model", "Scenario" and "Region" columns.
+        iamc_missing: Custom data for countries absent from ``iamc``; needs a
+            "Scenario" column.
+            NOTE(review): presumably indexed by the same country codes as
+            "Region" - confirm against the caller.
+        metric: One of "gdp", "pop" or "urb".
+
+    Returns:
+        The ``SPATIAL_RESOLUTION`` column of the spatial mapping inner-joined
+        on index with the selected IAMC rows plus the custom rows.
+
+    Raises:
+        NotImplementedError: If ``metric`` is not supported.
+        ValueError: If a modelled country is in neither ``iamc`` (for the
+            selected model and pathway) nor ``iamc_missing``.
+    """
+
+    models = {
+        "gdp": GDP_PPP_COUNTRIES_SOURCE,
+        "pop": POP_COUNTRIES_SOURCE,
+        "urb": URB_COUNTRIES_SOURCE,
+    }
+    if metric not in models:
+        raise NotImplementedError(
+            f"Unsupported metric {metric!r}; expected one of {sorted(models)}"
+        )
+    model = models[metric]
+
+    spatial_mapping = get_spatial_mapping_country(plexos)
+
+    df_original = iamc[
+        (iamc["Model"] == model) & (iamc["Scenario"] == PATHWAY)
+    ].set_index("Region")
+
+    # NOTE(review): availability is checked against iamc_missing before it is
+    # filtered on PATHWAY below - confirm this is intended.
+    if not _iamc_data_available(df_original, iamc_missing, spatial_mapping):
+        # Name the offending countries so the user knows what to add manually.
+        countries = spatial_mapping.index
+        covered = countries.isin(df_original.index) | countries.isin(
+            iamc_missing.index
+        )
+        absent = ", ".join(map(str, countries[~covered]))
+        raise ValueError(
+            f"Country data for {absent} is not available in custom dataset!"
+        )
+
+    df_missing = iamc_missing[(iamc_missing["Scenario"] == PATHWAY)]
+
+    df = pd.concat([df_original, df_missing])
+
+    # Filters data for relevant to be modelled countries
+    return pd.merge(
+        spatial_mapping[[SPATIAL_RESOLUTION]],
+        df,
+        left_index=True,
+        right_index=True,
+        how="inner",
+    )
+
+
+def format_for_writing(df: pd.DataFrame) -> pd.DataFrame:
+    """Reshape the wide demand projection into the long layout written to CSV.
+
+    Args:
+        df: Projection with a "PLEXOS_Nodes" column (or index level) and one
+            column per year from ``START_YEAR`` to ``END_YEAR`` inclusive.
+
+    Returns:
+        Columns "REGION" (always "GLOBAL"), "FUEL", "YEAR" and "VALUE"
+        (the projected value multiplied by 0.0036).
+
+    Raises:
+        NotImplementedError: If a node name is neither 6 nor 9 characters long.
+    """
+
+    def _extract_fuel(s: str) -> str:
+        # 6-character names are country nodes and get an "XX" placeholder;
+        # 9-character names carry a two-character sub-node code at [7:9].
+        size = len(s)
+        if size not in (6, 9):
+            raise NotImplementedError
+        sub_node = "XX" if size == 6 else s[7:9]
+        return "ELC" + s[3:6] + sub_node + "02"
+
+    flat = df.reset_index(drop=False)
+
+    assert "PLEXOS_Nodes" in flat.columns
+
+    wanted = ["PLEXOS_Nodes", *range(START_YEAR, END_YEAR + 1)]
+    long_df = flat[wanted].melt(
+        id_vars="PLEXOS_Nodes", var_name="YEAR", value_name="VALUE"
+    )
+
+    long_df["FUEL"] = long_df["PLEXOS_Nodes"].map(_extract_fuel)
+
+    # 0.0036 is the PJ-per-GWh factor.
+    # NOTE(review): assumes VALUE arrives in GWh - confirm the input unit.
+    long_df["VALUE"] = long_df["VALUE"].mul(0.0036)
+
+    long_df["REGION"] = "GLOBAL"
+
+    return long_df[["REGION", "FUEL", "YEAR", "VALUE"]]