"""Data handling for Climate Watch validation

https://www.climatewatchdata.org/ghg-emissions
"""

import pandas as pd

###
# public functions
###


def get_cw_emissions(csv_file: str, **kwargs) -> pd.DataFrame:
    """Gets Climate Watch emissions data

    Args:
        csv_file: Path to the Climate Watch CSV export.
        **kwargs: Accepted but not used by this getter.

    Returns:
        Emissions in a ``VALUE`` column, indexed on (REGION, EMISSION, YEAR).
    """
    df = _read_cw_data(csv_file)
    return _format_cw_data(df)


###
# private functions
###


def _read_cw_data(csv_file: str) -> pd.DataFrame:
    """Reads climate watch data

    The last two lines of the file are not data and are skipped. pandas only
    supports ``skipfooter`` with the python parsing engine.

    https://www.climatewatchdata.org/ghg-emissions?end_year=2021&gases=all-ghg&sectors=electricity-heat&start_year=1990
    """
    return pd.read_csv(csv_file, skipfooter=2, engine="python")


def _format_cw_data(cw: pd.DataFrame) -> pd.DataFrame:
    """Formats Climate Watch data into a (REGION, EMISSION, YEAR) structure

    The wide table (one column per year) is melted into long form. The iso
    code is stored in the EMISSION level and every row is assigned to the
    "GLOBAL" region.

    Args:
        cw: Raw data with ``iso``, ``Country/Region`` and ``unit`` columns;
            every other column is treated as a year. It is not modified.

    Returns:
        Float ``VALUE`` column indexed on (REGION, EMISSION, YEAR). Missing
        values and the literal "false" are reported as 0.
    """
    df = cw.drop(columns=["Country/Region", "unit"]).rename(columns={"iso": "EMISSION"})

    # a row without an iso code cannot be attributed to a country; dropping it
    # here keeps the value clean-up below from turning the code into 0
    df = df.dropna(subset=["EMISSION"])

    df = df.melt(id_vars=["EMISSION"], var_name="YEAR", value_name="VALUE")
    df["YEAR"] = df["YEAR"].astype(int)
    # only the values are cleaned, never the identifier columns
    df["VALUE"] = df["VALUE"].replace("false", 0).fillna(0).astype(float)
    df["REGION"] = "GLOBAL"
    return df.set_index(["REGION", "EMISSION", "YEAR"])


# ember.py: public getter added next to the existing Ember getters


def get_ember_emissions(csv_file: str, **kwargs) -> pd.DataFrame:
    """Gets Ember emissions data

    Args:
        csv_file: Path to the Ember CSV file.
        **kwargs: Accepted but not used by this getter.
    """
    df = _read_ember_data(csv_file)
    return _format_ember_emission_data(df)
def _format_ember_emission_data(ember: pd.DataFrame) -> pd.DataFrame:
    """Formats data into otoole compatible data structure

    Keeps only the total power sector emissions rows. The country is stored
    in the EMISSION level and every row is assigned to the "GLOBAL" region.

    No unit conversion needed, as Ember emissions in mtCO2

    Args:
        ember: Ember data with the columns ``Category``, ``Subcategory``,
            ``COUNTRY``, ``YEAR`` and ``VALUE``. It is not modified.

    Returns:
        ``VALUE`` summed over a (REGION, EMISSION, YEAR) index.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    required = ["Category", "Subcategory", "COUNTRY", "YEAR", "VALUE"]
    missing = [col for col in required if col not in ember.columns]
    if missing:
        raise KeyError(f"Ember data is missing required columns: {missing}")

    is_total = (ember["Category"] == "Power sector emissions") & (
        ember["Subcategory"] == "Total"
    )

    # select rows and columns in one step, so only the data that is actually
    # needed is copied rather than the whole Ember table
    df = (
        ember.loc[is_total, ["COUNTRY", "YEAR", "VALUE"]]
        .rename(columns={"COUNTRY": "EMISSION"})
        .assign(REGION="GLOBAL")
    )
    return df.groupby(["REGION", "EMISSION", "YEAR"]).sum()
mapper=ember.OG_NAME_MAPPER), "plotter": plot_gen_cap, } case _: @@ -50,25 +51,43 @@ def get_capacity_funcs(datasource: str) -> dict[str, callable]: case "eia" | "EIA" | "Eia": return { "getter": eia.get_eia_capacity, - "formatter": partial(format_og_data, mapper=eia.OG_CAP_NAME_MAPPER), + "formatter": partial(format_rty_results, mapper=eia.OG_CAP_NAME_MAPPER), "plotter": plot_gen_cap, } case "irena" | "IRENA" | "Irena": return { "getter": irena.get_irena_capacity, - "formatter": partial(format_og_data, mapper=irena.OG_NAME_MAPPER), + "formatter": partial(format_rty_results, mapper=irena.OG_NAME_MAPPER), "plotter": plot_gen_cap, } case "ember" | "EMBER" | "Ember": return { "getter": ember.get_ember_capacity, - "formatter": partial(format_og_data, mapper=ember.OG_NAME_MAPPER), + "formatter": partial(format_rty_results, mapper=ember.OG_NAME_MAPPER), "plotter": plot_gen_cap, } case _: raise KeyError +def get_emission_funcs(datasource: str) -> dict[str, callable]: + match datasource: + case "ember" | "EMBER" | "Ember": + return { + "getter": ember.get_ember_emissions, + "formatter": format_rey_results, + "plotter": plot_emissions, + } + case "climatewatch" | "climate_watch" | "ClimateWatch" | "Climatewatch": + return { + "getter": climate_watch.get_cw_emissions, + "formatter": format_rey_results, + "plotter": plot_emissions, + } + case _: + raise KeyError + + ### # entry point ### @@ -77,10 +96,10 @@ def get_capacity_funcs(datasource: str) -> dict[str, callable]: if "snakemake" in globals(): raise NotImplementedError else: - datasource = "eia" - variable = "capacity" + datasource = "climatewatch" + variable = "emissions" result_dir = "results/India/results" - data_file = "resources/data/validation/eia_capacity.json" + data_file = "resources/data/validation/climate-watch-emissions.csv" options = {} # options = {"iso_codes": "resources/data/validation/iso.csv"} @@ -95,6 +114,9 @@ def get_capacity_funcs(datasource: str) -> dict[str, callable]: elif variable == "capacity": 
def _join_data(
    modelled: pd.DataFrame, actual: pd.DataFrame, dataset_name: Optional[str] = None
) -> pd.DataFrame:
    """Joins modelled and actual data on indices

    The ``VALUE`` column of the modelled data is renamed to "OSeMOSYS" and
    the one of the actual data to ``dataset_name``. The join keeps every
    modelled row; rows without a match in the actual data get a missing value.

    Args:
        modelled: Model results with a ``VALUE`` column and a ``REGION``
            index level.
        actual: External data with a ``VALUE`` column, indexed like
            ``modelled``.
        dataset_name: Column name given to the actual data. Defaults to
            "ACTUAL" when not provided or empty.

    Returns:
        Joined data with the ``REGION`` index level removed.

    Raises:
        ValueError: If the joined data does not cover exactly one region.
    """
    actual_name = dataset_name or "ACTUAL"

    joined = modelled.rename(columns={"VALUE": "OSeMOSYS"}).join(
        actual.rename(columns={"VALUE": actual_name})
    )

    # raised explicitly (rather than asserted) so that the check is still
    # performed when python runs with assertions disabled
    regions = joined.index.get_level_values("REGION").unique()
    if len(regions) != 1:
        raise ValueError(f"Expected data for exactly one region. Got {list(regions)}")

    return joined.droplevel("REGION")
def plot_emissions(
    modelled: pd.DataFrame,
    actual: pd.DataFrame,
    variable: str,
    dataset_name: Optional[str] = None,
) -> "dict[str, tuple[plt.Figure, plt.Axes]]":
    """Plots modelled against actual emissions, one bar chart per country

    Args:
        modelled: Model results, indexed on (REGION, EMISSION, YEAR).
        actual: External data with the same index names as ``modelled``.
        variable: What is plotted. Only "emissions" is implemented;
            "emission_intensity" is recognised but not implemented yet.
        dataset_name: Label used for the external data in the plot.

    Returns:
        Dictionary mapping each country to its (figure, axes) pair. The
        figures are left open for the caller to save or close.

    Raises:
        ValueError: If the index names of the two datasets differ, or if
            ``variable`` is not a known emission variable.
        NotImplementedError: If ``variable`` is "emission_intensity".

    Note:
        The return annotation names the ``Figure``/``Axes`` classes;
        ``plt.figure``/``plt.axes`` are functions, not types.
    """
    # raised explicitly (rather than asserted) so that the check is still
    # performed when python runs with assertions disabled
    if modelled.index.names != actual.index.names:
        raise ValueError(
            "Modelled and actual data must share index names. "
            f"Got {list(modelled.index.names)} and {list(actual.index.names)}"
        )

    if variable == "emissions":
        units = "MT"
    elif variable == "emission_intensity":
        # would be plotted in T/kWh once supported
        raise NotImplementedError("Plotting emission_intensity is not implemented")
    else:
        raise ValueError(
            "Variable must be one of ['emissions', 'emission_intensity']. "
            f"Received {variable}"
        )

    df = _join_data(modelled, actual, dataset_name).reset_index()

    data = {}

    # emission column holds country; sort=False keeps the order of appearance
    for country, df_country in df.groupby("EMISSION", sort=False):
        fig, ax = plt.subplots(1, 1, figsize=(10, 5))
        title = f"{country} {variable.capitalize()}"
        df_country.drop(columns=["EMISSION"]).set_index("YEAR").plot(
            kind="bar", ax=ax, rot=45, title=title, xlabel="", ylabel=units
        )
        data[country] = (fig, ax)

    return data
def format_rey_results(og: pd.DataFrame, cutoff_year: int = 2023) -> pd.DataFrame:
    """Formats OG results for comparison on region, emission, year

    Works on:
    - AnnualEmissions

    Args:
        og: Model results, either indexed on (REGION, EMISSION, YEAR) with a
            single value column, or with those fields as ordinary columns.
            It is not modified.
        cutoff_year: Only years strictly before this one are kept.
            NOTE(review): the default of 2023 presumably matches the last
            year available in the external datasets - confirm.

    Returns:
        Results indexed on (REGION, EMISSION, YEAR), where EMISSION holds
        characters 3 to 5 of the original emission name.
    """
    # a single column means REGION/EMISSION/YEAR are still in the index
    df = og.reset_index() if len(og.columns) == 1 else og

    df = df.loc[df["YEAR"] < cutoff_year]

    # emission is used to track country
    # assign builds a new frame, which avoids writing into the filtered slice
    df = df.assign(EMISSION=df["EMISSION"].str[3:6])
    return df.set_index(["REGION", "EMISSION", "YEAR"])