Skip to content

Commit

Permalink
eia capacity
Browse files Browse the repository at this point in the history
  • Loading branch information
trevorb1 committed Oct 14, 2024
1 parent 3145012 commit 6061119
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 169 deletions.
95 changes: 54 additions & 41 deletions workflow/scripts/osemosys_global/validation/eia.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,38 @@
"Other gases fossil fuel": "OTH",
}

# Maps OSeMOSYS Global three-letter technology codes (keys) to the technology
# codes used when comparing modelled *generation* against EIA data (values).
# Passed as the ``mapper`` argument of ``format_og_data`` for EIA generation
# validation. Several model codes collapse onto one comparison code
# (e.g. CCG and OCG -> GAS, WON and WOF -> WND).
# A value of None marks a code with no EIA counterpart; presumably such rows
# are dropped by the formatter -- confirm against ``format_og_data``.
OG_GEN_NAME_MAPPER = {
    "BIO": "BIO",
    "CCG": "GAS",
    "COA": "COA",
    "CSP": "SPV",
    "HYD": "HYD",
    "OCG": "GAS",
    "OIL": "OIL",
    "SPV": "SPV",
    "TRN": None,
    "URN": "URN",
    "WON": "WND",
    "WOF": "WND",
    "WAV": "WAV",
}

# Maps OSeMOSYS Global three-letter technology codes (keys) to the technology
# codes used when comparing modelled *capacity* against EIA data (values).
# Passed as the ``mapper`` argument of ``format_og_data`` for EIA capacity
# validation. Unlike the generation mapper, CCG, COA, OCG and OIL all collapse
# onto the single code FFS.
# A value of None marks a code with no EIA counterpart; presumably such rows
# are dropped by the formatter -- confirm against ``format_og_data``.
OG_CAP_NAME_MAPPER = {
    "BIO": "BIO",
    "CCG": "FFS",
    "COA": "FFS",
    "CSP": "SPV",
    "HYD": "HYD",
    "OCG": "FFS",
    "OIL": "FFS",
    "SPV": "SPV",
    "TRN": None,
    "URN": "URN",
    "WON": "WND",
    "WOF": "WND",
    "WAV": "WAV",
}

###
# public functions
###
Expand All @@ -58,40 +90,6 @@ def get_eia_generation(json_file: str, **kwargs) -> pd.DataFrame:
return _format_eia_generation_data(df)


def format_og_generation(prod_tech_annual: pd.DataFrame) -> pd.DataFrame:
    """Formats ProductionByTechnologyAnnual data for eia comparison

    Keeps only power technologies (names starting with "PWR") for years
    before 2023, translates the three-letter technology code (characters 3:6
    of the technology name) into the code used for the EIA comparison, appends
    the three-letter country code (characters 6:9) and sums all values that
    end up sharing the same (REGION, TECHNOLOGY, YEAR).

    Arguments:
        prod_tech_annual: pd.DataFrame
            Model results with REGION, TECHNOLOGY, YEAR and VALUE available
            either as columns or, when VALUE is the only column, as index
            levels. Any additional columns (e.g. FUEL) are ignored.

    Returns:
        pd.DataFrame
            A single VALUE column indexed by (REGION, TECHNOLOGY, YEAR), where
            TECHNOLOGY is the mapped code followed by the country code.
    """

    name_mapper = {
        "BIO": "BIO",
        "CCG": "GAS",
        "COA": "COA",
        "CSP": "SPV",
        "HYD": "HYD",
        "OCG": "GAS",
        "OIL": "OIL",
        "SPV": "SPV",
        "TRN": None,
        "URN": "URN",
        "WON": "WND",
        "WOF": "WND",
        "WAV": "WAV",
    }

    df = prod_tech_annual.copy()

    # a lone column means the identifying fields live in the index
    if len(df.columns) == 1:
        df = df.reset_index()

    # explicit copy so the column assignments below work on an independent frame
    df = df[(df.TECHNOLOGY.str.startswith("PWR")) & (df.YEAR < 2023)].copy()
    df["COUNTRY"] = df.TECHNOLOGY.str[6:9]
    df["CODE"] = df.TECHNOLOGY.str[3:6].map(name_mapper)
    # codes mapped to None (or missing from the mapper) have no counterpart
    df = df.dropna(subset=["CODE"])
    df["TECHNOLOGY"] = df.CODE + df.COUNTRY
    # select the output columns rather than dropping helper columns, so inputs
    # without a FUEL column do not raise a KeyError
    df = df[["REGION", "TECHNOLOGY", "YEAR", "VALUE"]]
    return df.groupby(["REGION", "TECHNOLOGY", "YEAR"]).sum()


###
# private functions
###
Expand All @@ -105,7 +103,12 @@ def _read_eia_data(json_file: str) -> pd.DataFrame:
df = pd.read_json(json_file)
df["name"] = df.name.map(lambda x: x.split(", ")[0])
df = df.explode(column="data")
df["year"] = df.data.map(lambda x: datetime.fromtimestamp(x["date"] / 1000).year)
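# not sure why, but the 'datetime.fromtimestamp(x["date"] / 1000).year' call gives the
# next year rather than the correct one, i.e. if I call the year 2020, 2021 values are
# returned. That's why there is an extra '+1' at the end of the lambda.
# NOTE(review): fromtimestamp() without a tz argument converts to local time, so this
# offset may depend on the machine's timezone -- confirm.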
df["year"] = df.data.map(
lambda x: datetime.fromtimestamp(x["date"] / 1000).year + 1
)
df["VALUE"] = df.data.map(lambda x: x["value"])
df["VALUE"] = df.VALUE.fillna(0)
return df.drop(
Expand All @@ -114,19 +117,29 @@ def _read_eia_data(json_file: str) -> pd.DataFrame:


def _format_eia_capacity_data(eia: pd.DataFrame) -> pd.DataFrame:
"""Formats data into otoole compatible data structure"""
"""Formats data into otoole compatible data structure
Note, no unit conversion as capacity is already given in GW
"""

df = eia.copy()

df["name"] = df.name.map(
lambda x: x.split(" electricity installed capacity")[0]
).map(CAPACITY_MAPPER)
df["name"] = df.name + df.iso
df = df.drop(columns=["iso"])
df = df.groupby(["name", "year"], as_index=False).sum()
df["VALUE"] = (
df.VALUE.replace("NA", 0)
.replace("--", 0)
.replace("ie", 0)
.replace("(s)", 0)
.fillna(0)
.astype(float)
)
df = df.rename(columns={"name": "TECHNOLOGY", "year": "YEAR"})
df["REGION"] = "GLOBAL"
return df.set_index(["REGION", "TECHNOLOGY", "YEAR"])
df = df[["REGION", "TECHNOLOGY", "YEAR", "VALUE"]]
return df.groupby(["REGION", "TECHNOLOGY", "YEAR"]).sum()


def _format_eia_generation_data(eia: pd.DataFrame) -> pd.DataFrame:
Expand All @@ -147,10 +160,10 @@ def _format_eia_generation_data(eia: pd.DataFrame) -> pd.DataFrame:
.replace("NA", 0)
.astype(float)
)
df = df.groupby(["name", "year"], as_index=False).sum()
df = df.rename(columns={"name": "TECHNOLOGY", "year": "YEAR"})
df["REGION"] = "GLOBAL"
# billion kWh -> PJ
# 1B kWh = 1 TWh = 1 TW * 3600 s = 3600 TJ = 3.6 PJ
df["VALUE"] = df.VALUE.mul(3.6)
return df.set_index(["REGION", "TECHNOLOGY", "YEAR"])
df = df[["REGION", "TECHNOLOGY", "YEAR", "VALUE"]]
return df.groupby(["REGION", "TECHNOLOGY", "YEAR"]).sum()
23 changes: 0 additions & 23 deletions workflow/scripts/osemosys_global/validation/ember.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,29 +52,6 @@ def get_ember_generation(csv_file: str, **kwargs) -> pd.DataFrame:
return _format_ember_generation_data(df)


def format_og_data(og: pd.DataFrame, mapper: "dict | None" = None) -> pd.DataFrame:
    """Formats OG results for ember comparison

    Works on:
    - ProductionByTechnologyAnnual
    - TotalCapacityAnnual

    Keeps only power technologies (names starting with "PWR") for years
    before 2023, translates the three-letter technology code (characters 3:6
    of the technology name) with ``mapper``, appends the three-letter country
    code (characters 6:9) and sums all values that end up sharing the same
    (REGION, TECHNOLOGY, YEAR).

    Arguments:
        og: pd.DataFrame
            Model results with REGION, TECHNOLOGY, YEAR and VALUE available
            either as columns or, when VALUE is the only column, as index
            levels.
        mapper: dict | None
            Technology code -> comparison code. Codes mapped to None, or not
            present in the mapper, are dropped. Defaults to the module-level
            OG_NAME_MAPPER.

    Returns:
        pd.DataFrame
            A single VALUE column indexed by (REGION, TECHNOLOGY, YEAR).
    """

    if mapper is None:
        mapper = OG_NAME_MAPPER

    df = og.copy()

    # a lone column means the identifying fields live in the index
    if len(df.columns) == 1:
        df = df.reset_index()

    # explicit copy so the column assignments below work on an independent frame
    df = df[(df.TECHNOLOGY.str.startswith("PWR")) & (df.YEAR < 2023)].copy()
    df["COUNTRY"] = df.TECHNOLOGY.str[6:9]
    df["CODE"] = df.TECHNOLOGY.str[3:6].map(mapper)
    df = df.dropna(subset=["CODE"])
    df["TECHNOLOGY"] = df.CODE + df.COUNTRY
    df = df[["REGION", "TECHNOLOGY", "YEAR", "VALUE"]]
    return df.groupby(["REGION", "TECHNOLOGY", "YEAR"]).sum()


###
# private functions
###
Expand Down
23 changes: 0 additions & 23 deletions workflow/scripts/osemosys_global/validation/irena.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,29 +60,6 @@ def get_irena_generation(csv_file: str, iso_codes: str, **kwargs) -> pd.DataFram
return _format_irena_generation_data(df)


def format_og_data(
    prod_tech_annual: pd.DataFrame, mapper: "dict | None" = None
) -> pd.DataFrame:
    """Formats OG results for irena comparison

    Works on:
    - ProductionByTechnologyAnnual
    - TotalCapacityAnnual

    Keeps only power technologies (names starting with "PWR") for years
    before 2023, translates the three-letter technology code (characters 3:6
    of the technology name) with ``mapper``, appends the three-letter country
    code (characters 6:9) and sums all values that end up sharing the same
    (REGION, TECHNOLOGY, YEAR).

    Arguments:
        prod_tech_annual: pd.DataFrame
            Model results with REGION, TECHNOLOGY, YEAR and VALUE available
            either as columns or, when VALUE is the only column, as index
            levels.
        mapper: dict | None
            Technology code -> comparison code. Codes mapped to None, or not
            present in the mapper, are dropped. Defaults to the module-level
            OG_NAME_MAPPER.

    Returns:
        pd.DataFrame
            A single VALUE column indexed by (REGION, TECHNOLOGY, YEAR).
    """

    if mapper is None:
        mapper = OG_NAME_MAPPER

    df = prod_tech_annual.copy()

    # a lone column means the identifying fields live in the index
    if len(df.columns) == 1:
        df = df.reset_index()

    df = df[(df.TECHNOLOGY.str.startswith("PWR")) & (df.YEAR < 2023)]
    df["COUNTRY"] = df.TECHNOLOGY.str[6:9]
    df["CODE"] = df.TECHNOLOGY.str[3:6].map(mapper)
    # copy after filtering so the TECHNOLOGY assignment targets its own frame
    df = df.dropna(subset=["CODE"]).copy()
    df["TECHNOLOGY"] = df.CODE + df.COUNTRY
    df = df[["REGION", "TECHNOLOGY", "YEAR", "VALUE"]]
    return df.groupby(["REGION", "TECHNOLOGY", "YEAR"]).sum()


###
# private functions
###
Expand Down
94 changes: 12 additions & 82 deletions workflow/scripts/osemosys_global/validation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
"""

import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional
from pathlib import Path
from utils import plot_gen_cap, format_og_data
from functools import partial
import eia
import irena
import ember
Expand All @@ -16,76 +16,6 @@

logger = logging.getLogger(__name__)

###
# plotters
###


def plot_gen_cap(
    modelled: pd.DataFrame,
    actual: pd.DataFrame,
    variable: str,
    dataset_name: Optional[str] = None,
) -> dict[str, tuple[plt.figure, plt.axes]]:
    """Plots modelled against actual values as bar charts, one figure per country

    Both inputs must share the same index names and carry a VALUE column. The
    first three characters of TECHNOLOGY are used as the bar label and the
    remaining characters as the country. Each country gets one figure with one
    subplot per year.

    Arguments:
        modelled: pd.DataFrame
            Model results; plotted under the label "OSeMOSYS"
        actual: pd.DataFrame
            Reference data; plotted under ``dataset_name``
        variable: str
            One of "generation" (y-axis in PJ) or "capacity" (y-axis in GW)
        dataset_name: Optional[str]
            Label for the reference data. Falls back to "ACTUAL"

    Returns:
        dict mapping each country to its (figure, axes) pair. The axes entry
        is whatever ``plt.subplots`` returned for that country.

    Raises:
        ValueError: if ``variable`` is not "generation" or "capacity"
    """

    def _join_data(
        modelled: pd.DataFrame, actual: pd.DataFrame, dataset_name: Optional[str] = None
    ) -> pd.DataFrame:
        """Joins actual onto modelled and removes the (single) REGION level"""

        label = dataset_name if dataset_name else "ACTUAL"

        joined = modelled.rename(columns={"VALUE": "OSeMOSYS"}).join(
            actual.rename(columns={"VALUE": label})
        )

        # only single-region data can be flattened safely
        assert len(joined.index.get_level_values("REGION").unique()) == 1

        return joined.droplevel("REGION")

    assert modelled.index.names == actual.index.names

    if variable == "generation":
        units = "PJ"
    elif variable == "capacity":
        units = "GW"
    else:
        raise ValueError(
            f"Variable must be one of ['generation', 'capacity']. Recieved {variable}"
        )

    combined = _join_data(modelled, actual, dataset_name).reset_index()
    combined["TECH"] = combined["TECHNOLOGY"].str[0:3]
    combined["COUNTRY"] = combined["TECHNOLOGY"].str[3:]

    figures = {}

    for country in combined.COUNTRY.unique():
        per_country = combined[combined.COUNTRY == country]
        years = per_country.YEAR.unique()
        n_rows = len(years)
        fig, axs = plt.subplots(n_rows, 1, figsize=(10, n_rows * 4))

        for row, year in enumerate(years):
            per_year = per_country[per_country.YEAR == year]
            per_year = per_year.drop(columns=["TECHNOLOGY", "YEAR", "COUNTRY"])
            per_year = per_year.set_index("TECH")

            # plt.subplots returns a bare Axes (not an array) for a single row
            ax = axs[row] if n_rows > 1 else axs

            per_year.plot(
                kind="bar",
                ax=ax,
                rot=45,
                title=f"{country} {variable.capitalize()} in {year}",
                xlabel="",
                ylabel=units,
            )

        figures[country] = (fig, axs)

    return figures


###
# getters
###
Expand All @@ -96,19 +26,19 @@ def get_generation_funcs(datasource: str) -> dict[str, callable]:
case "eia" | "EIA" | "Eia":
return {
"getter": eia.get_eia_generation,
"formatter": eia.format_og_generation,
"formatter": partial(format_og_data, mapper=eia.OG_GEN_NAME_MAPPER),
"plotter": plot_gen_cap,
}
case "irena" | "IRENA" | "Irena":
return {
"getter": irena.get_irena_generation,
"formatter": irena.format_og_data,
"formatter": partial(format_og_data, mapper=irena.OG_NAME_MAPPER),
"plotter": plot_gen_cap,
}
case "ember" | "EMBER" | "Ember":
return {
"getter": ember.get_ember_generation,
"formatter": ember.format_og_data,
"formatter": partial(format_og_data, mapper=ember.OG_NAME_MAPPER),
"plotter": plot_gen_cap,
}
case _:
Expand All @@ -120,19 +50,19 @@ def get_capacity_funcs(datasource: str) -> dict[str, callable]:
case "eia" | "EIA" | "Eia":
return {
"getter": eia.get_eia_capacity,
"formatter": eia.format_og_capacity,
"formatter": partial(format_og_data, mapper=eia.OG_CAP_NAME_MAPPER),
"plotter": plot_gen_cap,
}
case "irena" | "IRENA" | "Irena":
return {
"getter": irena.get_irena_capacity,
"formatter": irena.format_og_data,
"formatter": partial(format_og_data, mapper=irena.OG_NAME_MAPPER),
"plotter": plot_gen_cap,
}
case "ember" | "EMBER" | "Ember":
return {
"getter": ember.get_ember_capacity,
"formatter": ember.format_og_data,
"formatter": partial(format_og_data, mapper=ember.OG_NAME_MAPPER),
"plotter": plot_gen_cap,
}
case _:
Expand All @@ -147,12 +77,12 @@ def get_capacity_funcs(datasource: str) -> dict[str, callable]:
if "snakemake" in globals():
raise NotImplementedError
else:
datasource = "irena"
datasource = "eia"
variable = "capacity"
result_dir = "results/India/results"
data_file = "resources/data/validation/irena_capacity.csv"
data_file = "resources/data/validation/eia_capacity.json"
options = {}
options = {"iso_codes": "resources/data/validation/iso.csv"}
# options = {"iso_codes": "resources/data/validation/iso.csv"}

csv_results = Path(result_dir)
validation_results = Path(csv_results, "..", "validation")
Expand Down Expand Up @@ -180,7 +110,7 @@ def get_capacity_funcs(datasource: str) -> dict[str, callable]:
logger.error(f"No validation for {variable} from {datasource}: \n{e}")

if isinstance(actual, pd.DataFrame) and isinstance(modelled, pd.DataFrame):
gen = funcs["plotter"](modelled, actual, datasource)
gen = funcs["plotter"](modelled, actual, variable, datasource)
for country, (fig, _) in gen.items():
p = Path(validation_results, country, variable)
if not p.exists():
Expand Down
Loading

0 comments on commit 6061119

Please sign in to comment.