# ReEDS-to-Tableau Postprocessing
#
# This script concatenates ReEDS csv outputs from a specified list of ReEDS scenarios to
# a new directory (specified as output_dir), creates pivot tables of the specified outputs
# using custom functions that can be defined for each parameter, and creates a Tableau .hyper
# extract file containing the pivoted tables for visualization and analysis in Tableau.
#
# Before running this script for the first time, you'll need to install the Tableau Hyper API via pip with:
# `pip install tableauhyperapi`
#
# Example call:
# python postprocess_for_tableau.py \
# -d '//nrelnas01/reeds/some_dir_containing_runs' \
# -r 'D:/mirish/projects/ReEDS-2.0' \
# -o '//nrelnas01/reeds/some_directory_containing_runs/testbatch_suite' \
# -s testbatch_refseq,testbatch_carbtax,testbatch_carbcap \
# -p standard,plexos
import pandas as pd
import numpy as np
import os
import sys
from pathlib import Path
# Add bokehpivot and retail_rate_module to the path so we can grab some of their objects:
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / 'postprocessing' / 'bokehpivot'))
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / 'postprocessing' / 'retail_rate_module'))
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / 'postprocessing' / 'tableau'))
from pivot_definitions import PIVOT_DEFS, pivots_without_csvs
import core
from reeds2 import pre_systemcost, inflate_series
import bokeh.models.widgets as bmw #for setting dollar year widget in cost reporting
import reeds_bokeh
import retail_rate_calculations
from tableauhyperapi import (HyperProcess, Telemetry,
Connection, CreateMode,
NULLABLE, SqlType, TableDefinition,
escape_string_literal,
HyperException)
# Define a dict of column types mapping their names to data types and
# fancier names we'll use within Tableau for plotting.
# "val" is excluded here, which refers to the measured value column.
# Whatever's in the "label" column of tables_to_aggregate
# will be applied as the name and the column's assigned as a double.
column_types = {
"scenario": ["Scenario", SqlType.text()],
"i": ["Technology or Category", SqlType.text()],
"v": ["Class", SqlType.text()],
"h": ["Timeslice", SqlType.text()],
"r": ["Region", SqlType.text()],
"rf": ["From Region", SqlType.text()],
"rt": ["To Region", SqlType.text()],
"t": ["Year", SqlType.int()],
"bin": ["Bin", SqlType.text()],
"szn": ["Season", SqlType.text()],
"capture_types": ["Capture Type", SqlType.text()],
'cendiv': ["Census Division", SqlType.text()],
'country': ["Country", SqlType.text()],
'interconnect': ["Interconnection", SqlType.text()],
'rb': ["Balancing Area", SqlType.text()],
'st': ["State/Province", SqlType.text()],
'transreg': ["Transmission Region", SqlType.text()],
'usda_region': ["USDA (Biomass Supply Curve) Region", SqlType.text()],
"ortype": ["Reserve Type", SqlType.text()],
"trtype": ["Transmission Type", SqlType.text()],
"type": ["Type", SqlType.text()],
"subtype": ["Subtype", SqlType.text()],
"cost_cat_display": ["Cost Category", SqlType.text()],
"cost_cat": ["Cost Subcategory", SqlType.text()],
"stor_in_or_out": ["Energy Flow", SqlType.text()],
"price_component": ["Price Component or Credit", SqlType.text()],
"var_name": ["Variable", SqlType.text()],
"con_name": ["Constraint or Objective Coeffficient", SqlType.text()],
"process": ["Process", SqlType.text()],
"subprocess": ["Subprocess", SqlType.text()],
"starttime": ["Start Time (UTC)", SqlType.timestamp()],
"stoptime": ["Stop Time (UTC)", SqlType.timestamp()],
"machine": ["Computing Resource", SqlType.text()],
"repo": ["Repo Path", SqlType.text()],
"branch": ["Branch", SqlType.text()],
"commit": ["Commit Hash", SqlType.text()],
"p": ["Product", SqlType.text()],
"e": ["Emission Type", SqlType.text()],
"8760": ["Time and Date", SqlType.timestamp()],
"plexos_scenario": ["PLEXOS Scenario", SqlType.text()]
}
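# Example: a csv whose set columns are listed as [i, r, t] in tables_to_aggregate.csv
# will surface in Tableau as "Technology or Category" (text), "Region" (text), and
# "Year" (int), plus a double-typed measure column named by its "label" field.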
# Create a mapping between H5PLEXOS parameters as read in and as
# reported to Tableau:
plexos_param_names = {
'plexos_capacity':'PLEXOS Installed Capacity (MW)',
'plexos_generation':'PLEXOS Generation (MWh)',
'plexos_emissions':'na',
'plexos_availableenergy':'PLEXOS Available Energy (GWh)',
'plexos_pumpload':'PLEXOS Pump Load (MWh)',
'plexos_load':'PLEXOS Load (MWh)',
'plexos_losses':'PLEXOS Interregional Transmission Losses (MWh)',
'plexos_lmp':'PLEXOS LMP ($/MWh)',
'plexos_use':'PLEXOS Unserved Energy (MWh)'
}
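# 'plexos_emissions' maps to 'na' because emissions are split into per-pollutant
# columns (CO2/SO2/NOX) in the custom handling further below rather than reported
# under a single parameter name.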
def get_region_mapping(reeds_path):
"""
Assemble a spatial mapping file with geometry columns using various csvs
saved in the ReEDS repo.
"""
hier = pd.read_csv(Path(reeds_path,'inputs','hierarchy.csv'),header=0,index_col=False)
hier = hier.rename(columns={'*r':'r'})
hier['rb'] = hier['r']
# Pull in geometries for BA polygons and centroids/transmission endpoints
ba_polygons = pd.read_csv(Path(reeds_path,'inputs','shapefiles','US_CAN_MEX_PCA_polygons.csv'))
ba_centroids = pd.read_csv(Path(reeds_path,'inputs','shapefiles','US_transmission_endpoints_and_CAN_MEX_centroids.csv'))
hier = pd.merge(hier,ba_polygons[['WKT','rb']],on='rb',how='left')
hier = pd.merge(hier,ba_centroids,left_on='rb',right_on='ba_str',how='left')
hier = hier.rename(columns={i:column_types[i][0] for i in column_types})
hier = hier.rename(columns={'WKT_x':'BA Polygon Geometry',
'WKT_y':'BA Centroid Geometry'})
return hier
def merge_spatial_data(pivot,pivot_info,pivot_name,col_defs,region_mapping,line_geometries):
"""
Merge region_mapping into a pivot table natively.
This drastically increases the size of a table, so it's intended
for use only when an output csv is going to be used directly
instead of analyzed in Tableau, which can do merges on the fly.
"""
# Merge regional metadata columns where appropriate:
if 'r' in pivot_info['id_columns'] and pivot_name != 'region_mapping':
# Exclude i**** individual site supply regions from mapping table if none are present in the pivot table to speed up merges:
if pivot[pivot['r'].str.startswith('i')].empty:
this_region_mapping = region_mapping.loc[~region_mapping['r'].str.startswith('i'),:]
pivot = pivot.merge(this_region_mapping,how='left')
# Rearrange columns:
new_region_cols = this_region_mapping.drop('r',axis=1).columns.tolist()
pivot = pivot[pivot_info['id_columns'] + \
new_region_cols + \
pivot.drop(set(pivot_info['id_columns'] + new_region_cols),axis=1).columns.tolist()]
# Insert region mapping columns into column definitions:
col_defs = col_defs[0:len(pivot_info['id_columns'])] + \
[ TableDefinition.Column(col, SqlType.text(), NULLABLE) for col in new_region_cols if col not in ['BA Polygon Geometry','BA Centroid Geometry'] ] + \
[ TableDefinition.Column(col, SqlType.geography(), NULLABLE) for col in new_region_cols if col in ['BA Polygon Geometry','BA Centroid Geometry'] ] + \
col_defs[len(pivot_info['id_columns']):]
elif 'st' in pivot_info['id_columns']:
this_region_mapping = region_mapping.drop(['r','Balancing Area'],axis=1).drop_duplicates()
pivot = pivot.merge(this_region_mapping,left_on='st',right_on='State/Province' ,how='left')
# Rearrange columns:
new_region_cols = this_region_mapping.drop('State/Province',axis=1).columns.tolist()
pivot = pivot[pivot_info['id_columns'] + \
new_region_cols + \
pivot.drop(set(pivot_info['id_columns'] + new_region_cols),axis=1).columns.tolist()]
# Insert region mapping columns into column definitions:
col_defs = col_defs[0:len(pivot_info['id_columns'])] + \
[ TableDefinition.Column(col, SqlType.text(), NULLABLE) for col in new_region_cols if col not in ['BA Polygon Geometry','BA Centroid Geometry'] ] + \
[ TableDefinition.Column(col, SqlType.geography(), NULLABLE) for col in new_region_cols if col in ['BA Polygon Geometry','BA Centroid Geometry'] ] + \
col_defs[len(pivot_info['id_columns']):]
elif 'rf' in pivot_info['id_columns']: #for transmission table
this_region_mapping = region_mapping.loc[~region_mapping['r'].str.get(0).isin(['s','i']),:] #exclude non-BA regions from merge
pivot = pivot.merge(this_region_mapping.add_prefix('From '),how='left',left_on='rf',right_on='From r')
pivot = pivot.merge(this_region_mapping.add_prefix('To '),how='left',left_on='rt',right_on='To r')
pivot = pivot.drop(['From r','To r'],axis=1)
# Join on line geometries:
pivot = pivot.merge(line_geometries,how='left',left_on=['From Balancing Area','To Balancing Area'],right_on=['from_ba','to_ba'])
pivot = pivot.drop(['from_ba','to_ba'],axis=1)
pivot = pivot.rename(columns={'WKT':'Line Geometry'})
# Rearrange columns:
geometry_cols = ['From BA Polygon Geometry','From BA Centroid Geometry',
'To BA Polygon Geometry','To BA Centroid Geometry',
'Line Geometry']
new_region_cols = [ col for col in region_mapping.add_prefix('From ').drop('From r',axis=1).columns.tolist() if col not in geometry_cols ] + \
[ col for col in region_mapping.add_prefix('To ').drop('To r',axis=1).columns.tolist() if col not in geometry_cols ] + \
geometry_cols
pivot = pivot[pivot_info['id_columns'] + \
new_region_cols + \
pivot.drop(set(pivot_info['id_columns'] + new_region_cols),axis=1).columns.tolist()]
# Insert region mapping columns into column definitions:
col_defs = col_defs[0:len(pivot_info['id_columns'])] + \
[ TableDefinition.Column(col, SqlType.text(), NULLABLE) for col in new_region_cols if col not in geometry_cols ] + \
[ TableDefinition.Column(col, SqlType.geography(), NULLABLE) for col in new_region_cols if col in geometry_cols ] + \
col_defs[len(pivot_info['id_columns']):]
return pivot, col_defs
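# Note: this function is only invoked behind an opt-in flag near the end of
# main(), since the merged geometry columns drastically increase table size and
# Tableau can perform these joins on the fly.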
def concatenate_csvs(this_csv,rel_path,col_name,scenarios,runs_dir):
"""
For the csv name input as a string, this_csv, grab the csv
from each ReEDS scenario in the list scenarios and concatenate
into a single table with a scenario column and headers
specified in col_name.
"""
df_list = []
for this_scenario in scenarios:
# -- Add a scenario name and append this csv to the list --
try:
df = pd.read_csv(Path(runs_dir) / this_scenario / rel_path / ( this_csv + '.csv'))
except FileNotFoundError:
print(f'Scenario {this_scenario} did not create a csv for {this_csv}. Skipping the scenario.')
continue
if any(['Symbol not found:' in x for x in df.columns]):
print(f'Scenario {this_scenario} created a csv for {this_csv} but the variable did not exist in GAMS. Skipping the scenario.')
continue
df.insert(0,'Scenario',this_scenario)
# Reset set columns to account for different GAMS versions' naming conventions:
df.columns = ['Scenario'] + ['Dim' + str(c+1) for c in range(len(df.columns) - 2) ] + ['Val']
df_list.append(df)
# Concatenate all the dfs:
try:
full_df = pd.concat(df_list,axis=0)
except ValueError:
print(f'No scenarios have data for {this_csv}. Not adding it to the hyper file.')
raise ValueError
# Remove any "Undf" values representing years that didn't solve, and set any "Eps" instances to their intended value of zero:
full_df = full_df.rename(columns={'value':'Val'}) #rename valuestreams "value" to "Val" if present--make more elegant later
full_df = full_df.loc[full_df['Val'] != 'Undf']
full_df.loc[full_df['Val'] == 'Eps','Val'] = 0
#Rename the columns to their set names so they're not just "Dim1", "Dim2", etc.
full_df.columns = ['scenario'] + col_name
return full_df
def create_table_definitions(table_aggregation_csv_path):
"""
Create a dict of Tableau Hyper API objects which define all the tables to be created in the .hyper file.
returns:
path_dict, a dict of path strings whose keys are each csv to be aggregated
table_dict, a dict of Hyper API table def objects whose keys are each csv to be aggregated
columnnames_dict, a dict of column names for each concatenated csv
"""
csv_list = pd.read_csv(table_aggregation_csv_path)
csv_list = csv_list[csv_list.ignore!=1].reset_index() #ignore any csvs flagged as such
path_dict = {}
table_dict = {}
columnnames_dict = {}
for this_csv_idx in range(0,len(csv_list)):
csv_parameters = csv_list.loc[this_csv_idx]
path_dict[csv_parameters['csv']] = csv_parameters['path']
# Assemble the columns from those listed in the table aggregation csv
column_objects = []
column_objects.append(TableDefinition.Column('Scenario', SqlType.text(), NULLABLE)) #first column is always ReEDS scenario
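# The remaining fields of each tables_to_aggregate.csv row list that csv's set
# columns in order; 'val' marks the measure column, which takes the 'label' field
# as its display name.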
for col in csv_parameters[6:]:
if not isinstance(col,str): #have run out of columns to add
break
elif col == 'val': #label the measure column according to what's specified in the table aggregation csv
column_objects.append(TableDefinition.Column(csv_parameters['label'], SqlType.double(), NULLABLE))
else: #regular column listed in column_types
try:
column_objects.append(TableDefinition.Column(column_types[col][0], column_types[col][1], NULLABLE))
except KeyError:
print('{} contains a column name whose type has not been defined in column_types.'.format(csv_parameters['csv']))
this_table = TableDefinition(
table_name=csv_parameters['csv'],
columns = column_objects
)
table_dict[csv_parameters['csv']] = this_table
these_columnnames = csv_parameters[6:]
these_columnnames[these_columnnames=='val'] = csv_parameters['label']
columnnames_dict[csv_parameters['csv']] = [ x for x in these_columnnames if str(x) != 'nan' ]
return path_dict, table_dict, columnnames_dict
def update_hyper_file_from_csv(table_def,csv_path,hyper_path,create_new=False):
"""
Load data from a csv into a Hyper file, creating a new Hyper file if directed to do so
"""
print(f"Loading data from {csv_path} into table in Tableau Hyper file at {hyper_path}")
create_mode_to_use = CreateMode.CREATE_AND_REPLACE if create_new else CreateMode.NONE
# Optional process parameters.
# They are documented in the Tableau Hyper documentation, chapter "Process Settings"
# (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/processsettings.html).
process_parameters = {
# Limits the number of Hyper event log files to two.
"log_file_max_count": "2",
# Limits the size of Hyper event log files to 100 megabytes.
"log_file_size_limit": "100M"
}
# Starts the Hyper Process with telemetry enabled to send data to Tableau.
# To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=process_parameters) as hyper:
# Optional connection parameters.
# They are documented in the Tableau Hyper documentation, chapter "Connection Settings"
# (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/connectionsettings.html).
connection_parameters = {"lc_time": "en_US"}
# Creates new Hyper file if directed to.
with Connection(endpoint=hyper.endpoint,
database=hyper_path,
create_mode=create_mode_to_use,
parameters=connection_parameters) as connection:
connection.catalog.create_table_if_not_exists(table_definition=table_def)
# Load all rows into the table from the CSV file.
# `execute_command` executes a SQL statement and returns the impacted row count.
#
# Note:
# You might have to adjust the COPY parameters to the format of your specific csv file.
# The example assumes that your columns are separated with the ',' character
# and that NULL values are encoded via the string 'NULL'.
# Also be aware that the `header` option is used in this example:
# It treats the first line of the csv file as a header and does not import it.
#
# The parameters of the COPY command are documented in the Tableau Hyper SQL documentation
# (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/sql-copy.html).
count_in_table = connection.execute_command(
command=f"COPY {table_def.table_name} from {escape_string_literal(str(csv_path))} with "
f"(format csv, NULL 'NULL', delimiter ',', header)") #include ", header" in string if header is present to skip it
# print(f"The number of rows in table {table_def.table_name} is {count_in_table}.")
# print("The connection to the Hyper file has been closed.")
# print("The Hyper process has been shut down.")
def main(raw_args=None):
# -- Argument Block --
# parser = argparse.ArgumentParser(description="""This script concatenates csv outputs from specified ReEDS runs and outputs them as csvs and a Tableau Hyper extract file.""")
# parser.add_argument("-d","--runs_dir", help="full path to directory containing ReEDS runs")
# parser.add_argument("-r","--reeds_path", default=str(Path(__file__).resolve().parents[1]), help="full path to ReEDS repo")
# parser.add_argument("-o","--output_dir", help="name of new directory to create to house outputs within ReEDS-2.0/runs")
# parser.add_argument("-p","--pivot_dicts", type=lambda s: [x for x in s.split(',')], help="python list of keys to PIVOT_DEFS (in pivot_definitions.py) specifying which set of pivot tables to create")
# parser.add_argument("-s","--scenarios", type=lambda s: [x for x in s.split(',')], help="Python list of scenario names to include")
# parser.add_argument("-a","--all_scenarios", action='store_true', help="Flag to include all scenarios in runs_dir ")
# parser.add_argument("-dy","--dollar_year", type=str, default=str(reeds_bokeh.DEFAULT_DOLLAR_YEAR), help="desired dollar year for outputs (!!!!note: only works for bokehpivot system cost outputs currently. All else still in 2004$")
# args = parser.parse_args(raw_args)
# runs_dir = args.runs_dir
# reeds_path = args.reeds_path
# output_dir = args.output_dir
# pivot_dict = args.pivot_dicts
# scenarios = args.scenarios
# include_all_scenarios = args.all_scenarios
# DOLLAR_YEAR = args.dollar_year
# if include_all_scenarios and scenarios is not None:
# raise argparse.ArgumentTypeError("""
# Either -a can be specified to include all ReEDS scenarios
# containing outputs within runs_dir, or -s can be specified
# with a list of scenarios to include, but not both.
# """)
# elif include_all_scenarios:
# scenarios = os.listdir(runs_dir)
# if include_all_scenarios is None and any([ f for f in scenarios if not (Path(runs_dir) / f).is_dir() ]):
# print(f'Scenarios {[ f for f in scenarios if not (Path(runs_dir) / f).is_dir() ]} are not directories. Skipping them.')
# scenarios = [ f for f in scenarios if (Path(runs_dir) / f).is_dir() ]
# incomplete_scenarios = [ f for f in scenarios if 'outputs' not in os.listdir(Path(runs_dir) / f) ] #grab only directories that contain "outputs"
# incomplete_scenarios = [ f for f in scenarios if 'cap.csv' not in os.listdir(Path(runs_dir) / f / 'outputs') ] #check that outputs contains results
# if incomplete_scenarios:
# print(f'Scenarios {incomplete_scenarios} are incomplete. Skipping them.')
# scenarios = [ f for f in scenarios if f not in incomplete_scenarios ]
# # Test arguments:
# runs_dir = '//nrelnas01/reeds/FY21-EMRE-BeyondVRE/runs/v20220514'
# runs_dir = r'D:\mirish\ReEDS-2.0\runs'
# reeds_path = str(Path(__file__).resolve().parents[2])
# output_dir = "v20220514_tableaupr_results"
# pivot_dicts = ['standard']
# scenarios = [ f for f in os.listdir(runs_dir) if 'outputs' in os.listdir(Path(runs_dir) / f) ] #grab only scens that contain "outputs"
# DOLLAR_YEAR = '2020'
# Test arguments:
runs_dir = r'\\nrelnas01\ReEDS\Some Project\runs\Some Runs Folder' # file path to the folder that contains your runs
reeds_path = r'\\nrelnas01\ReEDS\Some Location\ReEDS-2.0' # file path to where your ReEDS repo is located
output_dir = r'\\nrelnas01\ReEDS\Some Location\tableau_report' # where you want the tableau results to output to
pivot_dicts = ['standard']
scenarios = [ f for f in os.listdir(runs_dir) if (Path(runs_dir) / f / 'outputs').is_dir() ] #keep only run folders that contain an outputs directory
scenarios = [ f for f in scenarios if 'cap.csv' in os.listdir(Path(runs_dir) / f / 'outputs') ] #check that outputs contains results
# scenarios.remove('name of folder') #quick way to remove a run if, say, it failed
# scenarios = ['PTCFIX_v4_Mid_Case'] # quick test of only one scenario
DOLLAR_YEAR = '2020'
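# DOLLAR_YEAR stays a string: it feeds the bokeh TextInput widgets below and the
# '2004' -> DOLLAR_YEAR column-name substitution applied during inflation.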
# Create the output directory if it doesn't exist (a relative output_dir is created inside runs_dir; an absolute path is used as-is):
output_path = (Path(runs_dir) / output_dir)
output_path.mkdir(parents=False,exist_ok=True)
# Redirect output to a log file within the output directory:
sys.stdout = open(output_path / 'tableau_postprocessing_log.txt', 'a')
sys.stderr = open(output_path /'tableau_postprocessing_log.txt', 'a')
# Internal defaults (only used for bokehpivot currently):
PV_YEAR = reeds_bokeh.DEFAULT_PV_YEAR
DISCOUNT_RATE = reeds_bokeh.DEFAULT_DISCOUNT_RATE
END_YEAR = reeds_bokeh.DEFAULT_END_YEAR
# -- Concatenate scenarios --
# Load in Tableau table definitions for all specified csvs:
csv_paths, table_defs, columnname_defs = create_table_definitions(Path(reeds_path,'postprocessing','tableau','tables_to_aggregate.csv'))
# -- Create pivot tables from concatenated csvs --
# Set bokehpivot internals needed for cost calculations:
core.GL['widgets'] = {'var_dollar_year': bmw.TextInput(title='Dollar Year', value=str(DOLLAR_YEAR), css_classes=['wdgkey-dollar_year', 'reeds-vars-drop'], visible=False),
'var_discount_rate': bmw.TextInput(title='Discount Rate', value=str(DISCOUNT_RATE), css_classes=['wdgkey-discount_rate', 'reeds-vars-drop'], visible=False),
'var_pv_year': bmw.TextInput(title='Present Value Reference Year', value=str(PV_YEAR), css_classes=['wdgkey-pv_year', 'reeds-vars-drop'], visible=False),
'var_end_year': bmw.TextInput(title='Present Value End Year', value=str(END_YEAR), css_classes=['wdgkey-end_year', 'reeds-vars-drop'], visible=False)}
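# bokehpivot's cost functions (pre_systemcost, inflate_series) read these widget
# values out of core.GL at call time, so setting them here controls the dollar
# year, discount rate, and present-value years used in the cost tables below.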
# Read in regional mapping table as well as WKT line geometries:
region_mapping = get_region_mapping(reeds_path)
line_geometries = pd.read_csv(Path(reeds_path,'inputs','shapefiles','r_rr_lines_to_25_nearest_neighbors.csv')) #includes lines for 25 nearest neighbors--could include all but table would be 40k rows
cs_geometries = pd.read_csv(Path(reeds_path,'inputs','shapefiles','ctus_cs_polygons_BVRE.csv')) #storage formation polygons
cs_geometries = cs_geometries[['Formation','Formation Deposition','Formation Depth (ft)','Formation Thickness (ft)','Formation Basin','Formation Lithology','Formation CO2 Storage Capacity (MMT CO2)','Formation Centroid State','Formation Polygon Geometry']]
r_cs_spurline_geometries = pd.read_csv(Path(reeds_path,'inputs','shapefiles','ctus_r_cs_spurlines_200mi.csv')) #CO2 spur lines from BAs to storage formations within 200 mi
r_cs_spurline_geometries['distance_m'] = r_cs_spurline_geometries['distance_m'] * 0.000621371 #m to mi
r_cs_spurline_geometries = r_cs_spurline_geometries.rename(columns={'WKT':'Spur Line Geometry',
'ba_str':'r',
'FmnID':'cs',
'distance_m':'Spur Line Length (mi)'})
# Read in table containing list of included csvs to get label values for each csv:
csv_list = pd.read_csv(Path(reeds_path,'postprocessing','tableau','tables_to_aggregate.csv'))
create_new = True #create a new .hyper file on the first iteration
for pivot_dict in pivot_dicts:
for pivot_name in PIVOT_DEFS[pivot_dict]:
pivot_info = PIVOT_DEFS[pivot_dict][pivot_name]
# Initialize the Tableau column definitions for the pivot table's ID columns:
col_defs = [ TableDefinition.Column(column_types[col][0], column_types[col][1], NULLABLE) for col in pivot_info['id_columns'] ] #probably need to manually change some of these names
pivot = pd.DataFrame(columns = pivot_info['id_columns'])
for this_csv, this_operation in zip(pivot_info['csvs'],pivot_info['operation']):
print(f'Processing {this_csv}')
if pivot_name not in pivots_without_csvs: #tables that aren't in tables_to_aggregate are created in the switches below
try:
this_df = concatenate_csvs(this_csv,csv_paths[this_csv],columnname_defs[this_csv],scenarios,runs_dir)
except ValueError:
continue
if this_df.empty:
print(f'The concatenated csv for {this_csv} is empty. Skipping that csv for {pivot_name}.')
continue
else:
this_df = pd.DataFrame()
# Custom table manipulations below:
this_col_list = []
if this_df.empty and pivot_name not in pivots_without_csvs:
print(f'Warning: {this_csv} is empty. Still adding a column of NULL values to the pivot table.')
elif this_csv == 'prod_load':
this_df['i'] = 'Production Load'
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv in ['prod_h2_price','prod_h2ct_cost']:
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='p', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
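# pivot_table leaves a (value, p) MultiIndex on the columns; droplevel(0) keeps
# just the product names, which are renamed to display labels below. The same
# pattern is reused for the other pivoted csvs in this chain.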
if this_csv == 'prod_h2_price':
this_col_list = {'DAC':'Direct Air CO2 Capture Price (2004 $/metric ton)',
'H2_blue':'Blue Hydrogen Price (2004 $/metric ton)',
'H2_green':'Green Hydrogen Price (2004 $/metric ton)'}
elif this_csv == 'prod_h2ct_cost':
this_col_list = {'H2_blue':'Blue Hydrogen Fuel Price (2004 $/MMBtu)',
'H2_green':'Green Hydrogen Fuel Price (2004 $/MMBtu)'}
this_df = this_df.rename(columns=this_col_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'stor_inout':
this_df = this_df.loc[this_df['stor_in_or_out']=='IN'].drop(['stor_in_or_out'],axis=1)
this_df = this_df.groupby(pivot_info['id_columns'],as_index=False).sum()
this_df = this_df.rename(columns={'Storage Operation (MWh)':'Storage Charging and Pumping (MWh)'})
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv in ['opRes_supply_h','opRes_supply']:
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='ortype', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
this_col_list = {'reg':'Reserve Supply - Reg (MW-h)',
'spin':'Reserve Supply - Spin (MW-h)',
'flex':'Reserve Supply - Flex (MW-h)'}
this_df = this_df.rename(columns=this_col_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'opres_trade':
this_df['trtype'] = np.nan
this_df['trtype'] = this_df['trtype'].astype(str)
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='ortype', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
this_col_list = {'reg':'Reserve Trade - Reg (MW-h)',
'spin':'Reserve Trade - Spin (MW-h)',
'flex':'Reserve Trade - Flex (MW-h)'}
this_df = this_df.rename(columns=this_col_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv in ['emit_irt','emit_r']:
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='e', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
this_col_list = {'CO2':'CO2 Emissions from Generation (MMT)',
'SO2':'SO2 Emissions from Generation (MMT)',
'NOX':'NOX Emissions from Generation (MMT)',
'CH4':'CH4 Emissions from Generation (MMT)',
'CO2e':'CO2e Emissions from Generation (MMT)'}
this_df = this_df.rename(columns=this_col_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'reqt_price':
this_df['type_subtype'] = this_df['type'] + '_' + this_df['subtype']
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='type_subtype', aggfunc=np.mean).droplevel(0,axis=1).reset_index()
this_col_list = {'annual_cap_CO2':'CO2 Cap Compliance Price (2004 $)',
'load_na':'Energy Price (2004 $/MWh)',
'oper_res_flex':'Reserve Price - Flex (2004 $/MW-h)',
'oper_res_reg':'Reserve Price - Reg (2004 $/MW-h)',
'oper_res_spin':'Reserve Price - Spin (2004 $/MW-h)',
'res_marg_ann_na':'Planning Reserve Compliance Price (2004 $)',
'res_marg_na':'duplicate',
'state_rps_CES':'State CES Compliance Price (2004 $)',
'state_rps_RPS_All':'State All-RPS Price (2004 $)',
'state_rps_RPS_Solar':'State Solar RPS Price (2004 $)'}
this_df = this_df.rename(columns=this_col_list)
this_df = this_df.drop(['duplicate'],axis=1)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'reqt_quant' and this_operation == 'custom':
this_df['type_subtype'] = this_df['type'] + '_' + this_df['subtype']
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='type_subtype', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
this_col_list = {'annual_cap_CO2':'CO2 Emissions Cap (Mt CO2)',
'load_na':'Hourly Load (MWh)',
'nat_gen_na':'CES Generation Requirement (MWh)',
'oper_res_flex':'Reserve Provision - Flex (MW-h)',
'oper_res_reg':'Reserve Provision - Reg (MW-h)',
'oper_res_spin':'Reserve Provision - Spin (MW-h)',
'res_marg_ann_na':'Planning Reserve Margin (%)',
'res_marg_na':'duplicate',
'state_rps_CES':'State CES Generation (MWh)',
'state_rps_RPS_All':'State All-RPS Generation (MWh)',
'state_rps_RPS_Solar':'State Solar RPS Generation (MWh)',
'state_rps_RPS_Wind':'State Wind RPS Generation (MWh)'}
this_df = this_df.rename(columns=this_col_list)
this_df = this_df.drop(['duplicate'],axis=1)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'emit_captured_r':
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='capture_types', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
this_col_list = {'co2_ccs':'CO2 Captured via CCS Generation (metric tons)',
'co2_smr-ccs':'CO2 Captured via SMR H2 Production (metric tons)',
'co2_dac':'CO2 Captured via DAC (metric tons)'}
this_df = this_df.rename(columns=this_col_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'reqt_quant' and this_operation == 'load_only': #grab only timeslice load from reqt_quant
this_df = this_df.loc[this_df['type']=='load']
this_df['type_subtype'] = this_df['type'] + '_' + this_df['subtype']
this_df['i'] = 'Load'
this_df = this_df.pivot_table(index=pivot_info['id_columns'], columns='type_subtype', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
this_col_list = {'load_na':'Load (MWh)'}
this_df = this_df.rename(columns=this_col_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'systemcost_ba': #Special: we use bokehpivot preprocessing functions here for discounting, annualizing, and inflating
this_df_list = []
#Read in cost category mapping:
cost_cat_map = pd.read_csv(Path(reeds_path) / 'postprocessing' / 'bokehpivot' / 'in' / 'reeds2' / 'cost_cat_map.csv', header=0, names=['cost_cat','cost_cat_display'])
for this_scenario in scenarios:
# Keeping the table calculations below separate for each cost output (despite the code being a bit inefficient) to facilitate understanding of each cost calc.
# First, retrieve discounted and undiscounted annualized costs:
# (This is equivalent to bokehpivot's ReEDS preset for 'Sys Cost Annualized (Bil $)': presets 'Discounted by Year' and 'Undiscounted by Year')
this_sc = reeds_bokeh.df_to_lowercase(pd.read_csv(Path(runs_dir) / this_scenario / 'outputs' / 'systemcost_ba.csv', header=0, names=['cost_cat', 'r', 'year', 'Cost (Bil $)']))
this_sc = this_sc.loc[this_sc['Cost (Bil $)'] != 'Undf']
this_sc.loc[this_sc['Cost (Bil $)'] == 'Eps','Cost (Bil $)'] = 0
dfs = {'sc': this_sc,
'sw': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'switches.csv', header=None, names=['switch','value']),
'val_r': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'val_r.csv',header=None),
'df_capex_init': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'df_capex_init.csv'),
'crf': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'crf.csv', header=0, names=['year', 'crf']),
'scalars': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'scalars.csv', header=None, usecols=[0,1], names=['scalar', 'value'])}
this_ann_df = pre_systemcost(dfs,annualize=True,shift_capital=True,maintain_ba_index=True) #this func is from reeds2.py, altered to keep BA
this_ann_df = this_ann_df.merge(cost_cat_map,on='cost_cat',how='left')
this_ann_df = this_ann_df.rename(columns={'Cost (Bil $)':f'Sys Cost Annualized - Undiscounted (Bil {DOLLAR_YEAR}$)',
'Discounted Cost (Bil $)':f'Sys Cost Annualized - Discounted to {PV_YEAR} (Bil {DOLLAR_YEAR}$)'}) #these have already been inflated in pre_systemcost
# Second, retrieve discounted and undiscounted annual system costs with O&M costs truncated to just 2050 (not to 2070):
# (This is equivalent to bokehpivot's ReEDS preset for 'Sys Cost truncated at final year (Bil $)': presets 'Discounted by Year' and 'Undiscounted by Year')
this_sc = reeds_bokeh.df_to_lowercase(pd.read_csv(Path(runs_dir) / this_scenario / 'outputs' / 'systemcost_ba_bulk_ew.csv', header=0, names=['cost_cat', 'r', 'year', 'Cost (Bil $)']))
this_sc = this_sc.loc[this_sc['Cost (Bil $)'] != 'Undf']
this_sc.loc[this_sc['Cost (Bil $)'] == 'Eps','Cost (Bil $)'] = 0
dfs = {'sc': this_sc,
'sw': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'switches.csv', header=None, names=['switch','value']),
'scalars': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'scalars.csv', header=None, usecols=[0,1], names=['scalar', 'value'])}
this_trunc_df = pre_systemcost(dfs,shift_capital=True,maintain_ba_index=True)
this_trunc_df = this_trunc_df.merge(cost_cat_map,on='cost_cat',how='left')
this_trunc_df = this_trunc_df.rename(columns={'Cost (Bil $)':f'Sys Cost Truncated at Final Year - Undiscounted (Bil {DOLLAR_YEAR}$)',
'Discounted Cost (Bil $)':f'Sys Cost Truncated at Final Year - Discounted to {PV_YEAR} (Bil {DOLLAR_YEAR}$)'}) #these have already been inflated in pre_systemcost
# Third, retrieve discounted and undiscounted annual system costs WITHOUT truncating O&M costs to 2050 (allowing them to continue to 2070):
# (This is equivalent to bokehpivot's ReEDS preset for 'Sys Cost beyond final year (Bil $)': presets 'Discounted by Year' and 'Undiscounted by Year')
this_sc = reeds_bokeh.df_to_lowercase(pd.read_csv(Path(runs_dir) / this_scenario / 'outputs' / 'systemcost_ba_bulk.csv', header=0, names=['cost_cat', 'r', 'year', 'Cost (Bil $)']))
this_sc = this_sc.loc[this_sc['Cost (Bil $)'] != 'Undf']
this_sc.loc[this_sc['Cost (Bil $)'] == 'Eps','Cost (Bil $)'] = 0
dfs = {'sc': this_sc,
'sw': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'switches.csv', header=None, names=['switch','value']),
'scalars': pd.read_csv(Path(runs_dir) / this_scenario / 'inputs_case' / 'scalars.csv', header=None, usecols=[0,1], names=['scalar', 'value'])}
this_untrunc_df = pre_systemcost(dfs,shift_capital=True,maintain_ba_index=True)
this_untrunc_df = this_untrunc_df.merge(cost_cat_map,on='cost_cat',how='left')
this_untrunc_df = this_untrunc_df.rename(columns={'Cost (Bil $)':f'Sys Cost Beyond Final Year - Undiscounted (Bil {DOLLAR_YEAR}$)',
'Discounted Cost (Bil $)':f'Sys Cost Beyond Final Year - Discounted to {PV_YEAR} (Bil {DOLLAR_YEAR}$)'}) #these have already been inflated in pre_systemcost
this_scen_df = pd.merge(this_ann_df,this_trunc_df,on=['r','year','cost_cat_display','cost_cat'],how='outer')
this_scen_df = this_scen_df.merge(this_untrunc_df,on=['r','year','cost_cat_display','cost_cat'],how='outer')
this_scen_df.insert(0,'scenario',this_scenario)
this_scen_df = this_scen_df.rename(columns={'year':'t'})
this_scen_df = this_scen_df[ pivot_info['id_columns'] + [ col for col in this_scen_df.columns if col not in pivot_info['id_columns'] ]]
this_df_list.append(this_scen_df)
this_df = pd.concat(this_df_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'retail':
this_df_list = []
for this_scenario in scenarios:
if not (Path(runs_dir) / this_scenario / 'outputs' / 'retail' / 'retail_rate_components.csv').is_file():
print(f'Scenario {this_scenario} did not report retail rate module outputs.')
else:
this_scen_df = retail_rate_calculations.get_dfplot(run_dir=str(Path(runs_dir) / this_scenario), inputpath=str(Path(reeds_path) / 'postprocessing' / 'retail_rate_module' / 'inputs.csv'), plot_dollar_year=int(DOLLAR_YEAR),tableau_export=True)
this_scen_df = this_scen_df.rename(columns=retail_rate_calculations.tracelabels) #rename cost categories using the plotting dict in the retail rate module
this_scen_df = this_scen_df.rename(columns={'busbar_load':'Busbar Load (MWh)',
'end_use_load':'End-Use Load (MWh)',
'distPV_gen':'Distributed PV Generation (MWh)',
'retail_load':'Retail Load (MWh)',
'ptc_grossup':f'PTC Grossup ({DOLLAR_YEAR}¢/kWh)',
'itc_normalized_value':f'ITC Normalized Value ({DOLLAR_YEAR}¢/kWh)'})
this_scen_df_mwh = this_scen_df[['Busbar Load (MWh)','End-Use Load (MWh)','Distributed PV Generation (MWh)','Retail Load (MWh)']].copy() #copy to avoid mutating this_scen_df below
this_scen_df_mwh['price_component'] = "" #create a column of type object
this_scen_df_prices = this_scen_df.drop(['Busbar Load (MWh)','End-Use Load (MWh)','Distributed PV Generation (MWh)','Retail Load (MWh)'],axis=1)
this_scen_df_prices = this_scen_df_prices.melt(ignore_index=False, var_name = 'price_component', value_name=f'Retail Rate by Component ({DOLLAR_YEAR}¢/kWh)')
this_scen_df_incentives = this_scen_df[[f'PTC Grossup ({DOLLAR_YEAR}¢/kWh)',f'ITC Normalized Value ({DOLLAR_YEAR}¢/kWh)']]
this_scen_df_incentives = this_scen_df_incentives.melt(ignore_index=False, var_name = 'price_component', value_name=f'Tax Credit ({DOLLAR_YEAR}¢/kWh)')
this_scen_df = pd.merge(this_scen_df_prices,this_scen_df_incentives,on=['state','t','price_component'],how='outer')
this_scen_df = pd.merge(this_scen_df,this_scen_df_mwh,on=['state','t','price_component'],how='outer')
this_scen_df['price_component'] = this_scen_df['price_component'].replace("",np.nan)
this_scen_df = this_scen_df.reset_index()
this_scen_df = this_scen_df.rename(columns={'state':'st'})
this_scen_df.insert(0,'scenario',this_scenario)
this_df_list.append(this_scen_df)
if not this_df_list:
print('No scenarios reported retail rate module outputs. Skipping the retail_rates pivot table.')
continue
this_df = pd.concat(this_df_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif this_csv == 'region_mapping':
# Read in regional mapping table:
this_df = region_mapping.copy()
# # Exclude i**** individual site supply regions from mapping table if none are present in the scenarios being processed in order to speed up merges:
# if pivot[pivot['r'].str.startswith('i')].empty:
# this_region_mapping = region_mapping.loc[~region_mapping['r'].str.startswith('i'),:]
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
# Set the column definitions here rather than below, where all columns are assumed to be of type double.
this_col_def = [ TableDefinition.Column(col, SqlType.text(), NULLABLE) for col in this_col_list if col not in ['r','BA Polygon Geometry','BA Centroid Geometry']]
this_col_def = this_col_def + [ TableDefinition.Column(col, SqlType.geography(), NULLABLE) for col in this_col_list if col in ['BA Polygon Geometry','BA Centroid Geometry']]
elif this_csv == 'line_mapping':
# Read in line geometries table:
this_df = line_geometries
this_df = this_df.rename(columns={'from_ba':'rf',
'to_ba':'rt',
'WKT':'Line Geometry'})
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
# Set the column definitions here rather than below, where all columns are assumed to be of type double.
this_col_def = [ TableDefinition.Column(col, SqlType.text(), NULLABLE) for col in this_col_list if col not in ['Line Geometry']]
this_col_def = this_col_def + [ TableDefinition.Column(col, SqlType.geography(), NULLABLE) for col in this_col_list if col in ['Line Geometry']]
elif this_csv in region_mapping.columns:
this_df = pd.read_csv(Path(reeds_path,'inputs','shapefiles','WKT_csvs',(this_csv + '_WKT.csv')))
this_df = this_df.rename(columns={'WKT':column_types[this_csv][0] + " Geometry"})
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
this_df = this_df[[this_csv,column_types[this_csv][0] + " Geometry"]] #reorder to keep ID col first
# Set the column definitions here rather than below, where all columns are assumed to be of type double.
this_col_def = [TableDefinition.Column(column_types[this_csv][0] + " Geometry", SqlType.geography(), NULLABLE)]
elif this_csv == 'ctus_r_cs_mapping':
# Read in CO2 spur line geometry table:
this_df = r_cs_spurline_geometries
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
# Set the column definitions here rather than below, where all columns are assumed to be of type double.
this_col_def = [ TableDefinition.Column(col, SqlType.text(), NULLABLE) for col in this_col_list if col not in ['Spur Line Geometry']]
this_col_def = this_col_def + [ TableDefinition.Column(col, SqlType.geography(), NULLABLE) for col in this_col_list if col in ['Spur Line Geometry']]
elif this_csv == 'ctus_cs_mapping':
# Read in CO2 storage formation geometry table:
this_df = cs_geometries
this_df = this_df.rename(columns={'Formation':'cs'})
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
# Set the column definitions here rather than below, where all columns are assumed to be of type double.
this_col_def = [ TableDefinition.Column(col, SqlType.text(), NULLABLE) for col in this_col_list if col not in ['Formation Polygon Geometry']]
this_col_def = this_col_def + [ TableDefinition.Column(col, SqlType.geography(), NULLABLE) for col in this_col_list if col in ['Formation Polygon Geometry']]
elif this_csv == 'meta':
this_df_list = []
for this_scenario in scenarios:
if not (Path(runs_dir) / this_scenario / 'meta.csv').is_file():
print(f'Scenario {this_scenario} did not report scenario metadata and solve times in meta.csv.')
else:
if pivot_name == 'runtimes':
this_scen_df = pd.read_csv(Path(runs_dir) / this_scenario / 'meta.csv', header=3, names=['t','subprocess','starttime','stoptime','Processing Time (min)'])
this_scen_df['Processing Time (min)'] /= 60 #sec to min
this_scen_df.loc[this_scen_df['t']==0] = np.nan
this_scen_df.insert(1,'process',"")
this_scen_df = this_scen_df.loc[~this_scen_df['Processing Time (min)'].isnull()] #eliminate erroneous "end" subprocess in inputs processing
this_scen_df['process'] = this_scen_df['subprocess'].map({'createmodel.gms':'Input Processing',
'pickle_jar':'Augur',
'd_solveoneyear.gms':'ReEDS Solve',
'd_solveallyears.gms':'ReEDS Solve'})
this_scen_df.loc[this_scen_df['subprocess'].str.contains('ReEDS_Augur',case=False,na=False),'process'] = 'Augur'
this_scen_df.loc[this_scen_df['subprocess'].str.contains('input_processing',case=False,na=False),'process'] = 'Input Processing'
this_scen_df.loc[this_scen_df['subprocess'].str.contains('report',case=False,na=False),'process'] = 'Reporting (Postprocessing)'
this_scen_df.insert(0,'scenario',this_scenario)
this_df_list.append(this_scen_df)
elif pivot_name == 'metadata':
this_scen_df = pd.read_csv(Path(runs_dir) / this_scenario / 'meta.csv', header=0)
this_scen_df = this_scen_df.iloc[[0],:] #keep only the first row
this_scen_df.insert(0,'scenario',this_scenario)
this_df_list.append(this_scen_df)
if not this_df_list:
print(f'No scenarios reported metadata and runtime info in meta.csv. Skipping the {pivot_name} pivot table.')
continue
this_df = pd.concat(this_df_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
elif 'plexos' in pivot_name:
from pivot.query import PLEXOSSolution
this_df_list = []
for this_scenario in scenarios:
this_plexos_dir = Path(runs_dir) / this_scenario / 'plexos_export' / 'solutions'
if not (this_plexos_dir).is_dir():
print(f'Scenario {this_scenario} did not report PLEXOS solutions.')
continue
else:
plexos_solution_parent_dirs = os.listdir(this_plexos_dir)
all_plexos_solution_paths = [ os.listdir(this_plexos_dir / x ) for x in plexos_solution_parent_dirs ]
plexos_solution_paths = []
for i,p in enumerate(all_plexos_solution_paths):
plexos_solution_paths = plexos_solution_paths + [ (plexos_solution_parent_dirs[i],x) for x in p if '.h5' in x ]
## If the solutions are in a subfolder, use the following:
# # plexos_solution_paths = plexos_solution_paths + [ (plexos_solution_parent_dirs[i],x) for x in p if '.h5' in x ]
# for intermediary in os.listdir(this_plexos_dir / plexos_solution_parent_dirs[i]):
# if os.path.isdir(this_plexos_dir / plexos_solution_parent_dirs[i] / intermediary):
# all_soln = os.listdir(this_plexos_dir / plexos_solution_parent_dirs[i] / intermediary)
# plexos_solution_paths = plexos_solution_paths + [ (plexos_solution_parent_dirs[i],str( Path(intermediary) / x)) for x in all_soln if '.h5' in x ]
# First, get the generator--region mapping:
try:
with PLEXOSSolution( str(this_plexos_dir / plexos_solution_paths[0][0] / plexos_solution_paths[0][1]) ) as db:
gen_to_ba = db.relations["regions_generators"].to_frame().reset_index()
gen_to_ba.index = gen_to_ba['child']
gen_to_ba = gen_to_ba['parent']
# If we're querying emissions, we also need the generator-to-category (i) mapping:
if this_csv == 'plexos_emissions':
gen_to_i = db.objects["generators"].to_frame().reset_index()
gen_to_i.index = gen_to_i['name']
gen_to_i = gen_to_i['category']
except Exception: #try the next one if it exists
print(f'Querying the first h5 ({plexos_solution_paths[0][1]}) for BA mapping did not work. Trying the next one.')
with PLEXOSSolution( str(this_plexos_dir / plexos_solution_paths[1][0] / plexos_solution_paths[1][1]) ) as db:
gen_to_ba = db.relations["regions_generators"].to_frame().reset_index()
gen_to_ba.index = gen_to_ba['child']
gen_to_ba = gen_to_ba['parent']
# If we're querying emissions, we also need the generator-to-category (i) mapping:
if this_csv == 'plexos_emissions':
gen_to_i = db.objects["generators"].to_frame().reset_index()
gen_to_i.index = gen_to_i['name']
gen_to_i = gen_to_i['category']
# Only perform interval queries if we want interval data:
if '8760' in pivot_info['id_columns']:
timescale_required = 'interval'
else:
timescale_required = 'day' #otherwise need day, not year, so that we can still eliminate warm-start days
# Now, loop through solutions and grow a df:
this_scen_df_list = []
for this_soln in plexos_solution_paths:
try:
with PLEXOSSolution( str(this_plexos_dir / this_soln[0] / this_soln[1]) ) as db:
if this_csv == 'plexos_capacity':
df = db.generator("Installed Capacity", timescale=timescale_required)
elif this_csv == 'plexos_availableenergy':
df = db.generator("Available Energy", timescale=timescale_required)
elif this_csv == 'plexos_generation':
df = db.generator("Generation", timescale=timescale_required)
elif this_csv == 'plexos_load':
df = db.region("Load", timescale=timescale_required)
elif this_csv == 'plexos_losses':
df = db.region("Interregional Transmission Losses", timescale=timescale_required)
elif this_csv == 'plexos_pumpload':
df = db.generator("Pump Load", timescale=timescale_required)
elif this_csv == 'plexos_use':
df = db.region("Unserved Energy", timescale=timescale_required)
elif this_csv == 'plexos_lmp':
df = db.region("Price", timescale=timescale_required)
elif this_csv == 'plexos_emissions':
df = db.emissions_generators("Production", timescale=timescale_required)
except Exception:
print(f'HDF5 query hit an error for {this_soln[0]}/{this_soln[1]}')
pass
df = df.to_frame()
df.insert(0,'plexos_scenario',this_soln[0])
this_scen_df_list.append(df)
except Exception:
print(f'HDF5 query hit an error for {this_soln[0]}/{this_soln[1]}')
pass
this_scen_df = pd.concat(this_scen_df_list)
this_scen_df = this_scen_df.reset_index()
# Eliminate overlapping warm-start days at the beginning of each month:
this_scen_df = this_scen_df.groupby(list(this_scen_df.columns[:-1])).head(1)
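# (grouping on every column except the value and taking head(1) keeps the first
# occurrence of each object/timestamp combination, dropping the duplicated
# warm-start days)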
if this_csv == 'plexos_capacity':
this_scen_df = this_scen_df.drop_duplicates() #for getting rid of repeated capacity values
this_scen_df['t'] = this_scen_df['timestamp'].dt.year
this_scen_df = this_scen_df.rename(columns={'category':'i',
'timestamp':'8760'})
# Map to BA and aggregate to state level if desired:
if this_csv in ['plexos_load','plexos_use','plexos_lmp']:
this_scen_df = this_scen_df.rename(columns={'name':'r',
'category':'st'})
elif 'r' in pivot_info['id_columns'] or 'st' in pivot_info['id_columns']:
if 'child' in this_scen_df.columns:
this_scen_df['r'] = this_scen_df['child'].map(gen_to_ba)
else:
this_scen_df['r'] = this_scen_df['name'].map(gen_to_ba)
if 'st' in pivot_info['id_columns']:
this_scen_df = this_scen_df.merge(region_mapping[['Balancing Area','State/Province']].drop_duplicates(),left_on='r',right_on='Balancing Area',how='left').drop(['r','Balancing Area'],axis=1)
this_scen_df = this_scen_df.rename(columns={'State/Province':'st'})
if this_csv == 'plexos_emissions':
this_scen_df['i'] = this_scen_df['child'].map(gen_to_i)
this_scen_df = this_scen_df[['plexos_scenario','parent','i','r','t',0]]
this_scen_df = this_scen_df.pivot_table(index=['plexos_scenario','i','r','t'], columns='parent', aggfunc=np.sum).droplevel(0,axis=1).reset_index()
this_col_list = {'CO2':'CO2 Emissions (metric tons)',
'SO2':'SO2 Emissions (metric tons)',
'NOX':'NOx Emissions (metric tons)'}
this_scen_df = this_scen_df.rename(columns=this_col_list)
else:
this_scen_df = this_scen_df[[c for c in pivot_info['id_columns'] if c != 'scenario'] + [0]]
if this_operation == 'sum':
this_scen_df = this_scen_df.groupby([c for c in pivot_info['id_columns'] if c != 'scenario']).sum().reset_index()
elif this_operation == 'mean':
this_scen_df = this_scen_df.groupby([c for c in pivot_info['id_columns'] if c != 'scenario']).mean().reset_index()
this_scen_df.columns = [c for c in pivot_info['id_columns'] if c != 'scenario'] + [plexos_param_names[this_csv]]
this_scen_df.insert(0,'scenario',this_scenario)
this_df_list.append(this_scen_df)
if not this_df_list:
print(f'No scenarios reported PLEXOS results for {this_csv}. Skipping that csv.')
continue
this_df = pd.concat(this_df_list)
this_col_list = [ x for x in this_df.columns if x not in pivot_info['id_columns'] ]
# For the rest of the csvs, just sum or take an average:
elif this_operation == 'sum':
id_cols_in_df = [ col for col in pivot_info['id_columns'] if col in this_df.columns ]
this_df = this_df.groupby(id_cols_in_df,as_index=False).sum()
elif this_operation == 'mean':
id_cols_in_df = [ col for col in pivot_info['id_columns'] if col in this_df.columns ]
this_df = this_df.groupby(id_cols_in_df,as_index=False).mean()
elif this_operation not in ['sum','mean']:
raise ValueError(f"""Operation "{this_operation}" is not implemented.""")
# Allow for csvs that don't contain all the pivot table's ID columns by creating those columns and filling them with NaNs:
id_cols_not_in_df = [ col for col in pivot_info['id_columns'] if col not in this_df.columns ]
if any(id_cols_not_in_df):
for this_col in id_cols_not_in_df:
this_df[this_col] = np.nan #need pandas Series of type object with np.nan values
this_df[this_col] = this_df[this_col].astype(object)
# Assign the pre-defined column to this_col_list if it hasn't been custom-made above:
if not this_col_list and pivot_name != 'metadata':
this_col_list = [ csv_list.loc[(csv_list['ignore']!=1) & (csv_list['csv']==this_csv),'label'].to_list()[0] ]
# Up until now, any outputs in units of $ should be in 2004$.
# Unforgivably hacky--Anywhere there's a "2004" in a column name, convert the dollar year to the one desired:
if any('2004' in s for s in this_col_list):
for s in [x for x in this_col_list if '2004' in x]:
this_df[s] = inflate_series(this_df[s]) #this bokehpivot function uses the dollar year DOLLAR_YEAR, via core.GL (set above)
this_col_list = [ {s:s.replace('2004',DOLLAR_YEAR)}.get(x, x) for x in this_col_list ]
this_df = this_df.rename(columns={s:s.replace('2004',DOLLAR_YEAR)})
# Exclude any extra columns and record the Tableau column definition(s) for this csv:
this_df = this_df[pivot_info['id_columns'] + this_col_list] #sort to expected order and exclude any extraneous columns
# The columns of region_mapping, line_mapping, and the geometry tables include geometry types and are set above:
if pivot_name not in ['region_mapping','line_mapping','ctus_r_cs_mapping','ctus_cs_mapping'] and this_csv not in region_mapping.columns:
this_col_def = [ TableDefinition.Column(col, SqlType.double(), NULLABLE) for col in this_col_list ]
col_defs = col_defs + this_col_def
pivot = pd.merge(pivot,this_df,how='outer',on=pivot_info['id_columns'])
# Optionally merge spatial data into the pivot.
# Only do this (by changing the condition below to True) if the exported pivot csv
# is to be used independently and needs regional data merged in natively,
# since Tableau can perform these merges on the fly.
if False:
pivot, col_defs = merge_spatial_data(pivot,pivot_info,pivot_name,col_defs,region_mapping,line_geometries)
# Write to csv:
pivot.fillna('NULL').to_csv(output_path / (pivot_name + '.csv'),index=False) #have to fill NAs to NULL for direct writing to .hyper
# Write to .hyper file:
this_table_def = TableDefinition(
table_name=pivot_name,
columns = col_defs
)
try:
update_hyper_file_from_csv(this_table_def,output_path / (pivot_name + '.csv'),output_path / (output_dir + '_pivot.hyper'),create_new)
create_new = False
print(f'''Added {pivot_name} to {(output_dir + '_pivot.hyper')}.''')
except HyperException as ex:
print(ex)
print('Done.')
if __name__ == '__main__':
main()