# gen_diff_sorp.py — forked from pdebench/PDEBench (122 lines, 3.66 KB).
# NOTE: GitHub page-scrape residue (repository UI text and gutter line
# numbers) removed; the script source follows.
#!/usr/bin/env python
"""Generate the diffusion-sorption PDE dataset (PDEBench data generation)."""
import os

import dotenv

# Load environment variables from a `.env` file if it exists.
# dotenv recursively searches for `.env` in all folders starting from the
# work dir, which lets each machine (HPC vs. local laptop) keep its own
# local defaults.
dotenv.load_dotenv()

import time

# If the environment variables are fixed for all executions, they can be
# hard-coded like this.  They must be set BEFORE numpy/scipy are imported
# so the BLAS/OpenMP backends pick them up at import time.
num_threads = "4"
os.environ["OMP_NUM_THREADS"] = num_threads
os.environ["MKL_NUM_THREADS"] = num_threads
os.environ["OPENBLAS_NUM_THREADS"] = num_threads
os.environ["VECLIB_MAXIMUM_THREADS"] = num_threads
os.environ["NUMEXPR_NUM_THREADS"] = num_threads

import logging
import multiprocessing as mp
from itertools import repeat

import h5py
import hydra
import numpy as np
from hydra.utils import get_original_cwd
from omegaconf import DictConfig, OmegaConf

from src import utils
from uploader import dataverse_upload

log = logging.getLogger(__name__)
def simulator(config, i):
    """Run one diffusion-sorption simulation and append it to the HDF5 file.

    Args:
        config: Hydra/OmegaConf config; ``config.sim`` holds the Simulator
            kwargs and ``config.output_path`` the target HDF5 file path.
        i: Integer seed for this sample; also used (zero-padded) as the
            HDF5 group name.
    """
    # Imported lazily so each multiprocessing worker pays the import cost
    # itself and importing this module stays cheap.
    from src import sim_diff_sorp

    config.sim.seed = i
    log.info("Starting seed %s", i)

    start_time = time.time()
    sim_obj = sim_diff_sorp.Simulator(**config.sim)
    data_sample = sim_obj.generate_sample()
    duration = time.time() - start_time
    log.info("Seed %s took %s to finish", config.sim.seed, duration)

    seed_str = str(i).zfill(4)
    # "a" (append) mode: every seed adds its own group to the shared file.
    with h5py.File(utils.expand_path(config.output_path), "a") as data_f:
        ## Chunking for compression and data access
        ## https://docs.h5py.org/en/stable/high/dataset.html#chunked-storage
        ## should be by batch and less than 1MB
        ## lzf compression for float32 is kind of pointless though.
        data_f.create_dataset(
            f"{seed_str}/data", data=data_sample, dtype="float32", compression="lzf"
        )
        data_f.create_dataset(
            f"{seed_str}/grid/x", data=sim_obj.x, dtype="float32", compression="lzf"
        )
        data_f.create_dataset(
            f"{seed_str}/grid/t", data=sim_obj.t, dtype="float32", compression="lzf"
        )
        # Store the full config alongside the data for reproducibility.
        data_f.attrs[f"{seed_str}/config"] = OmegaConf.to_yaml(config)
@hydra.main(config_path="configs/", config_name="diff-sorp")
def main(config: DictConfig):
    """Use config specifications to generate the dataset.

    Spawns one simulation per seed across all CPU cores and optionally
    uploads the resulting HDF5 file to a Dataverse instance.
    """
    # Imports should be nested inside @hydra.main to optimize tab completion
    # Read more here: https://github.com/facebookresearch/hydra/issues/934
    # Change to original working directory to import modules
    temp_path = os.getcwd()
    os.chdir(get_original_cwd())
    from src import utils
    import h5py

    # Change back to the hydra working directory
    os.chdir(temp_path)

    # Resolve the output file path and make sure its directory exists.
    work_path = os.path.dirname(config.work_dir)
    output_path = os.path.join(work_path, config.data_dir, config.output_path)
    if not os.path.isdir(output_path):
        os.makedirs(output_path)
    config.output_path = os.path.join(output_path, config.output_path) + '.h5'

    num_samples_init = 0
    num_samples_final = 10000

    # Fan the seeds out over all cores.  The context manager guarantees the
    # worker pool is shut down even if a simulation raises (the original
    # code leaked the pool by never calling close()/join()).
    seed = np.arange(num_samples_init, num_samples_final).tolist()
    with mp.Pool(mp.cpu_count()) as pool:
        pool.starmap(simulator, zip(repeat(config), seed))

    if config.upload:
        dataverse_upload(
            file_path=config.output_path,
            dataverse_url=os.getenv(
                'DATAVERSE_URL', 'https://darus.uni-stuttgart.de'),
            dataverse_token=os.getenv(
                'DATAVERSE_API_TOKEN', ''),
            dataverse_dir=config.name,
            dataverse_id=os.getenv(
                'DATAVERSE_ID', ''),
            log=log)

    return
# (Removed a stray duplicate `import os` — os is already imported at the
# top of the module.)
if __name__ == "__main__":
    # main() returns None; it is invoked purely for its side effects, so
    # binding the result (the original `test = main()`) was meaningless.
    main()