main.py
# Copyright 2022 The VDM Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
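"""Entry point for training, evaluating, and sampling from a VDM model."""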
import os  # nopep8
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # Disable TF info/warnings  # nopep8
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
# os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "true"
# os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".95"
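# Note: the environment variables above must be set before jax / tensorflow
# are imported below, which is why the imports break PEP 8 ordering (hence
# the nopep8 markers). Setting XLA_PYTHON_CLIENT_PREALLOCATE to "false"
# makes JAX allocate GPU memory on demand instead of preallocating most of
# it up front.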
import jax
import tensorflow as tf
from absl import logging
from absl import flags
from absl import app
from ml_collections import config_flags
from utils import get_workdir
import experiment_vdm
FLAGS = flags.FLAGS
config_flags.DEFINE_config_file(
    "config", None, "Training configuration.", lock_config=False)
flags.DEFINE_string("workdir", None, "Work unit directory.")
flags.DEFINE_string("checkpoint", "", "Checkpoint to evaluate.")
flags.DEFINE_string("mode", "train", "train / eval / sample")
flags.DEFINE_string("model", "vdm", "vdm")
flags.DEFINE_string("log_level", "info", "info/warning/error")
flags.mark_flags_as_required(["config", "workdir"])
def main(argv):
  del argv
  if jax.process_index() == 0:
    logging.set_verbosity(FLAGS.log_level)
  else:
    logging.set_verbosity("error")
  logging.warning("=== Start of main() ===")

  # Hide any GPUs from TensorFlow. Otherwise TF might reserve memory and make
  # it unavailable to JAX. (Not necessary with TPU.)
  tf.config.experimental.set_visible_devices([], "GPU")

  logging.info("JAX process: %d / %d", jax.process_index(), jax.process_count())
  logging.info("JAX devices: %r", jax.devices())

  if FLAGS.model == "vdm":
    experiment = experiment_vdm.Experiment_VDM(FLAGS.config)
  else:
    raise ValueError(f"Unknown FLAGS.model: {FLAGS.model}")

  if FLAGS.mode == "train":
    workdir = os.path.join(FLAGS.workdir, get_workdir())
    logging.info("Training at workdir: %s", workdir)
    experiment.train_and_evaluate(workdir)
  elif FLAGS.mode == "eval":
    experiment.evaluate(FLAGS.workdir, FLAGS.checkpoint)
  elif FLAGS.mode == "sample":
    experiment.sample(FLAGS.workdir, FLAGS.checkpoint)
  else:
    raise ValueError(f"Unknown FLAGS.mode: {FLAGS.mode}")
if __name__ == "__main__":
  jax.config.config_with_absl()
  app.run(main)