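"""Generates configuration files for the ISA experiments.

For each run, this script creates a "batch_<i>" directory under the given
experiment folder and writes one config.json per environment (or a single
config.json when --multitask is set). The configurations cover the
officeworld, craftworld and waterworld domains and the QRM and HRL
learning algorithms.
"""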
import argparse
import os
from utils import utils
from reinforcement_learning.learning_algorithm import LearningAlgorithm
from reinforcement_learning.isa_qrm_algorithm import ISAAlgorithmBase, ISAAlgorithmQRM
from reinforcement_learning.isa_hrl_algorithm import ISAAlgorithmHRL
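# Base random seeds for the automaton learning algorithm; one seed per run is
# derived from this list in _generate_isa_random_seeds.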
BASE_ISA_SEEDS = [25101993, 25041996, 31121960, 20091953, 30422020, 31071993, 11091714, 25071992, 1123581321, 31415926]
def _set_environment_config(config, args, environments, experiment_path):
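    """Sets the environment-related entries shared by all configurations: the
    environment names, how tasks are generated, the number of tasks, the
    starting environment seed and the output folder for each environment."""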
config["environments"] = environments
config["task_generation_method"] = "random"
config["num_tasks"] = args.num_tasks
config["use_environment_seed"] = True
config["starting_environment_seed"] = args.seed
config["folder_names"] = [os.path.join(experiment_path, env) for env in environments]
def _set_gpu_config(config, use_gpu):
    config[ISAAlgorithmBase.USE_GPU] = use_gpu
def _set_seed_config(config, isa_seed_value):
    config[ISAAlgorithmBase.USE_SEED] = isa_seed_value is not None
    if isa_seed_value is not None:
        config[ISAAlgorithmBase.SEED_VALUE] = isa_seed_value
def _set_checkpoint_config(config, environments, multitask, experiment_path):
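    """Enables checkpointing every 1000 episodes; checkpoints are written to the
    experiment folder itself in the multitask case and to the single
    environment's subfolder otherwise."""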
    config[ISAAlgorithmBase.CHECKPOINT_ENABLE] = True
    config[ISAAlgorithmBase.CHECKPOINT_FREQUENCY] = 1000  # int(config[LearningAlgorithm.NUM_EPISODES] / 10)
    if multitask:
        config[ISAAlgorithmBase.CHECKPOINT_FOLDER] = experiment_path
    else:
        config[ISAAlgorithmBase.CHECKPOINT_FOLDER] = os.path.join(experiment_path, environments[0])
def _set_interleaved_config(config, args):
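    """Sets the fields that control interleaved automaton learning. When it is
    enabled, the automaton learning task is configured (ILASP timeout and
    version, symmetry breaking, maximum disjunction size, acyclicity);
    otherwise, the initial automaton mode is set to "target"."""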
    config[ISAAlgorithmBase.INTERLEAVED_FIELD] = args.interleaved_learning
    config[LearningAlgorithm.USE_COMPRESSED_TRACES] = args.use_compressed_traces
    config[LearningAlgorithm.IGNORE_EMPTY_OBSERVATIONS] = args.ignore_empty_observations
    if args.interleaved_learning:
        config[ISAAlgorithmBase.ILASP_TIMEOUT_FIELD] = 7200
        config[ISAAlgorithmBase.ILASP_VERSION_FIELD] = "2"
        config[ISAAlgorithmBase.AVOID_LEARNING_ONLY_NEGATIVE] = args.avoid_learning_negative_only_formulas
        config[ISAAlgorithmBase.USE_RESTRICTED_OBSERVABLES] = args.use_restricted_observables
        config[ISAAlgorithmBase.MAX_DISJUNCTION_SIZE] = args.max_disj_size
        config[ISAAlgorithmBase.LEARN_ACYCLIC_GRAPH] = args.learn_acyclic
        if args.symmetry_breaking_method is not None:
            config[ISAAlgorithmBase.SYMMETRY_BREAKING_METHOD] = args.symmetry_breaking_method
        config[ISAAlgorithmBase.PRIORITIZE_OPTIMAL_SOLUTIONS] = args.prioritize_optimal_solutions
    else:
        config[ISAAlgorithmBase.INITIAL_AUTOMATON_MODE] = "target"
def _set_gridworld_rl_config(config, args):
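    """Sets the tabular Q-learning parameters used for the gridworld domains
    (officeworld and craftworld); craftworld additionally uses a 39x39 grid
    with a single observable per location."""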
config["hide_state_variables"] = True
if args.domain == "craftworld":
config["enforce_single_observable_per_location"] = True
config["height"] = 39
config["width"] = 39
config[LearningAlgorithm.DEBUG] = False
config[LearningAlgorithm.TRAIN_MODEL] = True
config[LearningAlgorithm.NUM_EPISODES] = 10000
config[LearningAlgorithm.MAX_EPISODE_LENGTH] = args.maximum_episode_length # 250 is good for officeworld
config[LearningAlgorithm.LEARNING_RATE] = 0.1
config[LearningAlgorithm.EXPLORATION_RATE] = 0.1
config[LearningAlgorithm.DISCOUNT_RATE] = 0.99
config[LearningAlgorithm.IS_TABULAR_CASE] = True
def _set_waterworld_rl_config(config, args):
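    """Sets the deep RL parameters used for waterworld: a double DQN with a
    target network and experience replay, evaluated greedily every 500
    episodes."""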
config["random_restart"] = True
config[LearningAlgorithm.DEBUG] = False
config[LearningAlgorithm.TRAIN_MODEL] = True
config[LearningAlgorithm.NUM_EPISODES] = 50000
config[LearningAlgorithm.MAX_EPISODE_LENGTH] = args.maximum_episode_length # 100 can be good for waterworld (higher causes not focus on goal)
config[LearningAlgorithm.LEARNING_RATE] = 1e-5
config[LearningAlgorithm.EXPLORATION_RATE] = 0.1
config[LearningAlgorithm.DISCOUNT_RATE] = 0.9
config[LearningAlgorithm.IS_TABULAR_CASE] = False
config[LearningAlgorithm.GREEDY_EVALUATION_FREQUENCY] = 500
config[LearningAlgorithm.GREEDY_EVALUATION_EPISODES] = 10
config[ISAAlgorithmBase.USE_DOUBLE_DQN] = True
config[ISAAlgorithmBase.TARGET_NET_UPDATE_FREQUENCY] = 100
config[ISAAlgorithmBase.NUM_HIDDEN_LAYERS] = 4
config[ISAAlgorithmBase.NUM_NEURONS_PER_LAYER] = 64
config[ISAAlgorithmBase.USE_EXPERIENCE_REPLAY] = True
config[ISAAlgorithmBase.EXPERIENCE_REPLAY_BUFFER_SIZE] = 50000
config[ISAAlgorithmBase.EXPERIENCE_REPLAY_BATCH_SIZE] = 32
config[ISAAlgorithmBase.EXPERIENCE_REPLAY_START_SIZE] = 1000
def _set_algorithm_config(config, domain, algorithm, rl_guidance_method):
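    """Sets the algorithm-specific fields: reward shaping for QRM, and the
    policy-bank and pseudoreward options for HRL (pseudorewards are only
    enabled when an RL guidance method is given)."""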
if algorithm == "qrm":
config[ISAAlgorithmQRM.USE_REWARD_SHAPING] = rl_guidance_method is not None
if rl_guidance_method is not None:
config[ISAAlgorithmQRM.REWARD_SHAPING_METHOD] = rl_guidance_method
elif algorithm == "hrl":
config[ISAAlgorithmHRL.USE_NUM_POSITIVE_MATCHINGS] = True
config[ISAAlgorithmHRL.ALWAYS_REUSE_QFUNCTION] = False
config[ISAAlgorithmHRL.UPDATE_ALL_POLICY_BANK] = False if domain == "waterworld" else True # very costly for DQN approach to update everything...
if rl_guidance_method is None:
config[ISAAlgorithmHRL.ENABLE_PSEUDOREWARD_ON_DEADEND] = False
config[ISAAlgorithmHRL.PSEUDOREWARD_AFTER_STEP] = 0.0
else:
config[ISAAlgorithmHRL.ENABLE_PSEUDOREWARD_ON_DEADEND] = True
config[ISAAlgorithmHRL.PSEUDOREWARD_AFTER_STEP] = -0.01
def _get_experiment_config(args, experiment_path, isa_seed_value, environments):
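    """Builds the complete configuration dictionary for one experiment by
    combining the environment, RL, GPU, seed, checkpoint, interleaving and
    algorithm settings."""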
    config = {}
    _set_environment_config(config, args, environments, experiment_path)
    if args.domain == "officeworld":
        _set_gridworld_rl_config(config, args)
    elif args.domain == "craftworld":
        _set_gridworld_rl_config(config, args)
    elif args.domain == "waterworld":
        _set_waterworld_rl_config(config, args)
    _set_gpu_config(config, args.use_gpu)
    _set_seed_config(config, isa_seed_value)
    _set_checkpoint_config(config, environments, args.multitask, experiment_path)
    _set_interleaved_config(config, args)
    _set_algorithm_config(config, args.domain, args.algorithm, args.rl_guidance_method)
    return config
def _create_experiments(args, experiment_path, isa_seed_value, experiment_directories):
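    """Creates the folder structure and config.json files for one batch: a
    single shared configuration in the multitask case, or one configuration
    per environment otherwise. Every created directory is appended to
    experiment_directories."""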
    utils.mkdir(experiment_path)
    environments = args.environments if args.environments is not None else _get_tasks(args.domain)
    if args.multitask:
        config = _get_experiment_config(args, experiment_path, isa_seed_value, environments)
        utils.write_json_obj(config, os.path.join(experiment_path, "config.json"))
        experiment_directories.append(experiment_path)
    else:
        for env in environments:
            env_path = os.path.join(experiment_path, env)
            utils.mkdir(env_path)
            config = _get_experiment_config(args, experiment_path, isa_seed_value, [env])
            utils.write_json_obj(config, os.path.join(env_path, "config.json"))
            experiment_directories.append(env_path)
def _get_officeworld_tasks():
return ["coffee", "coffee-mail", "visit-abcd"]
def _get_craftworld_tasks():
return ["make-plank", "make-stick", "make-cloth", "make-rope", "make-bridge", "make-bed", "make-axe", "make-shears",
"get-gold", "get-gem"]
def _get_waterworld_tasks():
return ["water-rgb", "water-rg-b", "water-rg"]
def _get_tasks(domain):
if domain == "officeworld":
return _get_officeworld_tasks()
elif domain == "craftworld":
return _get_craftworld_tasks()
elif domain == "waterworld":
return _get_waterworld_tasks()
else:
raise RuntimeError("Error: Unknown domain '%s'." % domain)
def _generate_isa_random_seeds(num_runs):
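    """Returns one seed per run by cycling through BASE_ISA_SEEDS and adding an
    offset that grows by 1 each time the base list is exhausted."""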
    isa_random_seeds = []
    seed_sum = 0
    for i in range(num_runs):
        seed = BASE_ISA_SEEDS[i % len(BASE_ISA_SEEDS)] + seed_sum
        isa_random_seeds.append(seed)
        if (i + 1) % len(BASE_ISA_SEEDS) == 0:
            seed_sum += 1
    return isa_random_seeds
def _get_argparser():
    parser = argparse.ArgumentParser()
    parser.add_argument("domain", help="domain whose tasks will be used (officeworld, craftworld, waterworld)")
    parser.add_argument("algorithm", help="which algorithm to use with interleaved automata learning (qrm, hrl)")
    parser.add_argument("num_runs", type=int, help="how many runs to create")
    parser.add_argument("root_experiments_path", help="folder where the experiment folders are created")
    parser.add_argument("experiment_folder_name", help="name of the experiment folder")
    parser.add_argument("--maximum_episode_length", "-m", type=int, default=100, help="maximum episode length")
    parser.add_argument("--num_tasks", "-t", type=int, default=1, help="number of MDPs used")
    parser.add_argument("--seed", type=int, default=0, help="starting environment seed (the 't' tasks use seeds starting from this value)")
    parser.add_argument("--interleaved_learning", "-i", action="store_true", help="whether to learn the automata interleaved with reinforcement learning")
    parser.add_argument("--use_restricted_observables", "-r", action="store_true", help="whether to use only the observables required by the task")
    parser.add_argument("--max_disj_size", "-d", type=int, default=1, help="maximum number of edges between two automaton states (maximum disjunction size)")
    parser.add_argument("--learn_acyclic", "-a", action="store_true", help="whether the learned automaton is enforced to be acyclic")
    parser.add_argument("--symmetry_breaking_method", "-s", default=None, help="symmetry breaking method to use (bfs, bfs-alternative, increasing-path)")
    parser.add_argument("--use_compressed_traces", "-c", action="store_true", help="whether to use compressed traces")
    parser.add_argument("--ignore_empty_observations", "-e", action="store_true", help="whether to ignore empty observations")
    parser.add_argument("--prioritize_optimal_solutions", "-p", action="store_true", help="whether to use additional criteria for ranking optimal solutions")
    parser.add_argument("--rl_guidance_method", "-g", default=None, help="method used to guide the RL agent (qrm: max_distance, min_distance; hrl: pseudorewards)")
    parser.add_argument("--avoid_learning_negative_only_formulas", "-n", action="store_true", help="whether to avoid learning formulas formed only by negative literals")
    parser.add_argument("--environments", nargs='+', default=None, help="list of environments of the specified domain")
    parser.add_argument("--use_gpu", action="store_true", help="whether to use the GPU")
    parser.add_argument("--timed", action="store_true", help="whether the experiment's running time is to be compared with other experiments")
    parser.add_argument("--multitask", action="store_true", help="whether the experiments are multitask")
    return parser
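# Entry point: generate one "batch_<i>" folder per run, each using its own ISA seed.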
if __name__ == "__main__":
    args = _get_argparser().parse_args()
    seeds = _generate_isa_random_seeds(args.num_runs)
    root_experiments_path = os.path.abspath(args.root_experiments_path)
    folder_name = os.path.join(root_experiments_path, args.experiment_folder_name)
    for i in range(1, args.num_runs + 1):
        experiment_directories = []
        experiment_path = os.path.join(folder_name, "batch_%d" % i)
        _create_experiments(args, experiment_path, seeds[i - 1], experiment_directories)
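# Example invocation (illustrative; the path and option values below are
# placeholders, not values prescribed by the script):
#
#   python config_generator.py officeworld qrm 10 /path/to/experiments office-qrm \
#       --interleaved_learning --learn_acyclic --use_compressed_traces \
#       --symmetry_breaking_method bfs --rl_guidance_method max_distance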