From 08e264a41aa52b172dcf8dc18a0acaa71e0b21d8 Mon Sep 17 00:00:00 2001 From: puyuan Date: Thu, 23 Jan 2025 18:39:00 +0800 Subject: [PATCH] polish(pu): polish dmc-pixel config --- .../config/dmc2gym_pixels_sez_config.py | 60 +++++++++++-------- .../config/dmc2gym_pixels_smz_config.py | 6 +- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/zoo/dmc2gym/config/dmc2gym_pixels_sez_config.py b/zoo/dmc2gym/config/dmc2gym_pixels_sez_config.py index 5a3e16e90..54757f7d3 100644 --- a/zoo/dmc2gym/config/dmc2gym_pixels_sez_config.py +++ b/zoo/dmc2gym/config/dmc2gym_pixels_sez_config.py @@ -3,6 +3,15 @@ # ============================================================== # begin of the most frequently changed config specified by the user # ============================================================== +from zoo.dmc2gym.config.dmc_state_env_space_map import dmc_state_env_action_space_map, dmc_state_env_obs_space_map + +env_id = 'cartpole-balance' # You can specify any DMC tasks here +action_space_size = dmc_state_env_action_space_map[env_id] +obs_space_size = dmc_state_env_obs_space_map[env_id] + +domain_name = env_id.split('-')[0] +task_name = env_id.split('-')[1] + collector_env_num = 8 n_episode = 8 evaluator_env_num = 3 @@ -10,39 +19,37 @@ K = 20 # num_of_sampled_actions num_simulations = 50 update_per_collect = None -replay_ratio = 0.25 +replay_ratio = 0.1 batch_size = 256 -max_env_step = int(5e6) -reanalyze_ratio = 0. +max_env_step = int(1e6) # ======== debug config ======== -collector_env_num = 2 -n_episode = 2 -evaluator_env_num = 2 -continuous_action_space = True -K = 2 # num_of_sampled_actions -num_simulations = 5 -replay_ratio = 0.05 -update_per_collect =2 -batch_size = 4 +# collector_env_num = 2 +# n_episode = 2 +# evaluator_env_num = 2 +# continuous_action_space = True +# K = 5 # num_of_sampled_actions +# num_simulations = 5 +# replay_ratio = 0.05 +# update_per_collect =2 +# batch_size = 4 # ============================================================== # end of the most frequently changed config specified by the user # ============================================================== dmc2gym_pixels_sampled_efficientzero_config = dict( - exp_name=f'data_sez_debug/dmc2gym_pixels_sampled_efficientzero_k{K}_ns{num_simulations}_upc{update_per_collect}-rr{replay_ratio}_rer{reanalyze_ratio}_seed0', + exp_name=f'data_sez/dmc2gym_pixels_sampled_efficientzero_k{K}_ns{num_simulations}_upc{update_per_collect}-rr{replay_ratio}_seed0', env=dict( env_id='dmc2gym-v0', - domain_name="cartpole", - task_name="swingup", + continuous=True, + domain_name=domain_name, + task_name=task_name, from_pixels=True, # pixel/image obs - frame_skip=8, + frame_skip=2, + frame_stack_num=3, warp_frame=True, scale=True, - frame_stack_num=3, channels_first=True, - stop_value=1e6, - continuous=True, collector_env_num=collector_env_num, evaluator_env_num=evaluator_env_num, n_evaluator_episode=evaluator_env_num, @@ -71,19 +78,21 @@ model_path=None, cuda=True, env_type='not_board_games', - game_segment_length=200, + game_segment_length=100, update_per_collect=update_per_collect, batch_size=batch_size, optim_type='AdamW', learning_rate=0.0001, - grad_clip_value=5, - policy_entropy_weight=5e-3, num_simulations=num_simulations, - reanalyze_ratio=reanalyze_ratio, + reanalyze_ratio=0, + policy_entropy_weight=5e-2, + grad_clip_value=5, + manual_temperature_decay=True, + threshold_training_steps_for_final_temperature=int(2.5e4), n_episode=n_episode, eval_freq=int(2e3), replay_ratio=replay_ratio, - replay_buffer_size=int(1e6), # the size/capacity of replay_buffer, in the terms of transitions. + replay_buffer_size=int(1e5), collector_env_num=collector_env_num, evaluator_env_num=evaluator_env_num, ), @@ -96,8 +105,7 @@ type='dmc2gym_lightzero', import_names=['zoo.dmc2gym.envs.dmc2gym_lightzero_env'], ), - # env_manager=dict(type='subprocess'), - env_manager=dict(type='base'), + env_manager=dict(type='subprocess'), policy=dict( type='sampled_efficientzero', import_names=['lzero.policy.sampled_efficientzero'], diff --git a/zoo/dmc2gym/config/dmc2gym_pixels_smz_config.py b/zoo/dmc2gym/config/dmc2gym_pixels_smz_config.py index 3ffc0c69b..fd8ee5608 100644 --- a/zoo/dmc2gym/config/dmc2gym_pixels_smz_config.py +++ b/zoo/dmc2gym/config/dmc2gym_pixels_smz_config.py @@ -18,8 +18,8 @@ K = 20 # num_of_sampled_actions num_simulations = 50 update_per_collect = None -replay_ratio = 0.25 -batch_size = 64 +replay_ratio = 0.1 +batch_size = 256 max_env_step = int(1e6) norm_type = 'LN' seed = 0 @@ -46,7 +46,7 @@ domain_name=domain_name, task_name=task_name, from_pixels=True, # pixel/image obs - frame_skip=8, + frame_skip=2, frame_stack_num=3, warp_frame=True, scale=True,