feature(zt): add metadrive-simulator env and related onppo config (#574)
* embed MetaDrive into DI-engine

* adapt to the latest version of metadrive

* adapt to metadrive

* adapt metadrive feature

* change decision frequency

* fix some problems

* add show bird view observation

* add metadrive in readme.md
timothijoe authored Feb 15, 2023
1 parent bd46c7d commit 0472a3a
Showing 10 changed files with 815 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -285,6 +285,7 @@ P.S: The `.py` file in `Runnable Demo` can be found in `dizoo`
| 32 |[beergame](https://github.com/OptMLGroup/DeepBeerInventory-RL) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/beergame/beergame.png) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/beergame/envs)<br>Environment Guide |
| 33 |[classic_control/acrobot](https://github.com/openai/gym/tree/master/gym/envs/classic_control) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/classic_control/acrobot/acrobot.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/classic_control/acrobot/envs)<br>Environment Guide |
| 34 |[box2d/car_racing](https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) <br> ![continuous](https://img.shields.io/badge/-continous-green) | ![original](./dizoo/box2d/carracing/car_racing.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/box2d/carracing/envs)<br>Environment Guide |
| 35 |[metadrive](https://github.com/metadriverse/metadrive) | ![continuous](https://img.shields.io/badge/-continous-green) | ![original](./dizoo/metadrive/metadrive_env.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/metadrive/env)<br>Environment Guide |

![discrete](https://img.shields.io/badge/-discrete-brightgreen) means discrete action space

Empty file added dizoo/metadrive/__init__.py
Empty file.
111 changes: 111 additions & 0 deletions dizoo/metadrive/config/metadrive_onppo_config.py
@@ -0,0 +1,111 @@
from easydict import EasyDict
from functools import partial
from tensorboardX import SummaryWriter
import metadrive
import gym
from ding.envs import BaseEnvManager, SyncSubprocessEnvManager
from ding.config import compile_config
from ding.model.template import QAC, VAC
from ding.policy import PPOPolicy
from ding.worker import SampleSerialCollector, InteractionSerialEvaluator, BaseLearner
from dizoo.metadrive.env.drive_env import MetaDrivePPOOriginEnv
from dizoo.metadrive.env.drive_wrapper import DriveEnvWrapper

metadrive_basic_config = dict(
exp_name='metadrive_onppo_seed0',
env=dict(
metadrive=dict(
use_render=False,
traffic_density=0.10, # Density of vehicles occupying the roads, range in [0,1]
map='XSOS', # Int or string: an easy way to fill map_config
horizon=4000, # Max step number
driving_reward=1.0, # Reward to encourage agent to move forward.
speed_reward=0.1, # Reward to encourage agent to drive at a high speed
use_lateral_reward=False, # reward for lane keeping
out_of_road_penalty=40.0, # Penalty to discourage driving out of road
crash_vehicle_penalty=40.0, # Penalty to discourage collision
            decision_repeat=20,  # Number of simulation steps per policy decision (the reciprocal of decision frequency)
            out_of_route_done=True,  # End the episode if the agent drives out of the route
),
manager=dict(
shared_memory=False,
max_retry=2,
context='spawn',
),
n_evaluator_episode=16,
stop_value=255,
collector_env_num=8,
evaluator_env_num=8,
),
policy=dict(
cuda=True,
action_space='continuous',
model=dict(
obs_shape=[5, 84, 84],
action_shape=2,
action_space='continuous',
bound_type='tanh',
encoder_hidden_size_list=[128, 128, 64],
),
learn=dict(
epoch_per_collect=10,
batch_size=64,
learning_rate=3e-4,
entropy_weight=0.001,
value_weight=0.5,
clip_ratio=0.02,
adv_norm=False,
value_norm=True,
grad_clip_value=10,
),
collect=dict(n_sample=3000, ),
eval=dict(evaluator=dict(eval_freq=1000, ), ),
),
)
main_config = EasyDict(metadrive_basic_config)


def wrapped_env(env_cfg, wrapper_cfg=None):
return DriveEnvWrapper(MetaDrivePPOOriginEnv(env_cfg), wrapper_cfg)


def main(cfg):
cfg = compile_config(
cfg, SyncSubprocessEnvManager, PPOPolicy, BaseLearner, SampleSerialCollector, InteractionSerialEvaluator
)
collector_env_num, evaluator_env_num = cfg.env.collector_env_num, cfg.env.evaluator_env_num
collector_env = SyncSubprocessEnvManager(
env_fn=[partial(wrapped_env, cfg.env.metadrive) for _ in range(collector_env_num)],
cfg=cfg.env.manager,
)
evaluator_env = SyncSubprocessEnvManager(
env_fn=[partial(wrapped_env, cfg.env.metadrive) for _ in range(evaluator_env_num)],
cfg=cfg.env.manager,
)
model = VAC(**cfg.policy.model)
policy = PPOPolicy(cfg.policy, model=model)
tb_logger = SummaryWriter('./log/{}/'.format(cfg.exp_name))
learner = BaseLearner(cfg.policy.learn.learner, policy.learn_mode, tb_logger, exp_name=cfg.exp_name)
collector = SampleSerialCollector(
cfg.policy.collect.collector, collector_env, policy.collect_mode, tb_logger, exp_name=cfg.exp_name
)
evaluator = InteractionSerialEvaluator(
cfg.policy.eval.evaluator, evaluator_env, policy.eval_mode, tb_logger, exp_name=cfg.exp_name
)
learner.call_hook('before_run')
while True:
if evaluator.should_eval(learner.train_iter):
stop, rate = evaluator.eval(learner.save_checkpoint, learner.train_iter, collector.envstep)
if stop:
break
# Sampling data from environments
new_data = collector.collect(cfg.policy.collect.n_sample, train_iter=learner.train_iter)
learner.train(new_data, collector.envstep)
learner.call_hook('after_run')
collector.close()
evaluator.close()
learner.close()


if __name__ == '__main__':
main(main_config)
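
A minimal launch sketch (not part of this commit): the config file above doubles as a runnable training entry point, assuming DI-engine and the `metadrive` package are installed and the repository root is importable.

    # Hedged usage sketch: launch on-policy PPO training on MetaDrive with the config defined above.
    # TensorBoard logs go to ./log/metadrive_onppo_seed0/ (per the SummaryWriter path in the config file).
    from dizoo.metadrive.config.metadrive_onppo_config import main, main_config

    main(main_config)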
96 changes: 96 additions & 0 deletions dizoo/metadrive/config/metadrive_onppo_eval_config.py
@@ -0,0 +1,96 @@
from easydict import EasyDict
from functools import partial
from tensorboardX import SummaryWriter
import torch
from ding.envs import BaseEnvManager, SyncSubprocessEnvManager
from ding.config import compile_config
from ding.model.template import VAC
from ding.policy import PPOPolicy
from ding.worker import SampleSerialCollector, InteractionSerialEvaluator, BaseLearner
from dizoo.metadrive.env.drive_env import MetaDrivePPOOriginEnv
from dizoo.metadrive.env.drive_wrapper import DriveEnvWrapper

# Path to a trained model checkpoint to load; if None, the policy is initialized from scratch
model_dir = None
metadrive_basic_config = dict(
exp_name='metadrive_onppo_eval_seed0',
env=dict(
metadrive=dict(
use_render=True,
traffic_density=0.10, # Density of vehicles occupying the roads, range in [0,1]
map='XSOS', # Int or string: an easy way to fill map_config
horizon=4000, # Max step number
driving_reward=1.0, # Reward to encourage agent to move forward.
speed_reward=0.10, # Reward to encourage agent to drive at a high speed
use_lateral_reward=False, # reward for lane keeping
out_of_road_penalty=40.0, # Penalty to discourage driving out of road
crash_vehicle_penalty=40.0, # Penalty to discourage collision
            decision_repeat=20,  # Number of simulation steps per policy decision (the reciprocal of decision frequency)
            out_of_route_done=True,  # End the episode if the agent drives out of the route
            show_bird_view=False,  # Evaluation only: whether to draw the five-channel bird-view observation
),
manager=dict(
shared_memory=False,
max_retry=2,
context='spawn',
),
n_evaluator_episode=16,
stop_value=255,
collector_env_num=1,
evaluator_env_num=1,
),
policy=dict(
cuda=True,
action_space='continuous',
model=dict(
obs_shape=[5, 84, 84],
action_shape=2,
action_space='continuous',
bound_type='tanh',
encoder_hidden_size_list=[128, 128, 64],
),
learn=dict(
epoch_per_collect=10,
batch_size=64,
learning_rate=3e-4,
entropy_weight=0.001,
value_weight=0.5,
clip_ratio=0.02,
adv_norm=False,
value_norm=True,
grad_clip_value=10,
),
collect=dict(n_sample=1000, ),
eval=dict(evaluator=dict(eval_freq=1000, ), ),
),
)
main_config = EasyDict(metadrive_basic_config)


def wrapped_env(env_cfg, wrapper_cfg=None):
return DriveEnvWrapper(MetaDrivePPOOriginEnv(env_cfg), wrapper_cfg)


def main(cfg):
cfg = compile_config(cfg, BaseEnvManager, PPOPolicy, BaseLearner, SampleSerialCollector, InteractionSerialEvaluator)
evaluator_env_num = cfg.env.evaluator_env_num
show_bird_view = cfg.env.metadrive.show_bird_view
wrapper_cfg = {'show_bird_view': show_bird_view}
evaluator_env = BaseEnvManager(
env_fn=[partial(wrapped_env, cfg.env.metadrive, wrapper_cfg) for _ in range(evaluator_env_num)],
cfg=cfg.env.manager,
)
model = VAC(**cfg.policy.model)
policy = PPOPolicy(cfg.policy, model=model)
if model_dir is not None:
policy._load_state_dict_collect(torch.load(model_dir, map_location='cpu'))
tb_logger = SummaryWriter('./log/{}/'.format(cfg.exp_name))
evaluator = InteractionSerialEvaluator(
cfg.policy.eval.evaluator, evaluator_env, policy.eval_mode, tb_logger, exp_name=cfg.exp_name
)
stop, rate = evaluator.eval()
evaluator.close()


if __name__ == '__main__':
main(main_config)
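
A hypothetical evaluation sketch (not part of this commit): `model_dir` is a module-level variable read inside `main`, so it can be set before the call; the checkpoint path below is illustrative only.

    # Hedged usage sketch: evaluate a trained policy with the eval config defined above.
    import dizoo.metadrive.config.metadrive_onppo_eval_config as eval_entry

    # Illustrative (assumed) checkpoint path from a previous training run; leave as None to evaluate an untrained policy.
    eval_entry.model_dir = './metadrive_onppo_seed0/ckpt/ckpt_best.pth.tar'
    eval_entry.main(eval_entry.main_config)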
Empty file added dizoo/metadrive/env/__init__.py
Empty file.
(Diffs for the remaining changed files are not shown.)
