diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 77558a5..091d610 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -10,7 +10,7 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
@@ -20,19 +20,11 @@ jobs:
python -m pip install --upgrade pip
pip install setuptools wheel twine
- - name: Build and publish to Test PyPI
- env:
- TWINE_USERNAME: ${{ secrets.TEST_PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.TEST_PYPI_PASSWORD }}
- run: |
- python setup.py sdist bdist_wheel
- twine check dist/*
- twine upload --repository testpypi dist/*
-
- name: Build and publish to PyPI
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
+ twine check dist/*
twine upload dist/*
diff --git a/.github/workflows/github-actions.yml b/.github/workflows/test-main.yml
similarity index 84%
rename from .github/workflows/github-actions.yml
rename to .github/workflows/test-main.yml
index 60588f1..ed37e39 100644
--- a/.github/workflows/github-actions.yml
+++ b/.github/workflows/test-main.yml
@@ -1,4 +1,4 @@
-name: CI
+name: Test
on:
push:
@@ -17,7 +17,7 @@ jobs:
steps:
- name: Checkout code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
@@ -26,7 +26,8 @@ jobs:
- name: Install dependencies
run: |
- pip install setuptools==65.5.0
+ python -m pip install --upgrade pip
+ pip install setuptools wheel twine
pip install -e .[dev]
- name: Pylint
@@ -43,7 +44,7 @@ jobs:
- name: Pytest
run: |
- coverage run -m pytest tests/
+ coverage run --source voxelgym2D -m pytest tests/
coverage report
- name: Build docs
diff --git a/LICENSE b/LICENSE
index 72b2e54..7893339 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2023 Hari
+Copyright (c) 2023 Harisankar Babu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index f760489..5cb030f 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,44 @@
# voxelgym2D
+
+[![MIT License](https://img.shields.io/github/license/harisankar95/voxelgym2D)](LICENSE)
+[![PyPI](https://img.shields.io/pypi/v/voxelgym2D)](https://pypi.org/project/voxelgym2D/)
+[![Pipeline](https://github.com/harisankar95/voxelgym2D/actions/workflows/test-main.yml/badge.svg?branch=main)](https://github.com/harisankar95/voxelgym2D/actions/workflows/test-main.yml)
+[![codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
A gym environment for voxel/grid based reinforcement learning for path planning.
## Results with [SB3](https://github.com/DLR-RM/stable-baselines3) (v1.6.2) : PPO :smile:
+
Here are the results of training a PPO agent on the `onestep-v0` environment using the example [here](examples/train_ppo.py). Below you will find the episode reward and episode length over training steps. As training progresses, the episode reward increases and the episode length decreases, as the agent learns to identify the goal and reach it along the shortest possible path.
-
## Installation
+
```bash
pip install git+https://github.com/harisankar95/voxelgym2D.git
```
@@ -43,7 +50,9 @@ pip install voxelgym2D
```
## Development
+
To install the package in development mode, run the following command in the root directory of the repository:
+
```bash
git clone https://github.com/harisankar95/voxelgym2D.git ~/path/to/repo
cd ~/path/to/repo
@@ -54,41 +63,53 @@ pip install -e .[dev,sb3]
```
## Usage
+
```python
import voxelgym2D
-import gym
+import gymnasium as gym
env = gym.make("voxelgym2D:onestep-v0")
-env.reset()
+observation, info = env.reset(seed=123456)
env.render()
```
## Examples
+
The examples can be found [here](examples).
## License
+
This project is licensed under the terms of the [MIT license](LICENSE).
## Documentation
+
The documentation can be found [here](https://harisankar95.github.io/voxelgym2D/).
## Changelog
+
### 0.1.0
+
- Initial release of voxelgym 2D environments tested with stable_baselines 3 (v1.6.2) and python 3.8
+
### 0.2.0
+
- Available on PyPI, sphinx documentation in work
+### 0.3.0
+
+- Migration to gymnasium
+- Agent can now be reset to multiple start positions
+
## TODO
+
- [x] Add 2D environments
-- [ ] Test with gym 0.26.2
- [ ] Add documentation
-## Known issues
-- [ ] Currently only supports gym==0.21.0 :neutral_face:, hence setuptools==65.5.0 is required to install gym.
-
## Contributing
+
Contributions are welcome! Please open an issue or a pull request.
## References
+
- [OpenAI Gym](https://arxiv.org/abs/1606.01540)
- [Stable Baselines 3](http://jmlr.org/papers/v22/20-1364.html)
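The changelog's "Migration to gymnasium" entry drives most of the API changes in this diff: `reset()` now returns `(observation, info)` and `step()` returns five values instead of four. A minimal sketch of the difference, for orientation only (not part of the patch):

```python
import gymnasium as gym

env = gym.make("voxelgym2D:onestep-v0")

# Gymnasium-style API used after the migration:
obs, info = env.reset(seed=123456)          # reset returns (observation, info)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
done = terminated or truncated              # the old single `done` flag is split in two

# Legacy gym (<= 0.21) equivalents that this change removes:
#   obs = env.reset()
#   obs, reward, done, info = env.step(action)

env.close()
```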
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
new file mode 100644
index 0000000..ff4a75e
--- /dev/null
+++ b/docs/INSTALL.md
@@ -0,0 +1,28 @@
+# Installation
+
+## PyPI
+
+Install the package from PyPI using pip:
+
+```bash
+pip install voxelgym2D
+```
+
+## GitHub
+
+```bash
+pip install git+https://github.com/harisankar95/voxelgym2D.git
+```
+
+## For development, install in editable mode
+
+To install the package in development mode, run the following command in the root directory of the repository:
+
+```bash
+git clone https://github.com/harisankar95/voxelgym2D.git ~/path/to/repo
+cd ~/path/to/repo
+pip install -e .[dev]
+
+# to additionally install Stable Baselines3 and PyTorch (optional)
+pip install -e .[dev,sb3]
+```
diff --git a/docs/INTRO.md b/docs/INTRO.md
new file mode 100644
index 0000000..a1bc086
--- /dev/null
+++ b/docs/INTRO.md
@@ -0,0 +1,16 @@
+# Voxelgym2D
+
+A gym environment for voxel/grid based reinforcement learning for path planning.
+
+<!-- results: episode reward and episode length plots -->
diff --git a/docs/USAGE.md b/docs/USAGE.md
new file mode 100644
index 0000000..d05046b
--- /dev/null
+++ b/docs/USAGE.md
@@ -0,0 +1,21 @@
+# Examples
+
+For usage examples with detailed descriptions take a look at the [examples](https://github.com/harisankar95/voxelgym2D/tree/main/examples/) folder.
+
+## Basic usage
+
+```python
+import gymnasium as gym
+
+env = gym.make("voxelgym2D:onestep-v0")
+observation, info = env.reset(seed=123456)
+
+done = False
+while not done:
+ action = env.action_space.sample() # agent policy that uses the observation and info
+ observation, reward, terminated, truncated, info = env.step(action)
+
+ done = terminated or truncated
+ env.render()
+
+env.close()
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
new file mode 100644
index 0000000..ca6b99e
--- /dev/null
+++ b/docs/_static/custom.css
@@ -0,0 +1,77 @@
+/* Based on Stable Baselines 3 theme
+* https://github.com/DLR-RM/stable-baselines3/
+* */
+:root {
+ --main-bg-color: #B6C8DB;
+ --link-color: #6DB59F;
+}
+
+/* Header fonts */
+h1,
+h2,
+.rst-content .toctree-wrapper p.caption,
+h3,
+h4,
+h5,
+h6,
+legend,
+p.caption {
+ font-family: "Lato", "proxima-nova", "Helvetica Neue", Arial, sans-serif;
+}
+
+
+/* Docs background */
+.wy-side-nav-search {
+ background-color: var(--main-bg-color);
+}
+
+/* Mobile version */
+.wy-nav-top {
+ background-color: var(--main-bg-color);
+}
+
+/* Change link colors (except for the menu) */
+a {
+ color: var(--link-color);
+}
+
+a:hover {
+ color: #798EA9;
+}
+
+.wy-menu a {
+ color: #b3b3b3;
+}
+
+.wy-menu a:hover {
+ color: #b3b3b3;
+}
+
+a.icon.icon-home {
+ color: #b3b3b3;
+}
+
+.version {
+ color: var(--link-color) !important;
+}
+
+
+/* Make code blocks have a background */
+.codeblock,
+pre.literal-block,
+.rst-content .literal-block,
+.rst-content pre.literal-block,
+div[class^='highlight'] {
+ background: #FFFFFF;
+}
+
+/* Change style of types in the docstrings .rst-content .field-list */
+.field-list .xref.py.docutils,
+.field-list code.docutils,
+.field-list .docutils.literal.notranslate {
+ border: None;
+ padding-left: 0;
+ padding-right: 0;
+ color: #404040;
+}
\ No newline at end of file
diff --git a/docs/_templates/versions.html b/docs/_templates/versions.html
new file mode 100644
index 0000000..c49f844
--- /dev/null
+++ b/docs/_templates/versions.html
@@ -0,0 +1,27 @@
+{%- if current_version %}
+
+
+ Other Versions
+ v: {{ current_version.name }}
+
+
+
+ {%- if versions.tags %}
+
+ - Tags
+ {%- for item in versions.tags %}
+ - {{ item.name }}
+ {%- endfor %}
+
+ {%- endif %}
+ {%- if versions.branches %}
+
+ - Branches
+ {%- for item in versions.branches %}
+ - {{ item.name }}
+ {%- endfor %}
+
+ {%- endif %}
+
+
+{%- endif %}
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 5ae22e7..17dfafe 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -3,43 +3,209 @@
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
import os
import sys
-sys.path.insert(0, os.path.abspath('..'))
-# -- Project information -----------------------------------------------------
-# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+sys.path.insert(0, os.path.abspath(".."))
+
+# read the version from version.txt
+with open(os.path.join("../voxelgym2D", "version.txt"), encoding="utf-8") as file_handler:
+ __version__ = file_handler.read().strip()
+
+
+project = "Voxelgym2D"
+copyright = "2023, Harisankar Babu"
+author = "Harisankar Babu"
+release = __version__
+version = __version__
-project = 'Voxelgym2D'
-copyright = '2023, Harisankar Babu'
-author = 'Harisankar Babu'
-release = '0.1'
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.napoleon',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.githubpages',
- 'sphinx.ext.inheritance_diagram',
- ]
+ "sphinx.ext.autodoc", # for autodoc
+ "sphinx.ext.ifconfig", # for if statements
+ "sphinx.ext.autosummary", # for autosummary
+ "sphinx.ext.doctest", # for doctest
+ "sphinx.ext.todo", # for todo list
+ "sphinx.ext.viewcode", # for source code
+ "sphinx.ext.napoleon", # for google style docstrings
+ "sphinx.ext.githubpages", # for github pages
+ "sphinx.ext.inheritance_diagram", # for inheritance diagrams
+ "sphinx.ext.graphviz", # for graphviz
+ "sphinx.ext.mathjax", # for math
+ "sphinx_autodoc_typehints", # for type hints
+ "sphinx_autodoc_annotation", # for annotations
+ "sphinx_copybutton", # for copy button
+ "sphinx-prompt", # for prompt
+ "notfound.extension", # for 404 page
+ "versionwarning.extension", # for version warning
+ "recommonmark", # for markdown
+ "nbsphinx", # for notebooks
+]
-templates_path = ['_templates']
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+templates_path = ["_templates"]
+html_sidebars = {
+ "**": [
+ "_templates/versions.html",
+ ],
+}
+source_suffix = [".rst", ".md"]
+
+# The master toctree document.
+master_doc = "index"
+language = "en"
+
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
-html_theme = 'sphinx_rtd_theme'
-html_static_path = ['_static']
+html_theme = "sphinx_rtd_theme"
+html_static_path = ["_static"]
+html_css_files = ["custom.css"]
+html_show_sourcelink = False
+html_show_sphinx = False
+html_copy_source = False
+html_show_copyright = True
+html_use_index = True
+# html
+html_theme_options = {
+ "canonical_url": "",
+ "display_version": True,
+ "prev_next_buttons_location": "bottom",
+ "style_external_links": True,
+ "style_nav_header_background": "white",
+ # Toc options
+ "collapse_navigation": False,
+ "sticky_navigation": True,
+ "navigation_depth": 4,
+ "includehidden": True,
+ "titles_only": False,
+}
+
+# generate autosummary even if no references
+autosummary_generate = True
+autosummary_imported_members = True
+
+# autodoc
+autodoc_mock_imports = []
+autodoc_typehints = "description"
+autodoc_inherit_docstrings = True
+autodoc_preserve_defaults = True
+autodoc_default_options = {
+ "members": True,
+ "member-order": "bysource",
+ "special-members": "__init__",
+ "undoc-members": True,
+ "private-members": True,
+ "exclude-members": "__weakref__",
+ "show-inheritance": True,
+ "inherited-members": True,
+ "ignore-module-all": True,
+}
+
+# coverage
+coverage_show_missing_items = True
+coverage_skip_undoc_in_source = True
+
+# syntax highlighting
+pygments_style = "sphinx"
+highlight_language = "python3"
+
+# napoleon
+napoleon_numpy_docstring = True
+
+# todo-section
+todo_include_todos = False
+
+# inheritance diagrams
+# smaller diagrams with rectangular nodes
+inheritance_graph_attrs = {
+ "rankdir": "TB",
+ "size": '"6.0, 8.0"',
+ "fontsize": 12,
+ "ratio": "compress",
+ "bgcolor": "transparent",
+}
+
+inheritance_node_attrs = {
+ "shape": "rect",
+ "fontsize": 12,
+ "color": "orange",
+ "style": "filled",
+ "fillcolor": "white",
+}
+
+inheritance_edge_attrs = {
+ "arrowsize": 0.5,
+ "penwidth": 1.0,
+ "color": "orange",
+}
+
+# graphviz
+graphviz_output_format = "svg"
+graphviz_dot_args = [
+ "-Gbgcolor=transparent",
+ "-Nfontname=Helvetica",
+ "-Efontname=Helvetica",
+ "-Gfontname=Helvetica",
+ "-Gfontsize=12",
+ "-Nfontsize=12",
+ "-Efontsize=12",
+]
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "Voxelgym2D-doc"
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements: dict = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files.
+latex_documents = [
+ (master_doc, "Voxelgym2D.tex", "Voxelgym2D Documentation", "Voxelgym2D Contributors", "manual"),
+]
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(master_doc, "Voxelgym2D", "Voxelgym2D Documentation", [author], 1)]
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files.
+texinfo_documents = [
+ (
+ master_doc,
+ "Voxelgym2D",
+ "Voxelgym2D Documentation",
+ author,
+ "Voxelgym2D",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
+]
diff --git a/docs/index.rst b/docs/index.rst
index d27606a..32607ea 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,9 +7,12 @@ Welcome to Voxelgym2D's documentation!
======================================
.. toctree::
- :maxdepth: 2
+ :maxdepth: 3
:caption: Contents:
+ Voxelgym2D <./INTRO.md>
+ Installation <./INSTALL.md>
+ Usage <./USAGE.md>
modules
Indices and tables
@@ -17,4 +20,3 @@ Indices and tables
* :ref:`genindex`
* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/voxelgym2D.envs.rst b/docs/voxelgym2D.envs.rst
index ba3e2b2..c6d7985 100644
--- a/docs/voxelgym2D.envs.rst
+++ b/docs/voxelgym2D.envs.rst
@@ -1,6 +1,11 @@
voxelgym2D.envs package
=======================
+.. automodule:: voxelgym2D.envs
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
Submodules
----------
@@ -19,11 +24,3 @@ voxelgym2D.envs.env\_one\_step module
:members:
:undoc-members:
:show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: voxelgym2D.envs
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/voxelgym2D.rst b/docs/voxelgym2D.rst
index 412d1b3..8348128 100644
--- a/docs/voxelgym2D.rst
+++ b/docs/voxelgym2D.rst
@@ -1,6 +1,11 @@
voxelgym2D package
==================
+.. automodule:: voxelgym2D
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
Subpackages
-----------
@@ -8,12 +13,3 @@ Subpackages
:maxdepth: 4
voxelgym2D.envs
-
-Module contents
----------------
-
-.. automodule:: voxelgym2D
- :members:
- :undoc-members:
- :show-inheritance:
- :inherited-members:
diff --git a/examples/onestep.py b/examples/onestep.py
new file mode 100644
index 0000000..19d9f80
--- /dev/null
+++ b/examples/onestep.py
@@ -0,0 +1,14 @@
+import gymnasium as gym
+
+env = gym.make("voxelgym2D:onestep-v0")
+observation, info = env.reset(seed=123456)
+
+done = False
+while not done:
+ action = env.action_space.sample() # agent policy that uses the observation and info
+ observation, reward, terminated, truncated, info = env.step(action)
+
+ done = terminated or truncated
+ env.render()
+
+env.close()
diff --git a/examples/train_ppo.py b/examples/train_ppo.py
index 56759bb..a82bde1 100644
--- a/examples/train_ppo.py
+++ b/examples/train_ppo.py
@@ -2,11 +2,11 @@
import os
from typing import Callable
-import gym
+import gymnasium as gym
import numpy as np
import torch
from stable_baselines3 import PPO
-from stable_baselines3.common.callbacks import BaseCallback
+from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
@@ -14,9 +14,6 @@
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from torch import nn
-from tqdm.auto import tqdm
-
-import voxelgym2D
# Create log dir
LOG_DIR = "./logs/ppo_onestep/"
@@ -61,7 +58,6 @@ class SaveOnBestTrainingRewardCallback(BaseCallback):
"""
Callback for saving a model (the check is done every ``check_freq`` steps)
based on the training reward (in practice, we recommend using ``EvalCallback``).
-
:param check_freq: (int)
:param log_dir: (str) Path to the folder where the model will be saved.
It must contain the file created by the ``Monitor`` wrapper.
@@ -78,10 +74,8 @@ def __init__(self, check_freq, log_dir, verbose=1):
def _init_callback(self) -> None:
# Create folder if needed
- if self.save_path is not None:
- os.makedirs(self.save_path, exist_ok=True)
- if self.chckpoint_path is not None:
- os.makedirs(self.chckpoint_path, exist_ok=True)
+ if self.log_dir is not None:
+ os.makedirs(self.log_dir, exist_ok=True)
def _on_step(self) -> bool:
if self.n_calls % self.check_freq == 0:
@@ -89,15 +83,15 @@ def _on_step(self) -> bool:
x, y = ts2xy(load_results(self.log_dir), "timesteps")
if len(x) > 0:
# Mean training reward over the last 100 episodes
- mean_reward = np.mean(y[-100:])
+ _mean_reward = np.mean(y[-100:])
if self.verbose > 0:
print(f"Num timesteps: {self.num_timesteps}")
print(f"Best mean reward: {self.best_mean_reward:.2f}")
- print(f"Last mean reward per episode: {mean_reward:.2f}")
+ print(f"Last mean reward per episode: {_mean_reward:.2f}")
# New best model, you could save the agent here
- if mean_reward > self.best_mean_reward:
- self.best_mean_reward = mean_reward
+ if _mean_reward > self.best_mean_reward:
+ self.best_mean_reward = _mean_reward
# Example for saving best model
if self.verbose > 0:
print(f"Saving new best model at {x[-1]} timesteps")
@@ -112,44 +106,10 @@ def _on_step(self) -> bool:
return True
-class ProgressBarCallback(BaseCallback):
- """
- :param pbar: (tqdm.pbar) Progress bar object
- """
-
- def __init__(self, pbar):
- super().__init__()
- self._pbar = pbar
-
- def _on_step(self):
- # Update the progress bar:
- self._pbar.n = self.num_timesteps
- self._pbar.update(0)
-
-
-# this callback uses the 'with' block, allowing for correct initialisation and destruction
-class ProgressBarManager:
- """For tqdm progress bar in a with block."""
-
- def __init__(self, total_timesteps): # init object with total timesteps
- self.pbar = None
- self.total_timesteps = total_timesteps
-
- def __enter__(self): # create the progress bar and callback, return the callback
- self.pbar = tqdm(total=self.total_timesteps)
- return ProgressBarCallback(self.pbar)
-
- def __exit__(self, exc_type, exc_val, exc_tb): # close the callback
- self.pbar.n = self.total_timesteps
- self.pbar.update(0)
- self.pbar.close()
-
-
# scheduler
def linear_schedule(initial_value: float) -> Callable[[float], float]:
"""
Linear learning rate schedule.
-
:param initial_value: Initial learning rate.
:return: schedule that computes
current learning rate depending on remaining progress
@@ -158,7 +118,6 @@ def linear_schedule(initial_value: float) -> Callable[[float], float]:
def func(progress_remaining: float) -> float:
"""
Progress will decrease from 1 (beginning) to 0.
-
:param progress_remaining:
:return: current learning rate
"""
@@ -176,15 +135,13 @@ def func(progress_remaining: float) -> float:
seed=1327455,
monitor_dir=LOG_DIR,
env_kwargs={
- "mapfile": "200x200x200_dense.npy",
+ "mapfile": "600x600.npy",
"view_size": 21,
+ "image_size": 42,
"max_collisions": 0,
"max_steps": 60,
- "show_path": True,
"discrete_actions": True,
- "multi_output": False,
- "partial_reward": True,
- "image_size": 42,
+ "render_mode": "None",
},
vec_env_cls=SubprocVecEnv,
)
@@ -209,33 +166,41 @@ def func(progress_remaining: float) -> float:
ent_coef=0.01,
vf_coef=0.5,
verbose=1,
- tensorboard_log="tb_logs/ppo_onestep",
+ tensorboard_log="tb_logs/ppo_onestep/",
target_kl=0.4,
)
+ # Create eval env
eval_env = DummyVecEnv(
[
lambda: Monitor(
gym.make(
"voxelgym2D:onestep-v0",
- mapfile="200x200x200_dense.npy",
+ mapfile="600x600.npy",
view_size=21,
+ image_size=42,
max_collisions=0,
max_steps=60,
- show_path=True,
discrete_actions=True,
- multi_output=False,
- partial_reward=True,
- test_mode=True,
- image_size=42,
+ render_mode="None",
),
filename=os.path.join(LOG_DIR, "eval"),
)
]
)
- # n_eval_episodes = 50 since soft_reset_freq in base_env is 50
- mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50)
+ # Use deterministic actions for evaluation
+ eval_callback = EvalCallback(
+ eval_env,
+ best_model_save_path=None,
+ log_path=os.path.join(LOG_DIR, "eval"),
+ eval_freq=10000,
+ n_eval_episodes=50,
+ deterministic=True,
+ render=False,
+ )
+
+ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50, warn=False)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
# Create Callback
@@ -243,17 +208,13 @@ def func(progress_remaining: float) -> float:
TOTAL_TIME_STEPS = 10000000
- with ProgressBarManager(TOTAL_TIME_STEPS) as progress_callback:
- # This is equivalent to callback=CallbackList([progress_callback, auto_save_callback])
- model.learn(
- total_timesteps=TOTAL_TIME_STEPS,
- eval_env=eval_env,
- n_eval_episodes=50,
- eval_freq=10000,
- callback=[progress_callback, auto_save_callback],
- )
+ model.learn(
+ total_timesteps=TOTAL_TIME_STEPS,
+ callback=[auto_save_callback, eval_callback],
+ progress_bar=True,
+ )
model.save(os.path.join(LOG_DIR, "ppo_saved"))
- mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50)
+ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50, warn=False)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
diff --git a/setup.cfg b/setup.cfg
index 53d2dd6..e76d18d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -20,6 +20,11 @@ source = voxelgym2D
[coverage:report]
show_missing = True
+exclude_also =
+ def __repr__
+ def __str__
+ def __lt__
+ def __eq__
[pylint]
disable = missing-docstring,
@@ -32,7 +37,7 @@ disable = missing-docstring,
too-many-arguments,
too-many-branches,
# many functions will naturally have unused arguments.
- unused-argument
+ unused-argument,
[pylint.FORMAT]
max-line-length = 120
diff --git a/setup.py b/setup.py
index b25b624..1ddcd93 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,18 @@
+import os
+
from setuptools import find_packages, setup
+# read the version from version.txt
+with open(os.path.join("voxelgym2D", "version.txt"), encoding="utf-8") as file_handler:
+ __version__ = file_handler.read().strip()
+
setup(
name="voxelgym2D",
- version="0.2",
+ version=__version__,
description="Gym environment for 2D grid path planning",
author="Harisankar Babu",
author_email="harisankar995@gmail.com",
- keywords="reinforcement-learning machine-learning gym openai python data-science",
+ keywords=["reinforcement-learning", "machine-learning", "gym", "openai", "python", "gymnasium"],
license="MIT",
url="https://github.com/harisankar95/voxelgym2D.git",
classifiers=[
@@ -18,17 +24,16 @@
"Programming Language :: Python :: 3.8",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
- packages=find_packages(),
+ packages=[package for package in find_packages() if package.startswith("voxelgym2D")],
package_data={
- "voxelgym2D": ["envs/maps/*.npy"],
+ "voxelgym2D": ["envs/maps/*.npy", "version.txt"],
},
install_requires=[
- # sb3 support for gym 0.21
- "gym==0.21",
+ "gymnasium",
"numpy",
"scikit-image",
"opencv-python",
- "pathfinding==1.0.1",
+ "pathfinding>=1.0.4",
# rendering
"matplotlib",
],
@@ -43,11 +48,16 @@
"tox",
"sphinx",
"sphinx_rtd_theme",
+ "recommonmark",
+ "nbsphinx",
+ "sphinx-autodoc-typehints",
+ "sphinx-copybutton",
+ "sphinx-prompt",
+ "sphinx-notfound-page",
+ "sphinx-version-warning",
+ "sphinx-autodoc-annotation",
],
- "sb3": [
- "stable-baselines3[extra]==1.6.2",
- "sb3-contrib==1.6.2",
- ],
+ "sb3": ["stable-baselines3[extra]>=2.0.0", "sb3-contrib>=2.0.0", "rl_zoo3>=2.0.0"],
},
- python_requires=">=3.7",
+ python_requires=">=3.8",
)
diff --git a/tests/test_env.py b/tests/test_env.py
index b496e1b..5f5164d 100644
--- a/tests/test_env.py
+++ b/tests/test_env.py
@@ -1,19 +1,17 @@
"""Test the environment."""
-import gym
+import gymnasium as gym
import numpy as np
-import pytest
-from gym.utils.env_checker import check_env
+from gymnasium.utils.env_checker import check_env
-import voxelgym2D
+from voxelgym2D.envs import VoxelGymOneStep
def test_onsestep():
"""test onestep env"""
env = gym.make("voxelgym2D:onestep-v0")
- check_env(env)
- _ = env.reset()
- _, i = env.reset(return_info=True)
+ check_env(env.unwrapped, skip_render_check=True)
+ _, i = env.reset(seed=1234)
# assert i is a dict
assert isinstance(i, dict)
@@ -22,7 +20,19 @@ def test_onsestep():
env = gym.make(
"voxelgym2D:onestep-v0",
discrete_actions=False,
- inference_mode=True,
- multi_output=True,
)
- check_env(env)
+ check_env(env.unwrapped, skip_render_check=True)
+
+
+def test_action_to_bins():
+ """Test the action to bins function"""
+ assert VoxelGymOneStep.action_to_bins(np.array([-1])) == 0
+ assert VoxelGymOneStep.action_to_bins(np.array([-0.75])) == 1
+ assert VoxelGymOneStep.action_to_bins(np.array([-0.5])) == 2
+ assert VoxelGymOneStep.action_to_bins(np.array([-0.25])) == 3
+ assert VoxelGymOneStep.action_to_bins(np.array([0])) == 4
+
+ assert VoxelGymOneStep.action_to_bins(np.array([0.25])) == 5
+ assert VoxelGymOneStep.action_to_bins(np.array([0.5])) == 6
+ assert VoxelGymOneStep.action_to_bins(np.array([0.75])) == 7
+ assert VoxelGymOneStep.action_to_bins(np.array([1])) == 7
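The new assertions pin down the continuous-to-discrete action mapping: a scalar in [-1, 1] falls into one of 8 bins of width 0.25, with the upper edge clamped into the last bin. A reference mapping consistent with these assertions, written here only for illustration (it is not voxelgym2D's actual implementation):

```python
import numpy as np

def action_to_bins_reference(action: np.ndarray) -> int:
    """Map a scalar action in [-1, 1] to one of 8 bins, matching the assertions above."""
    a = float(np.clip(action[0], -1.0, 1.0))
    # bins of width 0.25 starting at -1; the value 1.0 is clamped into bin 7
    return min(int(np.floor((a + 1.0) * 4.0)), 7)

assert action_to_bins_reference(np.array([-1.0])) == 0
assert action_to_bins_reference(np.array([0.0])) == 4
assert action_to_bins_reference(np.array([1.0])) == 7
```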
diff --git a/tox.ini b/tox.ini
index bf296a4..ebb3a90 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,21 +1,26 @@
[tox]
-requires = tox>=4.0.0
-envlist = py38, linter, formatter
+requires = tox>=4.5.0
+envlist = clean, py38, linter, formatter
[testenv]
description = Run tests with pytest under {basepython}
+use_develop = true
deps =
- setuptools==65.5.0
coverage
pytest
+ pytest-cov
commands =
- coverage run -m pytest tests/
- coverage report
+ pytest --cov-report term-missing --cov-config=setup.cfg --cov=voxelgym2D --cov-append tests/
+
+[testenv:clean]
+deps = coverage
+skip_install = true
+commands = coverage erase
[testenv:linter]
description = Run pylint to check code quality and mypy to check type hints
+use_develop = true
deps =
- setuptools==65.5.0
pylint
mypy
commands =
diff --git a/voxelgym2D/__init__.py b/voxelgym2D/__init__.py
index 9722efe..a418bf5 100644
--- a/voxelgym2D/__init__.py
+++ b/voxelgym2D/__init__.py
@@ -1,6 +1,7 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
register(
id="onestep-v0",
entry_point="voxelgym2D.envs:VoxelGymOneStep",
+ nondeterministic=True,
)
diff --git a/voxelgym2D/envs/base_env.py b/voxelgym2D/envs/base_env.py
index 02be648..4109d07 100644
--- a/voxelgym2D/envs/base_env.py
+++ b/voxelgym2D/envs/base_env.py
@@ -1,82 +1,140 @@
"""Base class for all environments"""
-import logging
import math
import os
+import sys
from collections import OrderedDict
from itertools import product
-from typing import Dict, List, Optional, Tuple, Union
+from logging import Formatter, Logger, LogRecord, StreamHandler
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import cv2
-import gym
+import gymnasium as gym
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
-from gym.utils import seeding
+from gymnasium.utils import seeding
from pathfinding.core.diagonal_movement import DiagonalMovement
from pathfinding.core.grid import Grid
+from pathfinding.core.node import GridNode
from pathfinding.finder.a_star import AStarFinder
from skimage import draw
-# based on https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output
-class CustomFormatter(logging.Formatter):
- """Colors for different log levels."""
+class CustomFormatter(Formatter):
+ """Custom formatter with colors for different log levels."""
- grey = "\x1b[38;20m"
- yellow = "\x1b[33;20m"
- red = "\x1b[31;20m"
- bold_red = "\x1b[31;1m"
+ _format = "%(asctime)s - %(levelname)s - %(message)s - %(filename)s:%(lineno)d"
+ # different color for different log level
+ # https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output
+ white = "\x1b[37;1m"
+ green = "\x1b[32;1m"
+ yellow = "\x1b[33;1m"
+ purple = "\x1b[35;1m"
+ red = "\x1b[31;1m"
reset = "\x1b[0m"
- format_ = "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
-
- FORMATS = {
- logging.DEBUG: grey + format_ + reset,
- logging.INFO: grey + format_ + reset,
- logging.WARNING: yellow + format_ + reset,
- logging.ERROR: red + format_ + reset,
- logging.CRITICAL: bold_red + format_ + reset,
+ COLORED_FORMATS = {
+ "DEBUG": white + _format + reset,
+ "INFO": green + _format + reset,
+ "WARNING": yellow + _format + reset,
+ "ERROR": purple + _format + reset,
+ "CRITICAL": red + _format + reset,
}
- def format(self, record):
- log_fmt = self.FORMATS.get(record.levelno)
- formatter_ = logging.Formatter(log_fmt, datefmt="%H:%M:%S")
- return formatter_.format(record)
+ def format(self, record: LogRecord) -> str:
+ """
+ Format the log record.
+ Parameters
+ ----------
+ record : LogRecord
+ Log record
+
+ Returns
+ -------
+ str
+ Formatted log record
+ """
+ log_fmt = self.COLORED_FORMATS.get(record.levelname)
+ formatter = Formatter(log_fmt, datefmt="%Y-%m-%d %H:%M:%S")
+ return formatter.format(record)
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-formatter = CustomFormatter()
-stream_handler = logging.StreamHandler()
-stream_handler.setFormatter(formatter)
-logger.addHandler(stream_handler)
+
+class CustomLogger:
+ """Logger class."""
+
+ def __init__(self, name: str, log_level: str = "ERROR"):
+ """
+ Initialize the logger.
+
+ Parameters
+ ----------
+ name : str
+ Name of the logger.
+ log_level : str, optional
+ Log level, by default "ERROR"
+ """
+ if log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
+ raise ValueError(f"Invalid log level: {log_level}")
+ self.log_level = log_level
+ self.name = name
+ self._logger: Logger
+ self._setup_logger()
+
+ def _setup_logger(self):
+ """Setup the logger."""
+ self._logger = Logger(self.name)
+ self._logger.setLevel(self.log_level)
+ formatter = CustomFormatter()
+
+ # log to stdout
+ handler = StreamHandler(sys.stdout)
+ handler.setLevel(self.log_level)
+ handler.setFormatter(formatter)
+ self._logger.addHandler(handler)
+
+ def get_logger(self) -> Logger:
+ """
+ Get the logger
+
+ Returns
+ -------
+ logging.Logger
+ Logger
+ """
+ return self._logger
class BaseEnv(gym.Env):
"""Base class for all environments"""
- metadata = {"render.modes": ["human"]}
+ metadata: Dict[str, Any] = {"render_modes": ["None"], "render_fps": 1}
def __init__(
self,
- mapfile: str = "200x200x200_dense.npy",
+ render_mode: Optional[str] = None,
+ mapfile: str = "600x600.npy",
view_size: int = 21,
+ image_size: int = 42,
max_collisions: int = 0,
max_steps: int = 60,
show_path: bool = True,
multi_output: bool = False,
partial_reward: bool = True,
- image_size: int = 42,
- test_mode: bool = False,
inference_mode: bool = False,
+ log_level: str = "ERROR",
):
"""
Parameters
----------
+ render_mode : Optional[str], optional
+ render mode, by default None
mapfile : str
name of the map file in the maps folder
view_size : int
size of the view window for observation
+ image_size : int
+ size of the image to be returned as observation
max_collisions : int
maximum number of collisions allowed before episode ends
max_steps : int
@@ -87,26 +145,33 @@ def __init__(
whether to add additional outputs in the observation
partial_reward : bool
whether to give rewards for each step
- image_size : int
- size of the image to be returned as observation
- test_mode : bool
- whether to run in test mode, for evaluation during training
inference_mode : bool
whether to run in inference mode
+ log_level : str, optional
+ log level, by default "ERROR". One of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
"""
+ self.render_mode = "None" if render_mode is None else render_mode
+ if self.render_mode not in self.metadata["render_modes"]:
+ raise ValueError(f"Invalid render_mode: {self.render_mode}")
- super().__init__()
# current file path
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
# load map
self.grid_map = np.load(os.path.join(__location__, "maps", mapfile))
+ self.MAX_VAL_UINT = int(0.9 * np.iinfo(np.uint32).max) # 0.9 to avoid overflow
+
+ # an array to keep track of the start and target locations
+ self.start_locations, self.target_locations = self._start_end_counts()
self.world_size = self.grid_map.shape
- if view_size < 10:
- logger.warning("view_size should be at least 10, setting to 10")
- self.view_size = 10
+ # initialize the logger
+ self.logger = CustomLogger(self.__class__.__name__, log_level=log_level).get_logger()
+
+ if view_size < 16:
+ self.logger.warning("view_size should be at least 16, setting to 16")
+ self.view_size = 16
elif view_size > np.amin(self.world_size):
- logger.warning(
+ self.logger.warning(
"view_size should be less than half of the world size, setting to %s",
int((np.amin(self.world_size) - 2) / 2),
)
@@ -116,42 +181,73 @@ def __init__(
view_ratio = image_size / view_size # should be even
if view_ratio % 2 != 0:
- logger.warning("view_ratio should be even, setting to %s", (int(view_ratio) + 1))
+ self.logger.warning("view_ratio should be even, setting to %s", (int(view_ratio) + 1))
self.image_size = int(view_ratio + 1) * view_size
else:
self.image_size = image_size
# set the agent to middle of the world map
self._new_world = np.ones((int(2 * self.view_size), int(2 * self.view_size)))
+ # placeholder for target locations for the sliced view (_new_world)
+ self._new_target_locations = np.ones((int(2 * self.view_size), int(2 * self.view_size)), dtype=np.uint32)
self._new_world_center: np.ndarray = np.array([self.world_size[1] / 2, self.world_size[0] / 2], dtype=np.int32)
self._next_new_world_center: np.ndarray = np.array(
[self.world_size[1] / 2, self.world_size[0] / 2], dtype=np.int32
)
+ # placeholder for mapping function
+ self._mapping = lambda x, y: (x, y)
+ # create a list of locations where the agent can be placed
+ # 25 possible locations for agent
+ factor = 5
+ self.possible_start_locations = np.array(
+ list(
+ product(
+ np.linspace(
+ start=2 * self.view_size / factor,
+ stop=2 * self.view_size,
+ num=factor,
+ endpoint=False,
+ dtype=int,
+ ),
+ np.linspace(
+ start=2 * self.view_size / factor,
+ stop=2 * self.view_size,
+ num=factor,
+ endpoint=False,
+ dtype=int,
+ ),
+ )
+ )
+ )
+
self._agent_location: np.ndarray = np.array([self.view_size, self.view_size], dtype=np.int32)
self._target_location: np.ndarray = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self.ini_distance_to_target = 0.0
self.min_distance_to_target = 0.0
self.cost_astar = 0.0
+ self.ini_cost_astar = 0.0
self.min_cost_astar = 0.0
self.astar_runs = 0
self._path: List = []
+ self.ini_astarPath: List = []
self.astarPath: List = []
+ self._astar_grid = Grid()
self.action = [-1, -1]
self.action_cost = 0.0
- self.obs_world = np.zeros((3, self.image_size, self.image_size), dtype=np.uint8)
+ self.obs_world = np.zeros((3, self.image_size, self.image_size))
+ self.obs_world_astar = np.zeros((3, self.image_size, self.image_size), dtype=np.uint8)
self.base_obs_world = np.zeros((3, self.image_size, self.image_size), dtype=np.float32)
self.num_targets_reached = 0
self.current_step = 0
self.num_collisions = 0
- self.test_mode = test_mode
self.inference_mode = inference_mode
- # set back the agent to middle of the world map after these many targets
- self.soft_reset_freq = 1500
- if self.test_mode:
- self.soft_reset_freq = 50
+ # set back the agent to a new location than the previous
+ # target location after these many episodes
+ self.soft_reset_freq = 10
- self.target_num = -1
+ self.target_num = 0
self.max_collisions = max_collisions
self.max_steps = max_steps
self.show_path = show_path
@@ -161,14 +257,15 @@ def __init__(
self.delta = list(product([-1, 0, 1], repeat=2))
self.delta.remove((0, 0))
+ # random number generator for shuffling the possible target locations
+ seed_seq = np.random.SeedSequence()
+ self.random_gen: np.random.Generator = np.random.Generator(np.random.PCG64(seed_seq))
+
# math constants
self.SQRT2 = math.sqrt(2)
self.EXP1 = math.exp(1)
self.EXP1_1 = self.EXP1 - 1
- self.seed()
- self.action_space = None
-
self.multi_output = multi_output
if self.multi_output:
self.observation_space = gym.spaces.Dict(
@@ -185,41 +282,78 @@ def __init__(
low=0, high=255, shape=(3, self.image_size, self.image_size), dtype=np.uint8
)
- def get_logger(self) -> logging.Logger:
+ def get_logger(self) -> Logger:
"""
Returns the logger
Returns
-------
- logger : logging.Logger
+ logger : Logger
logger object
"""
- return logger
+ return self.logger
- def seed(self, seed: Optional[int] = None) -> List[Optional[int]]:
+ @staticmethod
+ def find_obstacle_neighbor_count(grid_map: np.ndarray) -> np.ndarray:
"""
- Sets the seed for this env's random number generator(s).
+ Finds the number of neighboring obstacles for each cell in the grid map
Parameters
----------
- seed : int, optional
- Seed for the random number generator(s), by default None
+ grid_map : np.ndarray
+ grid map with obstacles marked as 1s and free cells marked as 0s
+
+ Returns
+ -------
+ neighbors : np.ndarray
+ number of neighboring obstacles for each cell in the grid map
+ """
+ # add a border of 1s around the grid map
+ padded_grid_map = np.pad(grid_map, pad_width=1, mode="constant", constant_values=0)
+ # get the neighbors of all cells
+ neighbors = (
+ padded_grid_map[:-2, :-2]
+ + padded_grid_map[:-2, 1:-1]
+ + padded_grid_map[:-2, 2:]
+ + padded_grid_map[1:-1, :-2]
+ + padded_grid_map[1:-1, 2:]
+ + padded_grid_map[2:, :-2]
+ + padded_grid_map[2:, 1:-1]
+ + padded_grid_map[2:, 2:]
+ )
+ # return the count of neighboring obstacles
+ return neighbors
+
+ def _start_end_counts(self) -> Tuple[np.ndarray, np.ndarray]:
+ """
+ Create arrays to keep track of the start and end cell counts
Returns
-------
- Returns the list of seed used in this env's random number generators
+ start_counts : np.ndarray
+ shape like self.grid_map with the count of start cells
+ end_counts : np.ndarray
+ shape like self.grid_map with the count of end cells
"""
- self.np_random, seed = seeding.np_random(seed)
- return [seed]
+ neighbors = self.find_obstacle_neighbor_count(self.grid_map)
- def make_astar_matrix(self) -> None:
+ # find all cells with value 0 and without any neighbors with value > 0
+ possible_start_cells = (np.logical_and(self.grid_map == 0, neighbors == 0)).astype(np.uint32)
+ # set cells that are not possible start cells (value 0) to MAX_VAL_UINT
+ possible_start_cells[possible_start_cells == 0] = self.MAX_VAL_UINT
+ # end cells are same as possible start cells
+ possible_end_cells = np.copy(possible_start_cells)
+ return possible_start_cells, possible_end_cells
+
+ def _make_astar_matrix(self) -> None:
"""
Creates the astar matrix for the current world map and sets the astar grid
"""
- _astar_matrix = np.abs(1.0 - self._new_world).astype(np.int32)
+ # set the astar matrix to 1 for all cells with value 0 in the world map and 0 otherwise
+ _astar_matrix = np.abs(1.0 - self._new_world)
self._astar_grid = Grid(matrix=_astar_matrix.tolist())
- def run_astar(self, target: np.ndarray) -> Tuple[List[Tuple[int, int]], float, int]:
+ def _run_astar(self, target: np.ndarray) -> Tuple[List[Tuple[int, int]], float, int]:
"""
Runs the A* algorithm on the current world map and returns the path,
path cost and number of nodes visited
@@ -247,21 +381,44 @@ def run_astar(self, target: np.ndarray) -> Tuple[List[Tuple[int, int]], float, i
finder = AStarFinder(diagonal_movement=DiagonalMovement.always)
else:
finder = AStarFinder(diagonal_movement=DiagonalMovement.only_when_no_obstacle)
- path, runs = finder.find_path(start, end, self._astar_grid)
- if len(path) > 0:
- path_cost = end.g
- else:
- path_cost = np.inf
+ path_w_node, runs = finder.find_path(start, end, self._astar_grid)
+ path_cost = np.inf
+ path = []
+ if len(path_w_node) > 0:
+ for node in path_w_node:
+ is_gn = isinstance(node, GridNode)
+ x, y = (node.x, node.y) if is_gn else node[:2]
+ path.append((x, y))
+
+ path_cost = 0.0
+ for dx in np.array(path[1:]) - np.array(path[:-1]):
+ path_cost += np.sqrt(np.sum(dx**2))
+
return path, path_cost, runs
- def slice_grid_map(self) -> None:
- """
- Slices the grid map into a 2D numpy array
- of size (2*view_size, 2*view_size)
+ def _slice_grid_map(self) -> Tuple[Callable, Union[np.ndarray, None]]:
"""
+ Slices the grid map into a 2D numpy array of size (2*view_size, 2*view_size)
+ Generate a mapping from the sliced grid map to the original grid map
+ Returns
+ -------
+ mapping : Callable(int, int)
+ mapping from the sliced grid map to the original grid map
+ potential_start_location : Union[np.ndarray, None]
+ potential start location for the agent
+ """
# slice the grid map into a 2D numpy array
self._new_world = np.ones((int(2 * self.view_size), int(2 * self.view_size)))
+
+ # the new target locations array has shape (2*view_size, 2*view_size), with all values
+ # set to self.MAX_VAL_UINT and dtype np.uint32
+ self._new_target_locations = np.full(
+ shape=(int(2 * self.view_size), int(2 * self.view_size)),
+ fill_value=self.MAX_VAL_UINT,
+ dtype=np.uint32,
+ )
+
y_min = int(max(self._new_world_center[0] - self.view_size, 0))
y_max = int(min(self._new_world_center[0] + self.view_size, self.world_size[0]))
x_min = int(max(self._new_world_center[1] - self.view_size, 0))
@@ -273,13 +430,46 @@ def slice_grid_map(self) -> None:
x_max_new = int(self.view_size + x_max - self._new_world_center[1])
self._new_world[y_min_new:y_max_new, x_min_new:x_max_new] = self.grid_map[y_min:y_max, x_min:x_max]
+ self._new_target_locations[y_min_new:y_max_new, x_min_new:x_max_new] = self.target_locations[
+ y_min:y_max, x_min:x_max
+ ]
# set the edges to 1
self._new_world[0, :-1] = self._new_world[:-1, -1] = self._new_world[-1, 1:] = self._new_world[1:, 0] = 1
- # create the new astar grid
- self.make_astar_matrix()
- def find_target(self) -> np.ndarray:
+ # set the edges to self.MAX_VAL_UINT in the new target locations
+ self._new_target_locations[0, :-1] = self._new_target_locations[:-1, -1] = self._new_target_locations[
+ -1, 1:
+ ] = self._new_target_locations[1:, 0] = self.MAX_VAL_UINT
+ # create the new astar grid
+ self._make_astar_matrix()
+
+ # define the mapping from the sliced grid map to the original grid map
+ def _mapping(x: int, y: int) -> Tuple[int, int]:
+ return x + x_min - x_min_new, y + y_min - y_min_new
+
+ # find the start location in the sliced grid map
+ # shuffle the possible_start_locations
+ self.np_random.shuffle(self.possible_start_locations)
+ # iterate over possible_start_locations
+ potential_start_location = None
+ for start_location in self.possible_start_locations:
+ # check that the start location and its 1-cell neighbourhood (spacing = 1) are free of obstacles
+ spacing = 1
+ if np.all(
+ self._new_world[
+ start_location[1] - spacing : start_location[1] + (spacing + 1),
+ start_location[0] - spacing : start_location[0] + (spacing + 1),
+ ]
+ == 0
+ ):
+ # if free of obstacles, set the agent location to the start location
+ potential_start_location = start_location
+ break
+
+ return _mapping, potential_start_location
+
+ def _find_target(self) -> np.ndarray:
"""
Finds a target location for the agent to move to
@@ -287,72 +477,62 @@ def find_target(self) -> np.ndarray:
-------
target_location : np.ndarray
target location
+
+ Raises
+ ------
+ RuntimeError
+ If a target location cannot be found
"""
# 10% of the time find easy target (close to agent)
- easy_target = True if (self.np_random.rand() < 0.10) else False
- colliding = True
+ easy_target = self.np_random.random() < 0.10
+ # unless it is an easy target, avoid sampling within 8 cells of the agent
+ # (2 cells for easy targets): raise the count of those cells in
+ # self._new_target_locations to self.MAX_VAL_UINT so nothing close to the agent is sampled
+ spread = 2 if easy_target else 8
count = 0
- while colliding:
- nearby_clear = True
- if not easy_target:
- straight_path = True
- target_location = self._agent_location
- while np.array_equal(target_location, self._agent_location):
+ while True:
+ self._new_target_locations[
+ self._agent_location[0] - spread : self._agent_location[0] + (spread + 1),
+ self._agent_location[1] - spread : self._agent_location[1] + (spread + 1),
+ ] = self.MAX_VAL_UINT
+ # get a list of all the target locations with the count less than self.MAX_VAL_UINT
+ possible_target_locations = np.argwhere(self._new_target_locations < self.MAX_VAL_UINT)[:, ::-1]
+ # shuffle the possible_target_locations
+ self.random_gen.shuffle(possible_target_locations)
+ # iterate over the shuffled possible_target_locations
+ for target_location in possible_target_locations:
if not easy_target:
- random_r = self.np_random.randint(6, int(self.SQRT2 * self.view_size - 1))
- else:
- random_r = self.np_random.randint(2, int((self.SQRT2 * self.view_size - 1) / 2))
- random_theta = self.np_random.uniform(-np.pi, np.pi)
- random_location = np.array([random_r * np.cos(random_theta), random_r * np.sin(random_theta)])
- random_location = np.round(random_location).astype(np.int32)
- target_location = random_location + self._agent_location
- if np.amin(target_location) < 0 or np.amax(target_location) >= int(2 * self.view_size):
- target_location = self._agent_location
- if self._new_world[target_location[1], target_location[0]] == 0:
- spacing = 1
- # check immediate neighbors for obstacles
- y_min = int(max(target_location[1] - spacing, 0))
- y_max = int(min(target_location[1] + spacing + 1, 2 * self.view_size))
- x_min = int(max(target_location[0] - spacing, 0))
- x_max = int(min(target_location[0] + spacing + 1, 2 * self.view_size))
- if np.count_nonzero(self._new_world[y_min:y_max, x_min:x_max] == 1.0) > 0:
- nearby_clear = False
-
- if nearby_clear:
- if not easy_target:
- # check if its a straight path
- rr, cc = draw.line(
- self._agent_location[0],
- self._agent_location[1],
- target_location[0],
- target_location[1],
- )
- straight_line = list(zip(rr, cc))
- for pt in straight_line:
- if self._new_world[pt[1], pt[0]] == 1:
- straight_path = False
- break
-
- if not straight_path:
- # Astar search to find the shortest path to the target
- self.astarPath, self.cost_astar, self.astar_runs = self.run_astar(target_location)
- if len(self.astarPath) > 0 and self.astar_runs > 60:
- colliding = False
- else:
- # Astar search to find the shortest path to the target
- self.astarPath, self.cost_astar, self.astar_runs = self.run_astar(target_location)
- if len(self.astarPath) > 0:
- colliding = False
+ # check if it's a straight, obstacle-free path
+ rr, cc = draw.line(
+ self._agent_location[0],
+ self._agent_location[1],
+ target_location[0],
+ target_location[1],
+ )
+ straight_pixels = self._new_world[cc, rr]
+ straight_path = not np.any(straight_pixels == 1)
+
+ if straight_path:
+ continue
+
+ # Astar search to find the shortest path to the target
+ self.astarPath, self.cost_astar, self.astar_runs = self._run_astar(target_location)
+ if len(self.astarPath) > 0:
+ if easy_target:
+ return target_location
+ if self.astar_runs > 60:
+ return target_location
+
+ self.logger.info(" ---Target not set, soft reset---")
+ # raise the count of the agent location in self.start_locations to self.MAX_VAL_UINT
+ # so that the agent location is not sampled again
+ mapped_start = self._mapping(self._agent_location[0], self._agent_location[1])
+ self.start_locations[mapped_start[::-1]] = self.MAX_VAL_UINT
+ self._soft_reset()
count += 1
- if count > 9999:
- logger.info(
- " ---Target not set in %s tries! Setting agent back to reset conditions!",
- count,
- )
- count = 0
- self.soft_reset()
- return target_location
+ if count > 100000:
+ raise RuntimeError("Cannot find a target location")
def _get_info(self) -> Dict:
"""
@@ -376,6 +556,7 @@ def _get_info(self) -> Dict:
"current step": self.current_step,
"no. of collisions": self.num_collisions,
"grid map": self._new_world,
+ "obs with astar path": self.obs_world_astar,
}
# base observation is the world map which remains constant throughout the episode
@@ -406,6 +587,12 @@ def _get_obs(self) -> Union[np.ndarray, OrderedDict]:
self.obs_world[:, self._agent_location[1], self._agent_location[0]] = 0.3 * 255.0
self.obs_world[0, self._agent_location[1], self._agent_location[0]] = 1.0 * 255.0
+ # mark the astar path in the self.obs_world_astar
+ self.obs_world_astar = np.copy(self.obs_world)
+ for pt in self.astarPath:
+ self.obs_world_astar[:, pt[1], pt[0]] = 0.6 * 255.0
+ self.obs_world_astar = self.obs_world_astar.astype(dtype=np.uint8)
+
if self.show_path and (len(self._path) != 0):
for pt in self._path:
if self.obs_world[2, pt[1], pt[0]] == 0:
@@ -418,69 +605,124 @@ def _get_obs(self) -> Union[np.ndarray, OrderedDict]:
dsize=(self.image_size, self.image_size),
interpolation=cv2.INTER_NEAREST,
)
- self.obs_world = np.moveaxis(self.obs_world, -1, 0)
- if not self.multi_output:
- return self.obs_world.astype(dtype=np.uint8)
+ self.obs_world = np.moveaxis(self.obs_world, -1, 0).astype(dtype=np.uint8)
+ if not self.multi_output:
+ return self.obs_world
return OrderedDict(
{
# normalize delta
"delta": ((self._target_location - self._agent_location) / (2 * self.view_size - 1)).astype(np.float32),
- "world": self.obs_world.astype(dtype=np.uint8),
+ "world": self.obs_world,
}
)
- # set the agent's location at the center of the map
- def soft_reset(self) -> None:
+ def _get_new_index_from_counts(self, counts_mat: np.ndarray, alpha_p: float = 1.0) -> Tuple[int, int]:
+ """
+ Returns a new index sampled from the counts matrix
+
+ Parameters
+ ----------
+ counts_mat : np.ndarray
+ counts matrix from which the new index is sampled
+ alpha_p : float
+ parameter to control the sampling probability
+
+ Returns
+ -------
+ sampled_index : Tuple[int, int]
+ sampled index from the counts matrix in the form (y, x)
+ """
+ flattened_counts = counts_mat.flatten()
+ # higher the count, lower the probability of sampling that cell
+ probabilities = np.exp(-alpha_p * flattened_counts)
+ probabilities /= np.sum(probabilities)
+
+ # sample a cell based on the probabilities
+ sampled_index = self.np_random.choice(np.arange(len(flattened_counts)), p=probabilities)
+ # convert the sampled index to 2D index
+ sampled_index = np.unravel_index(sampled_index, counts_mat.shape)
+ return sampled_index # (y, x)
+
+ # set the world center based on sampling from current counts
+ # tries to set the world center to cells with lower counts
+ def _soft_reset(self) -> None:
"""Moves the agent to the center of the map and resets the target"""
- self._new_world_center = np.array([self.world_size[1] / 2, self.world_size[0] / 2], dtype=np.int32)
- self.slice_grid_map()
- self._agent_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ sampled_index = self._get_new_index_from_counts(self.start_locations)
+ self._new_world_center = np.array([sampled_index[0], sampled_index[1]], dtype=np.int32)
+ self._mapping, potential_start_location = self._slice_grid_map()
+ if potential_start_location is None:
+ potential_start_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self._agent_location = potential_start_location
self.target_num = 0
def reset(
- self, return_info: bool = False
- ) -> Union[Tuple[Union[np.ndarray, OrderedDict], Dict], np.ndarray, OrderedDict]:
+ self,
+ *,
+ seed: Union[int, None] = None,
+ options: Union[Dict, None] = None,
+ ) -> Tuple[Union[np.ndarray, OrderedDict], Dict]:
"""
- Resets the environment to the initial state and returns the initial observation
+ Resets the environment to the initial state and returns the initial observation and info
Parameters
----------
- return_info : bool, optional
- whether to return the info dictionary, by default False
+ seed : Union[int, None]
+ seed to use for the environment
+ options : Union[Dict, None]
+ options to use for the environment
Returns
-------
obs : np.ndarray or OrderedDict
observation from manystep environment
- info : Dict (optional)
- info dictionary (optional) of the last step in the stack
+ info : Dict
+ info dictionary of the last step in the stack
"""
+
+ # Initialize the RNG if the seed is manually passed
+ super().reset(seed=seed)
+
+ # seed the other random number generator
+ if seed is not None:
+ self.random_gen, _ = seeding.np_random(seed)
+
self.target_num += 1
if (self.target_num % self.soft_reset_freq) == 0:
- self.soft_reset()
- if self.test_mode:
- self.seed(1327455)
+ self._soft_reset()
else:
self._new_world_center = self._next_new_world_center
self.current_step = 0
self.num_collisions = 0
- self.slice_grid_map()
- self._agent_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self._mapping, potential_start_location = self._slice_grid_map()
+ if potential_start_location is None:
+ potential_start_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self._agent_location = potential_start_location
# We will sample the target's location randomly until it does
# not coincide with the agent's location
- self._target_location = self.find_target()
- self._next_new_world_center = self._new_world_center + (self._target_location - self._agent_location)[::-1]
- self.min_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
- self.min_cost_astar = self.cost_astar
+ self._target_location = self._find_target()
+
+ # increase the count of the start and target locations
+ mapped_start = self._mapping(self._agent_location[0], self._agent_location[1])
+ self.start_locations[mapped_start[::-1]] += 1
+ mapped_target = self._mapping(self._target_location[0], self._target_location[1])
+ self.target_locations[mapped_target[::-1]] += 1
+
+ # set the next new world center at the mapped target location
+ self._next_new_world_center = mapped_target[::-1]
+ self.ini_distance_to_target = self.min_distance_to_target = float(
+ np.linalg.norm(self._target_location - self._agent_location)
+ )
+ self.ini_astarPath = self.astarPath
+ self.ini_cost_astar = self.min_cost_astar = self.cost_astar
self._path = []
self.action = [-1, -1]
self.action_cost = 0.0
self._create_base_obs()
observation = self._get_obs()
- return (observation, self._get_info()) if return_info else observation
+ return (observation, self._get_info())
def _compute_reward(self, completion_reward: bool = False):
"""
@@ -490,7 +732,7 @@ def _compute_reward(self, completion_reward: bool = False):
def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
"""
- Takes the action and returns the new agent lo
+ Takes the action and returns the new agent location
"""
raise NotImplementedError
@@ -500,9 +742,9 @@ def close(self) -> None:
"""
plt.close("all")
- def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], float, bool, Dict]:
+ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], float, bool, bool, Dict]:
"""
- Takes a step in the environment and returns the observation, reward, done and info
+ Takes a step in the environment and returns the observation, reward, terminated, truncated and info
Parameters
----------
@@ -511,10 +753,19 @@ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], floa
Returns
-------
- Observation, Reward, Done, Info : Tuple[Union[np.ndarray, OrderedDict], float, bool, Dict]
- the observation, reward, done and info
+ observation : np.ndarray or OrderedDict
+ observation
+ reward : float
+ reward
+ terminated : bool
+ whether the episode terminated
+ truncated : bool
+ whether the episode was truncated
+ info : Dict
+ info dictionary
"""
- done = False
+ terminated = False
+ truncated = False
valid_action_path, collision = self._take_action(action)
self.current_step += 1
@@ -526,13 +777,13 @@ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], floa
if collision:
self.num_collisions += 1
reward = -1.0
- else: # only do it if the agent moves
+ elif len(valid_action_path) > 1: # only do it if the agent moves
self._path = valid_action_path[:-1] # remove the agent location
if np.array_equal(self._target_location, self._agent_location):
- done = True
+ terminated = True
reward += self._compute_reward(completion_reward=True)
self.num_targets_reached += 1
- logger.info(
+ self.logger.info(
"%s Target reached in %s steps, Collisions : %s",
self.ordinal(self.num_targets_reached),
self.current_step,
@@ -540,43 +791,49 @@ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], floa
)
else:
reward += self._compute_reward(completion_reward=False)
- self.min_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
- self.min_cost_astar = self.cost_astar
+ else:
+ self.logger.warning("No movement caused by action: %s!", action)
+
+ self.min_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
+ self.min_cost_astar = self.cost_astar
if self.num_collisions > self.max_collisions:
- done = True
+ terminated = True
if not self.partial_reward:
reward += -self.cost_astar / 100
- logger.info(" --Max Collisions! Collisions: %s/%s", self.num_collisions, self.max_collisions + 1)
- elif (not done) and (self.current_step > self.max_steps):
- done = True
+ self.logger.info(
+ " --Max Collisions! Collisions: %s/%s",
+ self.num_collisions,
+ self.max_collisions + 1,
+ )
+ elif (not terminated) and (self.current_step > self.max_steps):
+ terminated = True
if not self.partial_reward:
reward += -self.cost_astar / 100
- logger.info(
- " --Max Steps: %s/%s!",
+ self.logger.info(
+ " --Max Steps! Steps: %s/%s",
self.current_step,
self.max_steps + 1,
)
observation = self._get_obs()
info = self._get_info()
- return observation, reward, done, info
+ return observation, reward, terminated, truncated, info
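
An illustrative rollout loop against the new five-tuple step() return, sketched with a random policy and the VoxelGymOneStep defaults; truncated is checked for API completeness even though the code above only ever sets terminated.

from voxelgym2D.envs.env_one_step import VoxelGymOneStep

env = VoxelGymOneStep()
obs, info = env.reset(seed=0)
terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()            # random policy, for illustration only
    obs, reward, terminated, truncated, info = env.step(action)
env.close()
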
- def render(self, mode="human") -> None:
+ def render(self) -> None:
"""
Renders the environment
- Parameters
- ----------
- mode : str, optional
- the mode to render in, by default "human"
-
Returns
-------
None
"""
- plt.title("Voxelgym")
- plt.imshow(np.moveaxis(self.obs_world.astype(np.uint8), 0, 2))
+ plt.subplot(1, 2, 1)
+ plt.title("Agent View")
+ plt.imshow(np.moveaxis(self.obs_world, 0, 2))
+ plt.subplot(1, 2, 2)
+ plt.title("Current Astar Path")
+ plt.imshow(np.moveaxis(self.obs_world_astar, 0, 2))
palette = [[77, 77, 255], [77, 255, 77], [255, 77, 77], [144, 144, 144]]
classes = ["obstacles", "target", "agent", "last visited"]
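
The palette/classes pairs above suggest a colour legend for the two rendered panels; one possible way to attach it (a sketch, not necessarily the method's actual continuation) is via matplotlib patches:

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

palette = [[77, 77, 255], [77, 255, 77], [255, 77, 77], [144, 144, 144]]   # 0-255 RGB
classes = ["obstacles", "target", "agent", "last visited"]
handles = [
    mpatches.Patch(color=[c / 255.0 for c in rgb], label=name)
    for rgb, name in zip(palette, classes)
]
plt.legend(handles=handles, loc="lower right")
plt.show()
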
diff --git a/voxelgym2D/envs/env_one_step.py b/voxelgym2D/envs/env_one_step.py
index 0c95189..98c1042 100644
--- a/voxelgym2D/envs/env_one_step.py
+++ b/voxelgym2D/envs/env_one_step.py
@@ -1,37 +1,42 @@
-"""Environment corresponding to Onestep action space"""
+"""Voxel environment corresponding to Onestep action space"""
-from typing import List, Tuple
+from typing import List, Optional, Tuple
-import gym
+import gymnasium as gym
import numpy as np
from .base_env import BaseEnv
class VoxelGymOneStep(BaseEnv):
- """Environment corresponding to Onestep action space"""
+ """Voxel environment corresponding to Onestep action space"""
def __init__(
self,
- mapfile: str = "200x200x200_dense.npy",
+ render_mode: Optional[str] = None,
+ mapfile: str = "600x600.npy",
view_size: int = 21,
+ image_size: int = 42,
max_collisions: int = 0,
max_steps: int = 60,
show_path: bool = True,
multi_output: bool = False,
partial_reward: bool = True,
- image_size: int = 42,
- test_mode: bool = False,
inference_mode: bool = False,
discrete_actions: bool = True,
+ log_level: str = "ERROR",
):
"""
Parameters
----------
+ render_mode : Optional[str], optional
+ render mode, by default None
mapfile : str
name of the map file in the maps folder
view_size : int
size of the view window for observation
+ image_size : int
+ size of the image to be returned as observation
max_collisions : int
maximum number of collisions allowed before episode ends
max_steps : int
@@ -42,34 +47,34 @@ def __init__(
whether to add additional outputs in the observation
partial_reward : bool
whether to give rewards for each step
- image_size : int
- size of the image to be returned as observation
- test_mode : bool
- whether to run in test mode, for evaluation during training
inference_mode : bool
whether to run in inference mode
discrete_actions : bool
whether to use discrete actions
+ log_level : str, optional
+ log level, by default "ERROR". One of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
"""
super().__init__(
+ render_mode,
mapfile,
view_size,
+ image_size,
max_collisions,
max_steps,
show_path,
multi_output,
partial_reward,
- image_size,
- test_mode,
inference_mode,
+ log_level,
)
self.discrete_actions = discrete_actions
if self.discrete_actions:
self.action_space = gym.spaces.Discrete(len(self.delta))
else:
- self.action_space = gym.spaces.Box(-1, 1, shape=(len(self.delta),))
+ # self.action_space = gym.spaces.Box(-1, 1, shape=(len(self.delta)))
+ self.action_space = gym.spaces.Box(-1, 1, shape=[1])
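
A construction sketch for the two action-space variants configured above; keyword values other than the documented defaults are purely illustrative.

from voxelgym2D.envs.env_one_step import VoxelGymOneStep

disc_env = VoxelGymOneStep(discrete_actions=True, log_level="INFO")
print(disc_env.action_space)       # Discrete space with one entry per element of self.delta

cont_env = VoxelGymOneStep(discrete_actions=False)
print(cont_env.action_space)       # Box(-1, 1) holding a single scalar action
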
def _compute_reward(self, completion_reward: bool = False) -> float:
"""
@@ -91,30 +96,49 @@ def _compute_reward(self, completion_reward: bool = False) -> float:
if completion_reward:
reward_completion = 0.5
+ return reward_completion
- else:
- reward_completion = -self.action_cost
-
- _new_astarPath, _cost_astar, self.astar_runs = self.run_astar(self._target_location)
- if len(_new_astarPath) > 0:
- self.astarPath = _new_astarPath
- self.cost_astar = _cost_astar
+ reward_completion = -self.action_cost
- reward_euc_astar = self.min_cost_astar - _cost_astar
- else:
- # fallback to euclidean distance if astar fails
- reward_euc_astar = self.min_distance_to_target - float(
- np.linalg.norm(self._target_location - self._agent_location)
- )
+ _new_astarPath, _cost_astar, self.astar_runs = self._run_astar(self._target_location)
+ if len(_new_astarPath) > 0:
+ self.astarPath = _new_astarPath
+ self.cost_astar = _cost_astar
- if completion_reward:
- return reward_completion
- if (self.cost_astar < self.min_cost_astar) and self.partial_reward:
- return round((2 * reward_euc_astar + reward_completion) / 10, 4)
- if (self.cost_astar > self.min_cost_astar) and self.partial_reward:
+ reward_euc_astar = self.min_cost_astar - _cost_astar
+ improved = self.min_cost_astar > _cost_astar
+ else:
+ # fallback to euclidean distance if astar fails
+ current_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
+ reward_euc_astar = self.min_distance_to_target - current_distance_to_target
+ improved = self.min_distance_to_target > current_distance_to_target
+
+ if self.partial_reward:
+ if improved:
+ return round((2 * reward_euc_astar + reward_completion) / 10, 4)
return round((reward_euc_astar + reward_completion) / 10, 4)
+
return round(reward_completion / 10, 4)
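
A worked example of the partial-reward arithmetic above, with made-up costs:

action_cost = 1.0                                   # reward_completion = -action_cost
min_cost_astar, new_cost_astar = 10.0, 8.5          # A* got cheaper, so improved is True
reward_euc_astar = min_cost_astar - new_cost_astar  # 1.5
print(round((2 * reward_euc_astar - action_cost) / 10, 4))   # 0.2
# had the A* cost grown to 11.0 instead, improved would be False:
print(round(((10.0 - 11.0) - action_cost) / 10, 4))          # -0.2
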
+ @staticmethod
+ def action_to_bins(action: np.ndarray) -> int:
+ """
+    Converts the continuous action to bins of size 1/4 and returns the bin number in the range [0, 7]
+
+ Parameters
+ ----------
+ action : np.ndarray
+ action to be converted to bin number
+
+ Returns
+ -------
+ bin : int
+ bin number in the range [0, 7]
+ """
+ # Clip action to [-1, 1]
+ clipped = np.clip(action, -1, 1)
+ return min(int((clipped + 1) * 4), 7)
+
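
A few spot checks of the binning helper above, assuming the class import shown below:

import numpy as np

from voxelgym2D.envs.env_one_step import VoxelGymOneStep

for action, expected_bin in [(-1.0, 0), (-0.3, 2), (0.0, 4), (0.6, 6), (1.0, 7), (2.5, 7)]:
    assert VoxelGymOneStep.action_to_bins(np.array(action)) == expected_bin
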
def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
"""
Takes the action and updates the agent location
@@ -132,11 +156,12 @@ def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
True if the agent collides with an obstacle, else False
"""
if not self.discrete_actions:
- self.action = list(self.delta[np.argmax(action)])
+ # convert angle to bin number
+ action_idx = self.action_to_bins(action)
+ self.action = list(self.delta[action_idx])
else:
self.action = list(self.delta[int(action)])
- self.action_cost = float(np.linalg.norm(self.action))
action_location = self._agent_location + np.array(self.action, dtype=np.int32)
action_path = [action_location.tolist()]
@@ -154,4 +179,10 @@ def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
# complete path by including initial location
valid_action_path.insert(0, self._agent_location.tolist())
+ # compute action cost
+ self.action_cost = 0.0
+        for dx in np.array(valid_action_path[1:]) - np.array(valid_action_path[:-1]):
+ self.action_cost += np.sqrt(np.sum(dx**2))
+ self.action_cost = round(self.action_cost, 4)
+
return valid_action_path, collision
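
The cost bookkeeping added above is simply the summed Euclidean length of the realised path; an equivalent stand-alone computation, using a made-up path, would be:

import numpy as np

valid_action_path = [[10, 10], [11, 11], [11, 12]]    # start cell plus two moves (made up)
steps = np.diff(np.array(valid_action_path), axis=0)  # per-step displacements
action_cost = round(float(np.sum(np.sqrt(np.sum(steps**2, axis=1)))), 4)
print(action_cost)                                    # 2.4142 == sqrt(2) + 1
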
diff --git a/voxelgym2D/envs/maps/100x100x100_dense.npy b/voxelgym2D/envs/maps/100x100x100_dense.npy
deleted file mode 100644
index 1962b83..0000000
Binary files a/voxelgym2D/envs/maps/100x100x100_dense.npy and /dev/null differ
diff --git a/voxelgym2D/envs/maps/200x200x200_dense.npy b/voxelgym2D/envs/maps/200x200x200_dense.npy
deleted file mode 100644
index 6101fe7..0000000
Binary files a/voxelgym2D/envs/maps/200x200x200_dense.npy and /dev/null differ
diff --git a/voxelgym2D/envs/maps/50x50x50_dense.npy b/voxelgym2D/envs/maps/50x50x50_dense.npy
deleted file mode 100644
index 50cdb81..0000000
Binary files a/voxelgym2D/envs/maps/50x50x50_dense.npy and /dev/null differ
diff --git a/voxelgym2D/envs/maps/600x600.npy b/voxelgym2D/envs/maps/600x600.npy
new file mode 100644
index 0000000..97f6eb2
Binary files /dev/null and b/voxelgym2D/envs/maps/600x600.npy differ
diff --git a/voxelgym2D/version.txt b/voxelgym2D/version.txt
new file mode 100644
index 0000000..9325c3c
--- /dev/null
+++ b/voxelgym2D/version.txt
@@ -0,0 +1 @@
+0.3.0
\ No newline at end of file