diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 77558a5..091d610 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -10,7 +10,7 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
@@ -20,19 +20,11 @@ jobs:
python -m pip install --upgrade pip
pip install setuptools wheel twine
- - name: Build and publish to Test PyPI
- env:
- TWINE_USERNAME: ${{ secrets.TEST_PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.TEST_PYPI_PASSWORD }}
- run: |
- python setup.py sdist bdist_wheel
- twine check dist/*
- twine upload --repository testpypi dist/*
-
- name: Build and publish to PyPI
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
+ twine check dist/*
twine upload dist/*
diff --git a/.github/workflows/github-actions.yml b/.github/workflows/test-main.yml
similarity index 84%
rename from .github/workflows/github-actions.yml
rename to .github/workflows/test-main.yml
index 60588f1..ed37e39 100644
--- a/.github/workflows/github-actions.yml
+++ b/.github/workflows/test-main.yml
@@ -1,4 +1,4 @@
-name: CI
+name: Test
on:
push:
@@ -17,7 +17,7 @@ jobs:
steps:
- name: Checkout code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
@@ -26,7 +26,8 @@ jobs:
- name: Install dependencies
run: |
- pip install setuptools==65.5.0
+ python -m pip install --upgrade pip
+ pip install setuptools wheel twine
pip install -e .[dev]
- name: Pylint
@@ -43,7 +44,7 @@ jobs:
- name: Pytest
run: |
- coverage run -m pytest tests/
+ coverage run --source voxelgym2D -m pytest tests/
coverage report
- name: Build docs
diff --git a/LICENSE b/LICENSE
index 72b2e54..7893339 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2023 Hari
+Copyright (c) 2023 Harisankar Babu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index f760489..5cb030f 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,44 @@
# voxelgym2D
+
+[![MIT License](https://img.shields.io/github/license/harisankar95/voxelgym2D)](LICENSE)
+[![PyPI](https://img.shields.io/pypi/v/voxelgym2D)](https://pypi.org/project/voxelgym2D/)
+[![Pipeline](https://github.com/harisankar95/voxelgym2D/actions/workflows/test-main.yml/badge.svg?branch=main)](https://github.com/harisankar95/voxelgym2D/actions/workflows/test-main.yml)
+[![codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
A gym environment for voxel/grid based reinforcement learning for path planning.
## Results with [SB3](https://github.com/DLR-RM/stable-baselines3) (v1.6.2) : PPO :smile:
+
Here are the results of training a PPO agent on the `onestep-v0` environment using the example [here](examples/train_ppo.py). Below you will find the episode reward and episode length over training steps. As training progresses, the episode reward increases and the episode length decreases, as the agent learns to identify the goal and reach it along the shortest possible path.
-
## Installation
+
```bash
pip install git+https://github.com/harisankar95/voxelgym2D.git
```
@@ -43,7 +50,9 @@ pip install voxelgym2D
```
## Development
+
To install the package in development mode, run the following command in the root directory of the repository:
+
```bash
git clone https://github.com/harisankar95/voxelgym2D.git ~/path/to/repo
cd ~/path/to/repo
@@ -54,41 +63,53 @@ pip install -e .[dev,sb3]
```
## Usage
+
```python
import voxelgym2D
-import gym
+import gymnasium as gym
env = gym.make("voxelgym2D:onestep-v0")
-env.reset()
+observation, info = env.reset(seed=123456)
env.render()
```
## Examples
+
The examples can be found [here](examples).
## License
+
This project is licensed under the terms of the [MIT license](LICENSE).
## Documentation
+
The documentation can be found [here](https://harisankar95.github.io/voxelgym2D/).
## Changelog
+
### 0.1.0
+
- Initial release of voxelgym 2D environments tested with stable_baselines 3 (v1.6.2) and python 3.8
+
### 0.2.0
+
- Available on PyPI, sphinx documentation in work
+### 0.3.0
+
+- Migration to gymnasium
+- Agent can now be reset to multiple start positions
+
## TODO
+
- [x] Add 2D environments
-- [ ] Test with gym 0.26.2
- [ ] Add documentation
-## Known issues
-- [ ] Currently only supports gym==0.21.0 :neutral_face:, hence setuptools==65.5.0 is required to install gym.
-
## Contributing
+
Contributions are welcome! Please open an issue or a pull request.
## References
+
- [OpenAI Gym](https://arxiv.org/abs/1606.01540)
- [Stable Baselines 3](http://jmlr.org/papers/v22/20-1364.html)
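The changelog's "Migration to gymnasium" entry drives most of the API changes in this diff: `reset()` now returns `(observation, info)` and `step()` returns five values instead of four. A minimal sketch of the difference, for orientation only (not part of the patch):

```python
import gymnasium as gym

env = gym.make("voxelgym2D:onestep-v0")

# Gymnasium-style API used after the migration:
obs, info = env.reset(seed=123456)          # reset returns (observation, info)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
done = terminated or truncated              # the old single `done` flag is split in two

# Legacy gym (<= 0.21) equivalents that this change removes:
#   obs = env.reset()
#   obs, reward, done, info = env.step(action)

env.close()
```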
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
new file mode 100644
index 0000000..ff4a75e
--- /dev/null
+++ b/docs/INSTALL.md
@@ -0,0 +1,28 @@
+# Installation
+
+## PyPI
+
+Install the package from PyPI using pip:
+
+```bash
+pip install voxelgym2D
+```
+
+## GitHub
+
+```bash
+pip install git+https://github.com/harisankar95/voxelgym2D.git
+```
+
+## For development, install in editable mode
+
+To install the package in development mode, run the following command in the root directory of the repository:
+
+```bash
+git clone https://github.com/harisankar95/voxelgym2D.git ~/path/to/repo
+cd ~/path/to/repo
+pip install -e .[dev]
+
+# to additionally install Stable Baselines3 and PyTorch (optional)
+pip install -e .[dev,sb3]
+```
diff --git a/docs/INTRO.md b/docs/INTRO.md
new file mode 100644
index 0000000..a1bc086
--- /dev/null
+++ b/docs/INTRO.md
@@ -0,0 +1,16 @@
+# Voxelgym2D
+
+A gym environment for voxel/grid based reinforcement learning for path planning.
+
+<!-- results: episode reward and episode length plots -->
diff --git a/docs/USAGE.md b/docs/USAGE.md
new file mode 100644
index 0000000..d05046b
--- /dev/null
+++ b/docs/USAGE.md
@@ -0,0 +1,21 @@
+# Examples
+
+For usage examples with detailed descriptions take a look at the [examples](https://github.com/harisankar95/voxelgym2D/tree/main/examples/) folder.
+
+## Basic usage
+
+```python
+import gymnasium as gym
+
+env = gym.make("voxelgym2D:onestep-v0")
+observation, info = env.reset(seed=123456)
+
+done = False
+while not done:
+ action = env.action_space.sample() # agent policy that uses the observation and info
+ observation, reward, terminated, truncated, info = env.step(action)
+
+ done = terminated or truncated
+ env.render()
+
+env.close()
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
new file mode 100644
index 0000000..ca6b99e
--- /dev/null
+++ b/docs/_static/custom.css
@@ -0,0 +1,77 @@
+/* Based on Stable Baselines 3 theme
+* https://github.com/DLR-RM/stable-baselines3/
+* */
+:root {
+ --main-bg-color: #B6C8DB;
+ --link-color: #6DB59F;
+}
+
+/* Header fonts */
+h1,
+h2,
+.rst-content .toctree-wrapper p.caption,
+h3,
+h4,
+h5,
+h6,
+legend,
+p.caption {
+ font-family: "Lato", "proxima-nova", "Helvetica Neue", Arial, sans-serif;
+}
+
+
+/* Docs background */
+.wy-side-nav-search {
+ background-color: var(--main-bg-color);
+}
+
+/* Mobile version */
+.wy-nav-top {
+ background-color: var(--main-bg-color);
+}
+
+/* Change link colors (except for the menu) */
+a {
+ color: var(--link-color);
+}
+
+a:hover {
+ color: #798EA9;
+}
+
+.wy-menu a {
+ color: #b3b3b3;
+}
+
+.wy-menu a:hover {
+ color: #b3b3b3;
+}
+
+a.icon.icon-home {
+ color: #b3b3b3;
+}
+
+.version {
+ color: var(--link-color) !important;
+}
+
+
+/* Make code blocks have a background */
+.codeblock,
+pre.literal-block,
+.rst-content .literal-block,
+.rst-content pre.literal-block,
+div[class^='highlight'] {
+ background: #FFFFFF;
+}
+
+/* Change style of types in the docstrings .rst-content .field-list */
+.field-list .xref.py.docutils,
+.field-list code.docutils,
+.field-list .docutils.literal.notranslate {
+ border: None;
+ padding-left: 0;
+ padding-right: 0;
+ color: #404040;
+}
\ No newline at end of file
diff --git a/docs/_templates/versions.html b/docs/_templates/versions.html
new file mode 100644
index 0000000..c49f844
--- /dev/null
+++ b/docs/_templates/versions.html
@@ -0,0 +1,27 @@
+{%- if current_version %}
+
+
+ Other Versions
+ v: {{ current_version.name }}
+
+
+
+ {%- if versions.tags %}
+
+ - Tags
+ {%- for item in versions.tags %}
+ - {{ item.name }}
+ {%- endfor %}
+
+ {%- endif %}
+ {%- if versions.branches %}
+
+ - Branches
+ {%- for item in versions.branches %}
+ - {{ item.name }}
+ {%- endfor %}
+
+ {%- endif %}
+
+
+{%- endif %}
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 5ae22e7..17dfafe 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -3,43 +3,209 @@
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
import os
import sys
-sys.path.insert(0, os.path.abspath('..'))
-# -- Project information -----------------------------------------------------
-# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+sys.path.insert(0, os.path.abspath(".."))
+
+# read the version from version.txt
+with open(os.path.join("../voxelgym2D", "version.txt"), encoding="utf-8") as file_handler:
+ __version__ = file_handler.read().strip()
+
+
+project = "Voxelgym2D"
+copyright = "2023, Harisankar Babu"
+author = "Harisankar Babu"
+release = __version__
+version = __version__
-project = 'Voxelgym2D'
-copyright = '2023, Harisankar Babu'
-author = 'Harisankar Babu'
-release = '0.1'
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.napoleon',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.githubpages',
- 'sphinx.ext.inheritance_diagram',
- ]
+ "sphinx.ext.autodoc", # for autodoc
+ "sphinx.ext.ifconfig", # for if statements
+ "sphinx.ext.autosummary", # for autosummary
+ "sphinx.ext.doctest", # for doctest
+ "sphinx.ext.todo", # for todo list
+ "sphinx.ext.viewcode", # for source code
+ "sphinx.ext.napoleon", # for google style docstrings
+ "sphinx.ext.githubpages", # for github pages
+ "sphinx.ext.inheritance_diagram", # for inheritance diagrams
+ "sphinx.ext.graphviz", # for graphviz
+ "sphinx.ext.mathjax", # for math
+ "sphinx_autodoc_typehints", # for type hints
+ "sphinx_autodoc_annotation", # for annotations
+ "sphinx_copybutton", # for copy button
+ "sphinx-prompt", # for prompt
+ "notfound.extension", # for 404 page
+ "versionwarning.extension", # for version warning
+ "recommonmark", # for markdown
+ "nbsphinx", # for notebooks
+]
-templates_path = ['_templates']
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+templates_path = ["_templates"]
+html_sidebars = {
+ "**": [
+ "_templates/versions.html",
+ ],
+}
+source_suffix = [".rst", ".md"]
+
+# The master toctree document.
+master_doc = "index"
+language = "en"
+
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
-html_theme = 'sphinx_rtd_theme'
-html_static_path = ['_static']
+html_theme = "sphinx_rtd_theme"
+html_static_path = ["_static"]
+html_css_files = ["custom.css"]
+html_show_sourcelink = False
+html_show_sphinx = False
+html_copy_source = False
+html_show_copyright = True
+html_use_index = True
+# html
+html_theme_options = {
+ "canonical_url": "",
+ "display_version": True,
+ "prev_next_buttons_location": "bottom",
+ "style_external_links": True,
+ "style_nav_header_background": "white",
+ # Toc options
+ "collapse_navigation": False,
+ "sticky_navigation": True,
+ "navigation_depth": 4,
+ "includehidden": True,
+ "titles_only": False,
+}
+
+# generate autosummary even if no references
+autosummary_generate = True
+autosummary_imported_members = True
+
+# autodoc
+autodoc_mock_imports = []
+autodoc_typehints = "description"
+autodoc_inherit_docstrings = True
+autodoc_preserve_defaults = True
+autodoc_default_options = {
+ "members": True,
+ "member-order": "bysource",
+ "special-members": "__init__",
+ "undoc-members": True,
+ "private-members": True,
+ "exclude-members": "__weakref__",
+ "show-inheritance": True,
+ "inherited-members": True,
+ "ignore-module-all": True,
+}
+
+# coverage
+coverage_show_missing_items = True
+coverage_skip_undoc_in_source = True
+
+# syntax highlighting
+pygments_style = "sphinx"
+highlight_language = "python3"
+
+# napoleon
+napoleon_numpy_docstring = True
+
+# todo-section
+todo_include_todos = False
+
+# inheritance diagrams
+# smaller diagrams with rectangular nodes
+inheritance_graph_attrs = {
+ "rankdir": "TB",
+ "size": '"6.0, 8.0"',
+ "fontsize": 12,
+ "ratio": "compress",
+ "bgcolor": "transparent",
+}
+
+inheritance_node_attrs = {
+ "shape": "rect",
+ "fontsize": 12,
+ "color": "orange",
+ "style": "filled",
+ "fillcolor": "white",
+}
+
+inheritance_edge_attrs = {
+ "arrowsize": 0.5,
+ "penwidth": 1.0,
+ "color": "orange",
+}
+
+# graphviz
+graphviz_output_format = "svg"
+graphviz_dot_args = [
+ "-Gbgcolor=transparent",
+ "-Nfontname=Helvetica",
+ "-Efontname=Helvetica",
+ "-Gfontname=Helvetica",
+ "-Gfontsize=12",
+ "-Nfontsize=12",
+ "-Efontsize=12",
+]
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "Voxelgym2D-doc"
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements: dict = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files.
+latex_documents = [
+ (master_doc, "Voxelgym2D.tex", "Voxelgym2D Documentation", "Voxelgym2D Contributors", "manual"),
+]
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(master_doc, "Voxelgym2D", "Voxelgym2D Documentation", [author], 1)]
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files.
+texinfo_documents = [
+ (
+ master_doc,
+ "Voxelgym2D",
+ "Voxelgym2D Documentation",
+ author,
+ "Voxelgym2D",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
+]
diff --git a/docs/index.rst b/docs/index.rst
index d27606a..32607ea 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,9 +7,12 @@ Welcome to Voxelgym2D's documentation!
======================================
.. toctree::
- :maxdepth: 2
+ :maxdepth: 3
:caption: Contents:
+ Voxelgym2D <./INTRO.md>
+ Installation <./INSTALL.md>
+ Usage <./USAGE.md>
modules
Indices and tables
@@ -17,4 +20,3 @@ Indices and tables
* :ref:`genindex`
* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/voxelgym2D.envs.rst b/docs/voxelgym2D.envs.rst
index ba3e2b2..c6d7985 100644
--- a/docs/voxelgym2D.envs.rst
+++ b/docs/voxelgym2D.envs.rst
@@ -1,6 +1,11 @@
voxelgym2D.envs package
=======================
+.. automodule:: voxelgym2D.envs
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
Submodules
----------
@@ -19,11 +24,3 @@ voxelgym2D.envs.env\_one\_step module
:members:
:undoc-members:
:show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: voxelgym2D.envs
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/voxelgym2D.rst b/docs/voxelgym2D.rst
index 412d1b3..8348128 100644
--- a/docs/voxelgym2D.rst
+++ b/docs/voxelgym2D.rst
@@ -1,6 +1,11 @@
voxelgym2D package
==================
+.. automodule:: voxelgym2D
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
Subpackages
-----------
@@ -8,12 +13,3 @@ Subpackages
:maxdepth: 4
voxelgym2D.envs
-
-Module contents
----------------
-
-.. automodule:: voxelgym2D
- :members:
- :undoc-members:
- :show-inheritance:
- :inherited-members:
diff --git a/examples/onestep.py b/examples/onestep.py
new file mode 100644
index 0000000..19d9f80
--- /dev/null
+++ b/examples/onestep.py
@@ -0,0 +1,14 @@
+import gymnasium as gym
+
+env = gym.make("voxelgym2D:onestep-v0")
+observation, info = env.reset(seed=123456)
+
+done = False
+while not done:
+ action = env.action_space.sample() # agent policy that uses the observation and info
+ observation, reward, terminated, truncated, info = env.step(action)
+
+ done = terminated or truncated
+ env.render()
+
+env.close()
diff --git a/examples/train_ppo.py b/examples/train_ppo.py
index 56759bb..a82bde1 100644
--- a/examples/train_ppo.py
+++ b/examples/train_ppo.py
@@ -2,11 +2,11 @@
import os
from typing import Callable
-import gym
+import gymnasium as gym
import numpy as np
import torch
from stable_baselines3 import PPO
-from stable_baselines3.common.callbacks import BaseCallback
+from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
@@ -14,9 +14,6 @@
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from torch import nn
-from tqdm.auto import tqdm
-
-import voxelgym2D
# Create log dir
LOG_DIR = "./logs/ppo_onestep/"
@@ -61,7 +58,6 @@ class SaveOnBestTrainingRewardCallback(BaseCallback):
"""
Callback for saving a model (the check is done every ``check_freq`` steps)
based on the training reward (in practice, we recommend using ``EvalCallback``).
-
:param check_freq: (int)
:param log_dir: (str) Path to the folder where the model will be saved.
It must contain the file created by the ``Monitor`` wrapper.
@@ -78,10 +74,8 @@ def __init__(self, check_freq, log_dir, verbose=1):
def _init_callback(self) -> None:
# Create folder if needed
- if self.save_path is not None:
- os.makedirs(self.save_path, exist_ok=True)
- if self.chckpoint_path is not None:
- os.makedirs(self.chckpoint_path, exist_ok=True)
+ if self.log_dir is not None:
+ os.makedirs(self.log_dir, exist_ok=True)
def _on_step(self) -> bool:
if self.n_calls % self.check_freq == 0:
@@ -89,15 +83,15 @@ def _on_step(self) -> bool:
x, y = ts2xy(load_results(self.log_dir), "timesteps")
if len(x) > 0:
# Mean training reward over the last 100 episodes
- mean_reward = np.mean(y[-100:])
+ _mean_reward = np.mean(y[-100:])
if self.verbose > 0:
print(f"Num timesteps: {self.num_timesteps}")
print(f"Best mean reward: {self.best_mean_reward:.2f}")
- print(f"Last mean reward per episode: {mean_reward:.2f}")
+ print(f"Last mean reward per episode: {_mean_reward:.2f}")
# New best model, you could save the agent here
- if mean_reward > self.best_mean_reward:
- self.best_mean_reward = mean_reward
+ if _mean_reward > self.best_mean_reward:
+ self.best_mean_reward = _mean_reward
# Example for saving best model
if self.verbose > 0:
print(f"Saving new best model at {x[-1]} timesteps")
@@ -112,44 +106,10 @@ def _on_step(self) -> bool:
return True
-class ProgressBarCallback(BaseCallback):
- """
- :param pbar: (tqdm.pbar) Progress bar object
- """
-
- def __init__(self, pbar):
- super().__init__()
- self._pbar = pbar
-
- def _on_step(self):
- # Update the progress bar:
- self._pbar.n = self.num_timesteps
- self._pbar.update(0)
-
-
-# this callback uses the 'with' block, allowing for correct initialisation and destruction
-class ProgressBarManager:
- """For tqdm progress bar in a with block."""
-
- def __init__(self, total_timesteps): # init object with total timesteps
- self.pbar = None
- self.total_timesteps = total_timesteps
-
- def __enter__(self): # create the progress bar and callback, return the callback
- self.pbar = tqdm(total=self.total_timesteps)
- return ProgressBarCallback(self.pbar)
-
- def __exit__(self, exc_type, exc_val, exc_tb): # close the callback
- self.pbar.n = self.total_timesteps
- self.pbar.update(0)
- self.pbar.close()
-
-
# scheduler
def linear_schedule(initial_value: float) -> Callable[[float], float]:
"""
Linear learning rate schedule.
-
:param initial_value: Initial learning rate.
:return: schedule that computes
current learning rate depending on remaining progress
@@ -158,7 +118,6 @@ def linear_schedule(initial_value: float) -> Callable[[float], float]:
def func(progress_remaining: float) -> float:
"""
Progress will decrease from 1 (beginning) to 0.
-
:param progress_remaining:
:return: current learning rate
"""
@@ -176,15 +135,13 @@ def func(progress_remaining: float) -> float:
seed=1327455,
monitor_dir=LOG_DIR,
env_kwargs={
- "mapfile": "200x200x200_dense.npy",
+ "mapfile": "600x600.npy",
"view_size": 21,
+ "image_size": 42,
"max_collisions": 0,
"max_steps": 60,
- "show_path": True,
"discrete_actions": True,
- "multi_output": False,
- "partial_reward": True,
- "image_size": 42,
+ "render_mode": "None",
},
vec_env_cls=SubprocVecEnv,
)
@@ -209,33 +166,41 @@ def func(progress_remaining: float) -> float:
ent_coef=0.01,
vf_coef=0.5,
verbose=1,
- tensorboard_log="tb_logs/ppo_onestep",
+ tensorboard_log="tb_logs/ppo_onestep/",
target_kl=0.4,
)
+ # Create eval env
eval_env = DummyVecEnv(
[
lambda: Monitor(
gym.make(
"voxelgym2D:onestep-v0",
- mapfile="200x200x200_dense.npy",
+ mapfile="600x600.npy",
view_size=21,
+ image_size=42,
max_collisions=0,
max_steps=60,
- show_path=True,
discrete_actions=True,
- multi_output=False,
- partial_reward=True,
- test_mode=True,
- image_size=42,
+ render_mode="None",
),
filename=os.path.join(LOG_DIR, "eval"),
)
]
)
- # n_eval_episodes = 50 since soft_reset_freq in base_env is 50
- mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50)
+ # Use deterministic actions for evaluation
+ eval_callback = EvalCallback(
+ eval_env,
+ best_model_save_path=None,
+ log_path=os.path.join(LOG_DIR, "eval"),
+ eval_freq=10000,
+ n_eval_episodes=50,
+ deterministic=True,
+ render=False,
+ )
+
+ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50, warn=False)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
# Create Callback
@@ -243,17 +208,13 @@ def func(progress_remaining: float) -> float:
TOTAL_TIME_STEPS = 10000000
- with ProgressBarManager(TOTAL_TIME_STEPS) as progress_callback:
- # This is equivalent to callback=CallbackList([progress_callback, auto_save_callback])
- model.learn(
- total_timesteps=TOTAL_TIME_STEPS,
- eval_env=eval_env,
- n_eval_episodes=50,
- eval_freq=10000,
- callback=[progress_callback, auto_save_callback],
- )
+ model.learn(
+ total_timesteps=TOTAL_TIME_STEPS,
+ callback=[auto_save_callback, eval_callback],
+ progress_bar=True,
+ )
model.save(os.path.join(LOG_DIR, "ppo_saved"))
- mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50)
+ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50, warn=False)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
diff --git a/setup.cfg b/setup.cfg
index 53d2dd6..e76d18d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -20,6 +20,11 @@ source = voxelgym2D
[coverage:report]
show_missing = True
+exclude_also =
+ def __repr__
+ def __str__
+ def __lt__
+ def __eq__
[pylint]
disable = missing-docstring,
@@ -32,7 +37,7 @@ disable = missing-docstring,
too-many-arguments,
too-many-branches,
# many functions will naturally have unused arguments.
- unused-argument
+ unused-argument,
[pylint.FORMAT]
max-line-length = 120
diff --git a/setup.py b/setup.py
index b25b624..1ddcd93 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,18 @@
+import os
+
from setuptools import find_packages, setup
+# read the version from version.txt
+with open(os.path.join("voxelgym2D", "version.txt"), encoding="utf-8") as file_handler:
+ __version__ = file_handler.read().strip()
+
setup(
name="voxelgym2D",
- version="0.2",
+ version=__version__,
description="Gym environment for 2D grid path planning",
author="Harisankar Babu",
author_email="harisankar995@gmail.com",
- keywords="reinforcement-learning machine-learning gym openai python data-science",
+ keywords=["reinforcement-learning", "machine-learning", "gym", "openai", "python", "gymnasium"],
license="MIT",
url="https://github.com/harisankar95/voxelgym2D.git",
classifiers=[
@@ -18,17 +24,16 @@
"Programming Language :: Python :: 3.8",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
- packages=find_packages(),
+ packages=[package for package in find_packages() if package.startswith("voxelgym2D")],
package_data={
- "voxelgym2D": ["envs/maps/*.npy"],
+ "voxelgym2D": ["envs/maps/*.npy", "version.txt"],
},
install_requires=[
- # sb3 support for gym 0.21
- "gym==0.21",
+ "gymnasium",
"numpy",
"scikit-image",
"opencv-python",
- "pathfinding==1.0.1",
+ "pathfinding>=1.0.4",
# rendering
"matplotlib",
],
@@ -43,11 +48,16 @@
"tox",
"sphinx",
"sphinx_rtd_theme",
+ "recommonmark",
+ "nbsphinx",
+ "sphinx-autodoc-typehints",
+ "sphinx-copybutton",
+ "sphinx-prompt",
+ "sphinx-notfound-page",
+ "sphinx-version-warning",
+ "sphinx-autodoc-annotation",
],
- "sb3": [
- "stable-baselines3[extra]==1.6.2",
- "sb3-contrib==1.6.2",
- ],
+ "sb3": ["stable-baselines3[extra]>=2.0.0", "sb3-contrib>=2.0.0", "rl_zoo3>=2.0.0"],
},
- python_requires=">=3.7",
+ python_requires=">=3.8",
)
diff --git a/tests/test_env.py b/tests/test_env.py
index b496e1b..5f5164d 100644
--- a/tests/test_env.py
+++ b/tests/test_env.py
@@ -1,19 +1,17 @@
"""Test the environment."""
-import gym
+import gymnasium as gym
import numpy as np
-import pytest
-from gym.utils.env_checker import check_env
+from gymnasium.utils.env_checker import check_env
-import voxelgym2D
+from voxelgym2D.envs import VoxelGymOneStep
def test_onsestep():
"""test onestep env"""
env = gym.make("voxelgym2D:onestep-v0")
- check_env(env)
- _ = env.reset()
- _, i = env.reset(return_info=True)
+ check_env(env.unwrapped, skip_render_check=True)
+ _, i = env.reset(seed=1234)
# assert i is a dict
assert isinstance(i, dict)
@@ -22,7 +20,19 @@ def test_onsestep():
env = gym.make(
"voxelgym2D:onestep-v0",
discrete_actions=False,
- inference_mode=True,
- multi_output=True,
)
- check_env(env)
+ check_env(env.unwrapped, skip_render_check=True)
+
+
+def test_action_to_bins():
+ """Test the action to bins function"""
+ assert VoxelGymOneStep.action_to_bins(np.array([-1])) == 0
+ assert VoxelGymOneStep.action_to_bins(np.array([-0.75])) == 1
+ assert VoxelGymOneStep.action_to_bins(np.array([-0.5])) == 2
+ assert VoxelGymOneStep.action_to_bins(np.array([-0.25])) == 3
+ assert VoxelGymOneStep.action_to_bins(np.array([0])) == 4
+
+ assert VoxelGymOneStep.action_to_bins(np.array([0.25])) == 5
+ assert VoxelGymOneStep.action_to_bins(np.array([0.5])) == 6
+ assert VoxelGymOneStep.action_to_bins(np.array([0.75])) == 7
+ assert VoxelGymOneStep.action_to_bins(np.array([1])) == 7
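The new assertions pin down the continuous-to-discrete action mapping: a scalar in [-1, 1] falls into one of 8 bins of width 0.25, with the upper edge clamped into the last bin. A reference mapping consistent with these assertions, written here only for illustration (it is not voxelgym2D's actual implementation):

```python
import numpy as np

def action_to_bins_reference(action: np.ndarray) -> int:
    """Map a scalar action in [-1, 1] to one of 8 bins, matching the assertions above."""
    a = float(np.clip(action[0], -1.0, 1.0))
    # bins of width 0.25 starting at -1; the value 1.0 is clamped into bin 7
    return min(int(np.floor((a + 1.0) * 4.0)), 7)

assert action_to_bins_reference(np.array([-1.0])) == 0
assert action_to_bins_reference(np.array([0.0])) == 4
assert action_to_bins_reference(np.array([1.0])) == 7
```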
diff --git a/tox.ini b/tox.ini
index bf296a4..ebb3a90 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,21 +1,26 @@
[tox]
-requires = tox>=4.0.0
-envlist = py38, linter, formatter
+requires = tox>=4.5.0
+envlist = clean, py38, linter, formatter
[testenv]
description = Run tests with pytest under {basepython}
+use_develop = true
deps =
- setuptools==65.5.0
coverage
pytest
+ pytest-cov
commands =
- coverage run -m pytest tests/
- coverage report
+ pytest --cov-report term-missing --cov-config=setup.cfg --cov=voxelgym2D --cov-append tests/
+
+[testenv:clean]
+deps = coverage
+skip_install = true
+commands = coverage erase
[testenv:linter]
description = Run pylint to check code quality and mypy to check type hints
+use_develop = true
deps =
- setuptools==65.5.0
pylint
mypy
commands =
diff --git a/voxelgym2D/__init__.py b/voxelgym2D/__init__.py
index 9722efe..a418bf5 100644
--- a/voxelgym2D/__init__.py
+++ b/voxelgym2D/__init__.py
@@ -1,6 +1,7 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
register(
id="onestep-v0",
entry_point="voxelgym2D.envs:VoxelGymOneStep",
+ nondeterministic=True,
)
diff --git a/voxelgym2D/envs/base_env.py b/voxelgym2D/envs/base_env.py
index 02be648..4109d07 100644
--- a/voxelgym2D/envs/base_env.py
+++ b/voxelgym2D/envs/base_env.py
@@ -1,82 +1,140 @@
"""Base class for all environments"""
-import logging
import math
import os
+import sys
from collections import OrderedDict
from itertools import product
-from typing import Dict, List, Optional, Tuple, Union
+from logging import Formatter, Logger, LogRecord, StreamHandler
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import cv2
-import gym
+import gymnasium as gym
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
-from gym.utils import seeding
+from gymnasium.utils import seeding
from pathfinding.core.diagonal_movement import DiagonalMovement
from pathfinding.core.grid import Grid
+from pathfinding.core.node import GridNode
from pathfinding.finder.a_star import AStarFinder
from skimage import draw
-# based on https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output
-class CustomFormatter(logging.Formatter):
- """Colors for different log levels."""
+class CustomFormatter(Formatter):
+ """Custom formatter with colors for different log levels."""
- grey = "\x1b[38;20m"
- yellow = "\x1b[33;20m"
- red = "\x1b[31;20m"
- bold_red = "\x1b[31;1m"
+ _format = "%(asctime)s - %(levelname)s - %(message)s - %(filename)s:%(lineno)d"
+ # different color for different log level
+ # https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output
+ white = "\x1b[37;1m"
+ green = "\x1b[32;1m"
+ yellow = "\x1b[33;1m"
+ purple = "\x1b[35;1m"
+ red = "\x1b[31;1m"
reset = "\x1b[0m"
- format_ = "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
-
- FORMATS = {
- logging.DEBUG: grey + format_ + reset,
- logging.INFO: grey + format_ + reset,
- logging.WARNING: yellow + format_ + reset,
- logging.ERROR: red + format_ + reset,
- logging.CRITICAL: bold_red + format_ + reset,
+ COLORED_FORMATS = {
+ "DEBUG": white + _format + reset,
+ "INFO": green + _format + reset,
+ "WARNING": yellow + _format + reset,
+ "ERROR": purple + _format + reset,
+ "CRITICAL": red + _format + reset,
}
- def format(self, record):
- log_fmt = self.FORMATS.get(record.levelno)
- formatter_ = logging.Formatter(log_fmt, datefmt="%H:%M:%S")
- return formatter_.format(record)
+ def format(self, record: LogRecord) -> str:
+ """
+ Format the log record.
+ Parameters
+ ----------
+ record : LogRecord
+ Log record
+
+ Returns
+ -------
+ str
+ Formatted log record
+ """
+ log_fmt = self.COLORED_FORMATS.get(record.levelname)
+ formatter = Formatter(log_fmt, datefmt="%Y-%m-%d %H:%M:%S")
+ return formatter.format(record)
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-formatter = CustomFormatter()
-stream_handler = logging.StreamHandler()
-stream_handler.setFormatter(formatter)
-logger.addHandler(stream_handler)
+
+class CustomLogger:
+ """Logger class."""
+
+ def __init__(self, name: str, log_level: str = "ERROR"):
+ """
+ Initialize the logger.
+
+ Parameters
+ ----------
+ name : str
+ Name of the logger.
+ log_level : str, optional
+ Log level, by default "ERROR"
+ """
+ if log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
+ raise ValueError(f"Invalid log level: {log_level}")
+ self.log_level = log_level
+ self.name = name
+ self._logger: Logger
+ self._setup_logger()
+
+ def _setup_logger(self):
+ """Setup the logger."""
+ self._logger = Logger(self.name)
+ self._logger.setLevel(self.log_level)
+ formatter = CustomFormatter()
+
+ # log to stdout
+ handler = StreamHandler(sys.stdout)
+ handler.setLevel(self.log_level)
+ handler.setFormatter(formatter)
+ self._logger.addHandler(handler)
+
+ def get_logger(self) -> Logger:
+ """
+ Get the logger
+
+ Returns
+ -------
+ logging.Logger
+ Logger
+ """
+ return self._logger
class BaseEnv(gym.Env):
"""Base class for all environments"""
- metadata = {"render.modes": ["human"]}
+ metadata: Dict[str, Any] = {"render_modes": ["None"], "render_fps": 1}
def __init__(
self,
- mapfile: str = "200x200x200_dense.npy",
+ render_mode: Optional[str] = None,
+ mapfile: str = "600x600.npy",
view_size: int = 21,
+ image_size: int = 42,
max_collisions: int = 0,
max_steps: int = 60,
show_path: bool = True,
multi_output: bool = False,
partial_reward: bool = True,
- image_size: int = 42,
- test_mode: bool = False,
inference_mode: bool = False,
+ log_level: str = "ERROR",
):
"""
Parameters
----------
+ render_mode : Optional[str], optional
+ render mode, by default None
mapfile : str
name of the map file in the maps folder
view_size : int
size of the view window for observation
+ image_size : int
+ size of the image to be returned as observation
max_collisions : int
maximum number of collisions allowed before episode ends
max_steps : int
@@ -87,26 +145,33 @@ def __init__(
whether to add additional outputs in the observation
partial_reward : bool
whether to give rewards for each step
- image_size : int
- size of the image to be returned as observation
- test_mode : bool
- whether to run in test mode, for evaluation during training
inference_mode : bool
whether to run in inference mode
+ log_level : str, optional
+ log level, by default "ERROR". One of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
"""
+ self.render_mode = "None" if render_mode is None else render_mode
+ if self.render_mode not in self.metadata["render_modes"]:
+ raise ValueError(f"Invalid render_mode: {self.render_mode}")
- super().__init__()
# current file path
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
# load map
self.grid_map = np.load(os.path.join(__location__, "maps", mapfile))
+ self.MAX_VAL_UINT = int(0.9 * np.iinfo(np.uint32).max) # 0.9 to avoid overflow
+
+ # an array to keep track of the start and target locations
+ self.start_locations, self.target_locations = self._start_end_counts()
self.world_size = self.grid_map.shape
- if view_size < 10:
- logger.warning("view_size should be at least 10, setting to 10")
- self.view_size = 10
+ # initialize the logger
+ self.logger = CustomLogger(self.__class__.__name__, log_level=log_level).get_logger()
+
+ if view_size < 16:
+ self.logger.warning("view_size should be at least 16, setting to 16")
+ self.view_size = 16
elif view_size > np.amin(self.world_size):
- logger.warning(
+ self.logger.warning(
"view_size should be less than half of the world size, setting to %s",
int((np.amin(self.world_size) - 2) / 2),
)
@@ -116,42 +181,73 @@ def __init__(
view_ratio = image_size / view_size # should be even
if view_ratio % 2 != 0:
- logger.warning("view_ratio should be even, setting to %s", (int(view_ratio) + 1))
+ self.logger.warning("view_ratio should be even, setting to %s", (int(view_ratio) + 1))
self.image_size = int(view_ratio + 1) * view_size
else:
self.image_size = image_size
# set the agent to middle of the world map
self._new_world = np.ones((int(2 * self.view_size), int(2 * self.view_size)))
+ # placeholder for target locations for the sliced view (_new_world)
+ self._new_target_locations = np.ones((int(2 * self.view_size), int(2 * self.view_size)), dtype=np.uint32)
self._new_world_center: np.ndarray = np.array([self.world_size[1] / 2, self.world_size[0] / 2], dtype=np.int32)
self._next_new_world_center: np.ndarray = np.array(
[self.world_size[1] / 2, self.world_size[0] / 2], dtype=np.int32
)
+ # placeholder for mapping function
+ self._mapping = lambda x, y: (x, y)
+ # create a list of locations where the agent can be placed
+ # 25 possible locations for agent
+ factor = 5
+ self.possible_start_locations = np.array(
+ list(
+ product(
+ np.linspace(
+ start=2 * self.view_size / factor,
+ stop=2 * self.view_size,
+ num=factor,
+ endpoint=False,
+ dtype=int,
+ ),
+ np.linspace(
+ start=2 * self.view_size / factor,
+ stop=2 * self.view_size,
+ num=factor,
+ endpoint=False,
+ dtype=int,
+ ),
+ )
+ )
+ )
+
self._agent_location: np.ndarray = np.array([self.view_size, self.view_size], dtype=np.int32)
self._target_location: np.ndarray = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self.ini_distance_to_target = 0.0
self.min_distance_to_target = 0.0
self.cost_astar = 0.0
+ self.ini_cost_astar = 0.0
self.min_cost_astar = 0.0
self.astar_runs = 0
self._path: List = []
+ self.ini_astarPath: List = []
self.astarPath: List = []
+ self._astar_grid = Grid()
self.action = [-1, -1]
self.action_cost = 0.0
- self.obs_world = np.zeros((3, self.image_size, self.image_size), dtype=np.uint8)
+ self.obs_world = np.zeros((3, self.image_size, self.image_size))
+ self.obs_world_astar = np.zeros((3, self.image_size, self.image_size), dtype=np.uint8)
self.base_obs_world = np.zeros((3, self.image_size, self.image_size), dtype=np.float32)
self.num_targets_reached = 0
self.current_step = 0
self.num_collisions = 0
- self.test_mode = test_mode
self.inference_mode = inference_mode
- # set back the agent to middle of the world map after these many targets
- self.soft_reset_freq = 1500
- if self.test_mode:
- self.soft_reset_freq = 50
+ # set back the agent to a new location than the previous
+ # target location after these many episodes
+ self.soft_reset_freq = 10
- self.target_num = -1
+ self.target_num = 0
self.max_collisions = max_collisions
self.max_steps = max_steps
self.show_path = show_path
@@ -161,14 +257,15 @@ def __init__(
self.delta = list(product([-1, 0, 1], repeat=2))
self.delta.remove((0, 0))
+ # random number generator for shuffling the possible target locations
+ seed_seq = np.random.SeedSequence()
+ self.random_gen: np.random.Generator = np.random.Generator(np.random.PCG64(seed_seq))
+
# math constants
self.SQRT2 = math.sqrt(2)
self.EXP1 = math.exp(1)
self.EXP1_1 = self.EXP1 - 1
- self.seed()
- self.action_space = None
-
self.multi_output = multi_output
if self.multi_output:
self.observation_space = gym.spaces.Dict(
@@ -185,41 +282,78 @@ def __init__(
low=0, high=255, shape=(3, self.image_size, self.image_size), dtype=np.uint8
)
- def get_logger(self) -> logging.Logger:
+ def get_logger(self) -> Logger:
"""
Returns the logger
Returns
-------
- logger : logging.Logger
+ logger : Logger
logger object
"""
- return logger
+ return self.logger
- def seed(self, seed: Optional[int] = None) -> List[Optional[int]]:
+ @staticmethod
+ def find_obstacle_neighbor_count(grid_map: np.ndarray) -> np.ndarray:
"""
- Sets the seed for this env's random number generator(s).
+ Finds the number of neighboring obstacles for each cell in the grid map
Parameters
----------
- seed : int, optional
- Seed for the random number generator(s), by default None
+ grid_map : np.ndarray
+ grid map with obstacles marked as 1s and free cells marked as 0s
+
+ Returns
+ -------
+ neighbors : np.ndarray
+ number of neighboring obstacles for each cell in the grid map
+ """
+ # add a border of 1s around the grid map
+ padded_grid_map = np.pad(grid_map, pad_width=1, mode="constant", constant_values=0)
+ # get the neighbors of all cells
+ neighbors = (
+ padded_grid_map[:-2, :-2]
+ + padded_grid_map[:-2, 1:-1]
+ + padded_grid_map[:-2, 2:]
+ + padded_grid_map[1:-1, :-2]
+ + padded_grid_map[1:-1, 2:]
+ + padded_grid_map[2:, :-2]
+ + padded_grid_map[2:, 1:-1]
+ + padded_grid_map[2:, 2:]
+ )
+ # return the count of neighboring obstacles
+ return neighbors
+
+ def _start_end_counts(self) -> Tuple[np.ndarray, np.ndarray]:
+ """
+ Create arrays to keep track of the start and end cell counts
Returns
-------
- Returns the list of seed used in this env's random number generators
+ start_counts : np.ndarray
+ shape like self.grid_map with the count of start cells
+ end_counts : np.ndarray
+ shape like self.grid_map with the count of end cells
"""
- self.np_random, seed = seeding.np_random(seed)
- return [seed]
+ neighbors = self.find_obstacle_neighbor_count(self.grid_map)
- def make_astar_matrix(self) -> None:
+ # find all cells with value 0 and without any neighbors with value > 0
+ possible_start_cells = (np.logical_and(self.grid_map == 0, neighbors == 0)).astype(np.uint32)
+ # set cells that are not possible start cells (value 0) to MAX_VAL_UINT
+ possible_start_cells[possible_start_cells == 0] = self.MAX_VAL_UINT
+ # end cells are same as possible start cells
+ possible_end_cells = np.copy(possible_start_cells)
+ return possible_start_cells, possible_end_cells
+
+ def _make_astar_matrix(self) -> None:
"""
Creates the astar matrix for the current world map and sets the astar grid
"""
- _astar_matrix = np.abs(1.0 - self._new_world).astype(np.int32)
+ # set the astar matrix to 1 for all cells with value 0 in the world map and 0 otherwise
+ _astar_matrix = np.abs(1.0 - self._new_world)
self._astar_grid = Grid(matrix=_astar_matrix.tolist())
- def run_astar(self, target: np.ndarray) -> Tuple[List[Tuple[int, int]], float, int]:
+ def _run_astar(self, target: np.ndarray) -> Tuple[List[Tuple[int, int]], float, int]:
"""
Runs the A* algorithm on the current world map and returns the path,
path cost and number of nodes visited
@@ -247,21 +381,44 @@ def run_astar(self, target: np.ndarray) -> Tuple[List[Tuple[int, int]], float, i
finder = AStarFinder(diagonal_movement=DiagonalMovement.always)
else:
finder = AStarFinder(diagonal_movement=DiagonalMovement.only_when_no_obstacle)
- path, runs = finder.find_path(start, end, self._astar_grid)
- if len(path) > 0:
- path_cost = end.g
- else:
- path_cost = np.inf
+ path_w_node, runs = finder.find_path(start, end, self._astar_grid)
+ path_cost = np.inf
+ path = []
+ if len(path_w_node) > 0:
+ for node in path_w_node:
+ is_gn = isinstance(node, GridNode)
+ x, y = (node.x, node.y) if is_gn else node[:2]
+ path.append((x, y))
+
+ path_cost = 0.0
+ for dx in np.array(path[1:]) - np.array(path[:-1]):
+ path_cost += np.sqrt(np.sum(dx**2))
+
return path, path_cost, runs
- def slice_grid_map(self) -> None:
- """
- Slices the grid map into a 2D numpy array
- of size (2*view_size, 2*view_size)
+ def _slice_grid_map(self) -> Tuple[Callable, Union[np.ndarray, None]]:
"""
+ Slices the grid map into a 2D numpy array of size (2*view_size, 2*view_size)
+ Generate a mapping from the sliced grid map to the original grid map
+ Returns
+ -------
+ mapping : Callable(int, int)
+ mapping from the sliced grid map to the original grid map
+ potential_start_location : Union[np.ndarray, None]
+ potential start location for the agent
+ """
# slice the grid map into a 2D numpy array
self._new_world = np.ones((int(2 * self.view_size), int(2 * self.view_size)))
+
+ # the new target locations array has shape (2*view_size, 2*view_size), with all values
+ # set to self.MAX_VAL_UINT and dtype np.uint32
+ self._new_target_locations = np.full(
+ shape=(int(2 * self.view_size), int(2 * self.view_size)),
+ fill_value=self.MAX_VAL_UINT,
+ dtype=np.uint32,
+ )
+
y_min = int(max(self._new_world_center[0] - self.view_size, 0))
y_max = int(min(self._new_world_center[0] + self.view_size, self.world_size[0]))
x_min = int(max(self._new_world_center[1] - self.view_size, 0))
@@ -273,13 +430,46 @@ def slice_grid_map(self) -> None:
x_max_new = int(self.view_size + x_max - self._new_world_center[1])
self._new_world[y_min_new:y_max_new, x_min_new:x_max_new] = self.grid_map[y_min:y_max, x_min:x_max]
+ self._new_target_locations[y_min_new:y_max_new, x_min_new:x_max_new] = self.target_locations[
+ y_min:y_max, x_min:x_max
+ ]
# set the edges to 1
self._new_world[0, :-1] = self._new_world[:-1, -1] = self._new_world[-1, 1:] = self._new_world[1:, 0] = 1
- # create the new astar grid
- self.make_astar_matrix()
- def find_target(self) -> np.ndarray:
+ # set the edges to self.MAX_VAL_UINT in the new target locations
+ self._new_target_locations[0, :-1] = self._new_target_locations[:-1, -1] = self._new_target_locations[
+ -1, 1:
+ ] = self._new_target_locations[1:, 0] = self.MAX_VAL_UINT
+ # create the new astar grid
+ self._make_astar_matrix()
+
+ # define the mapping from the sliced grid map to the original grid map
+ def _mapping(x: int, y: int) -> Tuple[int, int]:
+ return x + x_min - x_min_new, y + y_min - y_min_new
+
+ # find the start location in the sliced grid map
+ # shuffle the possible_start_locations
+ self.np_random.shuffle(self.possible_start_locations)
+ # iterate over possible_start_locations
+ potential_start_location = None
+ for start_location in self.possible_start_locations:
+ # check that the start location and its 1-cell neighbourhood (spacing = 1) are free of obstacles
+ spacing = 1
+ if np.all(
+ self._new_world[
+ start_location[1] - spacing : start_location[1] + (spacing + 1),
+ start_location[0] - spacing : start_location[0] + (spacing + 1),
+ ]
+ == 0
+ ):
+ # if free of obstacles, set the agent location to the start location
+ potential_start_location = start_location
+ break
+
+ return _mapping, potential_start_location
+
+ def _find_target(self) -> np.ndarray:
"""
Finds a target location for the agent to move to
@@ -287,72 +477,62 @@ def find_target(self) -> np.ndarray:
-------
target_location : np.ndarray
target location
+
+ Raises
+ ------
+ RuntimeError
+ If a target location cannot be found
"""
# 10% of the time find easy target (close to agent)
- easy_target = True if (self.np_random.rand() < 0.10) else False
- colliding = True
+ easy_target = self.np_random.random() < 0.10
+ # unless it is an easy target, avoid sampling within 8 cells of the agent
+ # (2 cells for easy targets): raise the count of those cells in
+ # self._new_target_locations to self.MAX_VAL_UINT so nothing close to the agent is sampled
+ spread = 2 if easy_target else 8
count = 0
- while colliding:
- nearby_clear = True
- if not easy_target:
- straight_path = True
- target_location = self._agent_location
- while np.array_equal(target_location, self._agent_location):
+ while True:
+ self._new_target_locations[
+ self._agent_location[0] - spread : self._agent_location[0] + (spread + 1),
+ self._agent_location[1] - spread : self._agent_location[1] + (spread + 1),
+ ] = self.MAX_VAL_UINT
+ # get a list of all the target locations with the count less than self.MAX_VAL_UINT
+ possible_target_locations = np.argwhere(self._new_target_locations < self.MAX_VAL_UINT)[:, ::-1]
+ # shuffle the possible_target_locations
+ self.random_gen.shuffle(possible_target_locations)
+ # iterate over the shuffled possible_target_locations
+ for target_location in possible_target_locations:
if not easy_target:
- random_r = self.np_random.randint(6, int(self.SQRT2 * self.view_size - 1))
- else:
- random_r = self.np_random.randint(2, int((self.SQRT2 * self.view_size - 1) / 2))
- random_theta = self.np_random.uniform(-np.pi, np.pi)
- random_location = np.array([random_r * np.cos(random_theta), random_r * np.sin(random_theta)])
- random_location = np.round(random_location).astype(np.int32)
- target_location = random_location + self._agent_location
- if np.amin(target_location) < 0 or np.amax(target_location) >= int(2 * self.view_size):
- target_location = self._agent_location
- if self._new_world[target_location[1], target_location[0]] == 0:
- spacing = 1
- # check immediate neighbors for obstacles
- y_min = int(max(target_location[1] - spacing, 0))
- y_max = int(min(target_location[1] + spacing + 1, 2 * self.view_size))
- x_min = int(max(target_location[0] - spacing, 0))
- x_max = int(min(target_location[0] + spacing + 1, 2 * self.view_size))
- if np.count_nonzero(self._new_world[y_min:y_max, x_min:x_max] == 1.0) > 0:
- nearby_clear = False
-
- if nearby_clear:
- if not easy_target:
- # check if its a straight path
- rr, cc = draw.line(
- self._agent_location[0],
- self._agent_location[1],
- target_location[0],
- target_location[1],
- )
- straight_line = list(zip(rr, cc))
- for pt in straight_line:
- if self._new_world[pt[1], pt[0]] == 1:
- straight_path = False
- break
-
- if not straight_path:
- # Astar search to find the shortest path to the target
- self.astarPath, self.cost_astar, self.astar_runs = self.run_astar(target_location)
- if len(self.astarPath) > 0 and self.astar_runs > 60:
- colliding = False
- else:
- # Astar search to find the shortest path to the target
- self.astarPath, self.cost_astar, self.astar_runs = self.run_astar(target_location)
- if len(self.astarPath) > 0:
- colliding = False
+ # check if it's a straight, obstacle-free path
+ rr, cc = draw.line(
+ self._agent_location[0],
+ self._agent_location[1],
+ target_location[0],
+ target_location[1],
+ )
+ straight_pixels = self._new_world[cc, rr]
+ straight_path = not np.any(straight_pixels == 1)
+
+ if straight_path:
+ continue
+
+ # Astar search to find the shortest path to the target
+ self.astarPath, self.cost_astar, self.astar_runs = self._run_astar(target_location)
+ if len(self.astarPath) > 0:
+ if easy_target:
+ return target_location
+ if self.astar_runs > 60:
+ return target_location
+
+ self.logger.info(" ---Target not set, soft reset---")
+ # raise the count of the agent location in self.start_locations to self.MAX_VAL_UINT
+ # so that the agent location is not sampled again
+ mapped_start = self._mapping(self._agent_location[0], self._agent_location[1])
+ self.start_locations[mapped_start[::-1]] = self.MAX_VAL_UINT
+ self._soft_reset()
count += 1
- if count > 9999:
- logger.info(
- " ---Target not set in %s tries! Setting agent back to reset conditions!",
- count,
- )
- count = 0
- self.soft_reset()
- return target_location
+ if count > 100000:
+ raise RuntimeError("Cannot find a target location")
def _get_info(self) -> Dict:
"""
@@ -376,6 +556,7 @@ def _get_info(self) -> Dict:
"current step": self.current_step,
"no. of collisions": self.num_collisions,
"grid map": self._new_world,
+ "obs with astar path": self.obs_world_astar,
}
# base observation is the world map which remains constant throughout the episode
@@ -406,6 +587,12 @@ def _get_obs(self) -> Union[np.ndarray, OrderedDict]:
self.obs_world[:, self._agent_location[1], self._agent_location[0]] = 0.3 * 255.0
self.obs_world[0, self._agent_location[1], self._agent_location[0]] = 1.0 * 255.0
+ # mark the astar path in the self.obs_world_astar
+ self.obs_world_astar = np.copy(self.obs_world)
+ for pt in self.astarPath:
+ self.obs_world_astar[:, pt[1], pt[0]] = 0.6 * 255.0
+ self.obs_world_astar = self.obs_world_astar.astype(dtype=np.uint8)
+
if self.show_path and (len(self._path) != 0):
for pt in self._path:
if self.obs_world[2, pt[1], pt[0]] == 0:
@@ -418,69 +605,124 @@ def _get_obs(self) -> Union[np.ndarray, OrderedDict]:
dsize=(self.image_size, self.image_size),
interpolation=cv2.INTER_NEAREST,
)
- self.obs_world = np.moveaxis(self.obs_world, -1, 0)
- if not self.multi_output:
- return self.obs_world.astype(dtype=np.uint8)
+ self.obs_world = np.moveaxis(self.obs_world, -1, 0).astype(dtype=np.uint8)
+ if not self.multi_output:
+ return self.obs_world
return OrderedDict(
{
# normalize delta
"delta": ((self._target_location - self._agent_location) / (2 * self.view_size - 1)).astype(np.float32),
- "world": self.obs_world.astype(dtype=np.uint8),
+ "world": self.obs_world,
}
)
- # set the agent's location at the center of the map
- def soft_reset(self) -> None:
+ def _get_new_index_from_counts(self, counts_mat: np.ndarray, alpha_p: float = 1.0) -> Tuple[int, int]:
+ """
+ Returns a new index sampled from the counts matrix
+
+ Parameters
+ ----------
+ counts_mat : np.ndarray
+ counts matrix from which the new index is sampled
+ alpha_p : float
+ parameter to control the sampling probability
+
+ Returns
+ -------
+ sampled_index : Tuple[int, int]
+ sampled index from the counts matrix in the form (y, x)
+ """
+ flattened_counts = counts_mat.flatten()
+ # higher the count, lower the probability of sampling that cell
+ probabilities = np.exp(-alpha_p * flattened_counts)
+ probabilities /= np.sum(probabilities)
+
+ # sample a cell based on the probabilities
+ sampled_index = self.np_random.choice(np.arange(len(flattened_counts)), p=probabilities)
+ # convert the sampled index to 2D index
+ sampled_index = np.unravel_index(sampled_index, counts_mat.shape)
+ return sampled_index # (y, x)
+
+ # set the world center based on sampling from current counts
+ # tries to set the world center to cells with lower counts
+ def _soft_reset(self) -> None:
"""Moves the agent to the center of the map and resets the target"""
- self._new_world_center = np.array([self.world_size[1] / 2, self.world_size[0] / 2], dtype=np.int32)
- self.slice_grid_map()
- self._agent_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ sampled_index = self._get_new_index_from_counts(self.start_locations)
+ self._new_world_center = np.array([sampled_index[0], sampled_index[1]], dtype=np.int32)
+ self._mapping, potential_start_location = self._slice_grid_map()
+ if potential_start_location is None:
+ potential_start_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self._agent_location = potential_start_location
self.target_num = 0
def reset(
- self, return_info: bool = False
- ) -> Union[Tuple[Union[np.ndarray, OrderedDict], Dict], np.ndarray, OrderedDict]:
+ self,
+ *,
+ seed: Union[int, None] = None,
+ options: Union[Dict, None] = None,
+ ) -> Tuple[Union[np.ndarray, OrderedDict], Dict]:
"""
- Resets the environment to the initial state and returns the initial observation
+ Resets the environment to the initial state and returns the initial observation and info
Parameters
----------
- return_info : bool, optional
- whether to return the info dictionary, by default False
+ seed : Union[int, None]
+ seed to use for the environment
+ options : Union[Dict, None]
+ options to use for the environment
Returns
-------
obs : np.ndarray or OrderedDict
observation from manystep environment
- info : Dict (optional)
- info dictionary (optional) of the last step in the stack
+ info : Dict
+ info dictionary of the last step in the stack
"""
+
+ # Initialize the RNG if the seed is manually passed
+ super().reset(seed=seed)
+
+ # seed the other random number generator
+ if seed is not None:
+ self.random_gen, _ = seeding.np_random(seed)
+
self.target_num += 1
if (self.target_num % self.soft_reset_freq) == 0:
- self.soft_reset()
- if self.test_mode:
- self.seed(1327455)
+ self._soft_reset()
else:
self._new_world_center = self._next_new_world_center
self.current_step = 0
self.num_collisions = 0
- self.slice_grid_map()
- self._agent_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self._mapping, potential_start_location = self._slice_grid_map()
+ if potential_start_location is None:
+ potential_start_location = np.array([self.view_size, self.view_size], dtype=np.int32)
+ self._agent_location = potential_start_location
# We will sample the target's location randomly until it does
# not coincide with the agent's location
- self._target_location = self.find_target()
- self._next_new_world_center = self._new_world_center + (self._target_location - self._agent_location)[::-1]
- self.min_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
- self.min_cost_astar = self.cost_astar
+ self._target_location = self._find_target()
+
+ # increase the count of the start and target locations
+ mapped_start = self._mapping(self._agent_location[0], self._agent_location[1])
+ self.start_locations[mapped_start[::-1]] += 1
+ mapped_target = self._mapping(self._target_location[0], self._target_location[1])
+ self.target_locations[mapped_target[::-1]] += 1
+
+ # set the next new world center at the mapped target location
+ self._next_new_world_center = mapped_target[::-1]
+ self.ini_distance_to_target = self.min_distance_to_target = float(
+ np.linalg.norm(self._target_location - self._agent_location)
+ )
+ self.ini_astarPath = self.astarPath
+ self.ini_cost_astar = self.min_cost_astar = self.cost_astar
self._path = []
self.action = [-1, -1]
self.action_cost = 0.0
self._create_base_obs()
observation = self._get_obs()
- return (observation, self._get_info()) if return_info else observation
+ return (observation, self._get_info())
def _compute_reward(self, completion_reward: bool = False):
"""
@@ -490,7 +732,7 @@ def _compute_reward(self, completion_reward: bool = False):
def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
"""
- Takes the action and returns the new agent lo
+ Takes the action and returns the new agent location
"""
raise NotImplementedError
@@ -500,9 +742,9 @@ def close(self) -> None:
"""
plt.close("all")
- def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], float, bool, Dict]:
+ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], float, bool, bool, Dict]:
"""
- Takes a step in the environment and returns the observation, reward, done and info
+ Takes a step in the environment and returns the observation, reward, terminated, truncated and info
Parameters
----------
@@ -511,10 +753,19 @@ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], floa
Returns
-------
- Observation, Reward, Done, Info : Tuple[Union[np.ndarray, OrderedDict], float, bool, Dict]
- the observation, reward, done and info
+ observation : np.ndarray or OrderedDict
+ observation
+ reward : float
+ reward
+ terminated : bool
+ whether the episode terminated
+ truncated : bool
+ whether the episode was truncated
+ info : Dict
+ info dictionary
"""
- done = False
+ terminated = False
+ truncated = False
valid_action_path, collision = self._take_action(action)
self.current_step += 1
@@ -526,13 +777,13 @@ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], floa
if collision:
self.num_collisions += 1
reward = -1.0
- else: # only do it if the agent moves
+ elif len(valid_action_path) > 1: # only do it if the agent moves
self._path = valid_action_path[:-1] # remove the agent location
if np.array_equal(self._target_location, self._agent_location):
- done = True
+ terminated = True
reward += self._compute_reward(completion_reward=True)
self.num_targets_reached += 1
- logger.info(
+ self.logger.info(
"%s Target reached in %s steps, Collisions : %s",
self.ordinal(self.num_targets_reached),
self.current_step,
@@ -540,43 +791,49 @@ def step(self, action: np.ndarray) -> Tuple[Union[np.ndarray, OrderedDict], floa
)
else:
reward += self._compute_reward(completion_reward=False)
- self.min_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
- self.min_cost_astar = self.cost_astar
+ else:
+ self.logger.warning("No movement caused by action: %s!", action)
+
+ self.min_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
+ self.min_cost_astar = self.cost_astar
if self.num_collisions > self.max_collisions:
- done = True
+ terminated = True
if not self.partial_reward:
reward += -self.cost_astar / 100
- logger.info(" --Max Collisions! Collisions: %s/%s", self.num_collisions, self.max_collisions + 1)
- elif (not done) and (self.current_step > self.max_steps):
- done = True
+ self.logger.info(
+ " --Max Collisions! Collisions: %s/%s",
+ self.num_collisions,
+ self.max_collisions + 1,
+ )
+ elif (not terminated) and (self.current_step > self.max_steps):
+ terminated = True
if not self.partial_reward:
reward += -self.cost_astar / 100
- logger.info(
- " --Max Steps: %s/%s!",
+ self.logger.info(
+ " --Max Steps! Steps: %s/%s",
self.current_step,
self.max_steps + 1,
)
observation = self._get_obs()
info = self._get_info()
- return observation, reward, done, info
+ return observation, reward, terminated, truncated, info
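
An illustrative rollout loop against the new five-tuple step() return, sketched with a random policy and the VoxelGymOneStep defaults; truncated is checked for API completeness even though the code above only ever sets terminated.

from voxelgym2D.envs.env_one_step import VoxelGymOneStep

env = VoxelGymOneStep()
obs, info = env.reset(seed=0)
terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()            # random policy, for illustration only
    obs, reward, terminated, truncated, info = env.step(action)
env.close()
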
- def render(self, mode="human") -> None:
+ def render(self) -> None:
"""
Renders the environment
- Parameters
- ----------
- mode : str, optional
- the mode to render in, by default "human"
-
Returns
-------
None
"""
- plt.title("Voxelgym")
- plt.imshow(np.moveaxis(self.obs_world.astype(np.uint8), 0, 2))
+ plt.subplot(1, 2, 1)
+ plt.title("Agent View")
+ plt.imshow(np.moveaxis(self.obs_world, 0, 2))
+ plt.subplot(1, 2, 2)
+ plt.title("Current Astar Path")
+ plt.imshow(np.moveaxis(self.obs_world_astar, 0, 2))
palette = [[77, 77, 255], [77, 255, 77], [255, 77, 77], [144, 144, 144]]
classes = ["obstacles", "target", "agent", "last visited"]
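
The palette/classes pairs above suggest a colour legend for the two rendered panels; one possible way to attach it (a sketch, not necessarily the method's actual continuation) is via matplotlib patches:

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

palette = [[77, 77, 255], [77, 255, 77], [255, 77, 77], [144, 144, 144]]   # 0-255 RGB
classes = ["obstacles", "target", "agent", "last visited"]
handles = [
    mpatches.Patch(color=[c / 255.0 for c in rgb], label=name)
    for rgb, name in zip(palette, classes)
]
plt.legend(handles=handles, loc="lower right")
plt.show()
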
diff --git a/voxelgym2D/envs/env_one_step.py b/voxelgym2D/envs/env_one_step.py
index 0c95189..98c1042 100644
--- a/voxelgym2D/envs/env_one_step.py
+++ b/voxelgym2D/envs/env_one_step.py
@@ -1,37 +1,42 @@
-"""Environment corresponding to Onestep action space"""
+"""Voxel environment corresponding to Onestep action space"""
-from typing import List, Tuple
+from typing import List, Optional, Tuple
-import gym
+import gymnasium as gym
import numpy as np
from .base_env import BaseEnv
class VoxelGymOneStep(BaseEnv):
- """Environment corresponding to Onestep action space"""
+ """Voxel environment corresponding to Onestep action space"""
def __init__(
self,
- mapfile: str = "200x200x200_dense.npy",
+ render_mode: Optional[str] = None,
+ mapfile: str = "600x600.npy",
view_size: int = 21,
+ image_size: int = 42,
max_collisions: int = 0,
max_steps: int = 60,
show_path: bool = True,
multi_output: bool = False,
partial_reward: bool = True,
- image_size: int = 42,
- test_mode: bool = False,
inference_mode: bool = False,
discrete_actions: bool = True,
+ log_level: str = "ERROR",
):
"""
Parameters
----------
+ render_mode : Optional[str], optional
+ render mode, by default None
mapfile : str
name of the map file in the maps folder
view_size : int
size of the view window for observation
+ image_size : int
+ size of the image to be returned as observation
max_collisions : int
maximum number of collisions allowed before episode ends
max_steps : int
@@ -42,34 +47,34 @@ def __init__(
whether to add additional outputs in the observation
partial_reward : bool
whether to give rewards for each step
- image_size : int
- size of the image to be returned as observation
- test_mode : bool
- whether to run in test mode, for evaluation during training
inference_mode : bool
whether to run in inference mode
discrete_actions : bool
whether to use discrete actions
+ log_level : str, optional
+ log level, by default "ERROR". One of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
"""
super().__init__(
+ render_mode,
mapfile,
view_size,
+ image_size,
max_collisions,
max_steps,
show_path,
multi_output,
partial_reward,
- image_size,
- test_mode,
inference_mode,
+ log_level,
)
self.discrete_actions = discrete_actions
if self.discrete_actions:
self.action_space = gym.spaces.Discrete(len(self.delta))
else:
- self.action_space = gym.spaces.Box(-1, 1, shape=(len(self.delta),))
+ # self.action_space = gym.spaces.Box(-1, 1, shape=(len(self.delta)))
+ self.action_space = gym.spaces.Box(-1, 1, shape=[1])
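
A construction sketch for the two action-space variants configured above; keyword values other than the documented defaults are purely illustrative.

from voxelgym2D.envs.env_one_step import VoxelGymOneStep

disc_env = VoxelGymOneStep(discrete_actions=True, log_level="INFO")
print(disc_env.action_space)       # Discrete space with one entry per element of self.delta

cont_env = VoxelGymOneStep(discrete_actions=False)
print(cont_env.action_space)       # Box(-1, 1) holding a single scalar action
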
def _compute_reward(self, completion_reward: bool = False) -> float:
"""
@@ -91,30 +96,49 @@ def _compute_reward(self, completion_reward: bool = False) -> float:
if completion_reward:
reward_completion = 0.5
+ return reward_completion
- else:
- reward_completion = -self.action_cost
-
- _new_astarPath, _cost_astar, self.astar_runs = self.run_astar(self._target_location)
- if len(_new_astarPath) > 0:
- self.astarPath = _new_astarPath
- self.cost_astar = _cost_astar
+ reward_completion = -self.action_cost
- reward_euc_astar = self.min_cost_astar - _cost_astar
- else:
- # fallback to euclidean distance if astar fails
- reward_euc_astar = self.min_distance_to_target - float(
- np.linalg.norm(self._target_location - self._agent_location)
- )
+ _new_astarPath, _cost_astar, self.astar_runs = self._run_astar(self._target_location)
+ if len(_new_astarPath) > 0:
+ self.astarPath = _new_astarPath
+ self.cost_astar = _cost_astar
- if completion_reward:
- return reward_completion
- if (self.cost_astar < self.min_cost_astar) and self.partial_reward:
- return round((2 * reward_euc_astar + reward_completion) / 10, 4)
- if (self.cost_astar > self.min_cost_astar) and self.partial_reward:
+ reward_euc_astar = self.min_cost_astar - _cost_astar
+ improved = self.min_cost_astar > _cost_astar
+ else:
+ # fallback to euclidean distance if astar fails
+ current_distance_to_target = float(np.linalg.norm(self._target_location - self._agent_location))
+ reward_euc_astar = self.min_distance_to_target - current_distance_to_target
+ improved = self.min_distance_to_target > current_distance_to_target
+
+ if self.partial_reward:
+ if improved:
+ return round((2 * reward_euc_astar + reward_completion) / 10, 4)
return round((reward_euc_astar + reward_completion) / 10, 4)
+
return round(reward_completion / 10, 4)
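
A worked example of the partial-reward arithmetic above, with made-up costs:

action_cost = 1.0                                   # reward_completion = -action_cost
min_cost_astar, new_cost_astar = 10.0, 8.5          # A* got cheaper, so improved is True
reward_euc_astar = min_cost_astar - new_cost_astar  # 1.5
print(round((2 * reward_euc_astar - action_cost) / 10, 4))   # 0.2
# had the A* cost grown to 11.0 instead, improved would be False:
print(round(((10.0 - 11.0) - action_cost) / 10, 4))          # -0.2
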
+ @staticmethod
+ def action_to_bins(action: np.ndarray) -> int:
+ """
+    Converts the continuous action to bins of size 1/4 and returns the bin number in the range [0, 7]
+
+ Parameters
+ ----------
+ action : np.ndarray
+ action to be converted to bin number
+
+ Returns
+ -------
+ bin : int
+ bin number in the range [0, 7]
+ """
+ # Clip action to [-1, 1]
+ clipped = np.clip(action, -1, 1)
+ return min(int((clipped + 1) * 4), 7)
+
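
A few spot checks of the binning helper above, assuming the class import shown below:

import numpy as np

from voxelgym2D.envs.env_one_step import VoxelGymOneStep

for action, expected_bin in [(-1.0, 0), (-0.3, 2), (0.0, 4), (0.6, 6), (1.0, 7), (2.5, 7)]:
    assert VoxelGymOneStep.action_to_bins(np.array(action)) == expected_bin
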
def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
"""
Takes the action and updates the agent location
@@ -132,11 +156,12 @@ def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
True if the agent collides with an obstacle, else False
"""
if not self.discrete_actions:
- self.action = list(self.delta[np.argmax(action)])
+ # convert angle to bin number
+ action_idx = self.action_to_bins(action)
+ self.action = list(self.delta[action_idx])
else:
self.action = list(self.delta[int(action)])
- self.action_cost = float(np.linalg.norm(self.action))
action_location = self._agent_location + np.array(self.action, dtype=np.int32)
action_path = [action_location.tolist()]
@@ -154,4 +179,10 @@ def _take_action(self, action: np.ndarray) -> Tuple[List, bool]:
# complete path by including initial location
valid_action_path.insert(0, self._agent_location.tolist())
+ # compute action cost
+ self.action_cost = 0.0
+        for dx in np.array(valid_action_path[1:]) - np.array(valid_action_path[:-1]):
+ self.action_cost += np.sqrt(np.sum(dx**2))
+ self.action_cost = round(self.action_cost, 4)
+
return valid_action_path, collision
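
The cost bookkeeping added above is simply the summed Euclidean length of the realised path; an equivalent stand-alone computation, using a made-up path, would be:

import numpy as np

valid_action_path = [[10, 10], [11, 11], [11, 12]]    # start cell plus two moves (made up)
steps = np.diff(np.array(valid_action_path), axis=0)  # per-step displacements
action_cost = round(float(np.sum(np.sqrt(np.sum(steps**2, axis=1)))), 4)
print(action_cost)                                    # 2.4142 == sqrt(2) + 1
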
diff --git a/voxelgym2D/envs/maps/100x100x100_dense.npy b/voxelgym2D/envs/maps/100x100x100_dense.npy
deleted file mode 100644
index 1962b83..0000000
Binary files a/voxelgym2D/envs/maps/100x100x100_dense.npy and /dev/null differ
diff --git a/voxelgym2D/envs/maps/200x200x200_dense.npy b/voxelgym2D/envs/maps/200x200x200_dense.npy
deleted file mode 100644
index 6101fe7..0000000
Binary files a/voxelgym2D/envs/maps/200x200x200_dense.npy and /dev/null differ
diff --git a/voxelgym2D/envs/maps/50x50x50_dense.npy b/voxelgym2D/envs/maps/50x50x50_dense.npy
deleted file mode 100644
index 50cdb81..0000000
Binary files a/voxelgym2D/envs/maps/50x50x50_dense.npy and /dev/null differ
diff --git a/voxelgym2D/envs/maps/600x600.npy b/voxelgym2D/envs/maps/600x600.npy
new file mode 100644
index 0000000..97f6eb2
Binary files /dev/null and b/voxelgym2D/envs/maps/600x600.npy differ
diff --git a/voxelgym2D/version.txt b/voxelgym2D/version.txt
new file mode 100644
index 0000000..9325c3c
--- /dev/null
+++ b/voxelgym2D/version.txt
@@ -0,0 +1 @@
+0.3.0
\ No newline at end of file