diff --git a/.codecov.yml b/.codecov.yml deleted file mode 100644 index 5dce83a..0000000 --- a/.codecov.yml +++ /dev/null @@ -1,14 +0,0 @@ -coverage: - status: - project: - default: - # Commits pushed to main should not make the overall - # project coverage decrease by more than 1% - target: auto - threshold: 1% - patch: - default: - # Be tolerant on slight code coverage diff on PRs to limit - # noisy red coverage status on github PRs. - target: auto - threshold: 1% diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 495f712..0000000 --- a/.coveragerc +++ /dev/null @@ -1,23 +0,0 @@ -[run] -branch = True -omit = */tests/* - -[report] -exclude_lines = - # Have to re-enable the standard pragma - pragma: no cover - - # Don't complain about missing debug-only code: - def __repr__ - if self\.debug: - if debug: - if DEBUG: - - # Don't complain if tests don't hit defensive assertion code: - raise AssertionError - raise NotImplementedError - - # Don't complain if non-runnable code isn't run: - if 0: - if False: - if __name__ == .__main__.: diff --git a/.github/workflows/test-release-candidate.yaml b/.github/workflows/test-release-candidate.yaml deleted file mode 100644 index a7a1217..0000000 --- a/.github/workflows/test-release-candidate.yaml +++ /dev/null @@ -1,145 +0,0 @@ -# Tests for releases and release candidates -# -# Runs on every tag creation, and all pushes and PRs to release branches -# named "v1.2.x", etc. -# -# This workflow is more extensive than the regular test workflow. -# - Tests are executed on more Python versions -# - Tests are run on more operating systems -# - N.B. There is no pip cache here to ensure runs are always against the -# very latest versions of dependencies, even if this workflow ran recently. -# -# In addition, the package is built as a wheel on each OS/Python job, and these -# are stored as artifacts to use for your distribution process. 
There is an -# extra job (disabled by default) which can be enabled to push to Test PyPI. - -name: release candidate tests - -on: - push: - branches: - # Release branches. - # Examples: "v1", "v3.0", "v1.2.x", "1.5.0", "1.2rc0" - # Expected usage is (for example) a branch named "v1.2.x" which contains - # the latest release in the 1.2 series. - - 'v[0-9]+' - - 'v?[0-9]+.[0-9x]+' - - 'v?[0-9]+.[0-9]+.[0-9x]+' - - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+' - - 'v?[0-9]+.[0-9x]+rc[0-9]*' - tags: - # Run whenever any tag is created - - '**' - pull_request: - branches: - # Release branches - - 'v[0-9]+' - - 'v?[0-9]+.[0-9x]+' - - 'v?[0-9]+.[0-9]+.[0-9x]+' - - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+' - - 'v?[0-9]+.[0-9x]+rc[0-9]*' - release: - # Run on a new release - types: [created, edited, published] - -jobs: - test-build: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - include: - - os: macos-latest - python-version: "3.12" - - os: macos-latest - python-version: "3.12" - env: - OS: ${{ matrix.os }} - PYTHON: ${{ matrix.python-version }} - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: System information - run: python .github/workflows/system_info.py - - - name: Install dependencies - run: | - python -m pip install --upgrade pip wheel - python -m pip install flake8 - python -m pip install --editable .[test] - - - name: Sanity check with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings - python -m flake8 . 
--count --exit-zero --statistics - - - name: Debug environment - run: python -m pip freeze - - - name: Test with pytest - run: | - python -m pytest --cov=package_name --cov-report term --cov-report xml --cov-config .coveragerc --junitxml=testresults.xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - with: - flags: unittests - env_vars: OS,PYTHON - name: Python ${{ matrix.python-version }} on ${{ runner.os }} - - - name: Build wheels - run: | - python -m pip install --upgrade setuptools wheel twine - python setup.py sdist bdist_wheel - - - name: Store wheel artifacts - uses: actions/upload-artifact@v4 - with: - name: wheel-${{ matrix.os }}-${{ matrix.python-version }} - path: dist/* - - - name: Build HTML docs - run: | - python -m pip install --editable .[docs] - cd docs - make html - cd .. - - publish: - # Disabled by default - if: | - false && - startsWith(github.ref, 'refs/tags/') - needs: test-build - - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Download wheel artifacts - uses: actions/download-artifact@v4 - with: - name: wheel-* - path: dist/ - - - name: Store aggregated wheel artifacts - uses: actions/upload-artifact@v4 - with: - name: wheels - path: dist/* - - - name: Publish package to Test PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.TEST_PYPI_API_TOKEN }} - repository_url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index 4b4f03d..0000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# Regular tests -# -# Use this to ensure your tests are passing on every push and PR (skipped on -# pushes which only affect documentation). -# There is also a cron job set to run weekly on the default branch, to check -# against dependency chain rot. 
-# -# You should make sure you run jobs on at least the *oldest* and the *newest* -# versions of python that your codebase is intended to support. - -name: tests - -on: - push: - pull_request: - schedule: - - cron: "0 0 * * 1" - branches: [ $default-branch ] - -jobs: - test: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.12"] - include: - - os: windows-latest - python-version: "3.12" - env: - OS: ${{ matrix.os }} - PYTHON: ${{ matrix.python-version }} - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: pip - - - name: System information - run: python .github/workflows/system_info.py - - - name: Install dependencies - run: | - python -m pip install --upgrade pip wheel - python -m pip install flake8 - python -m pip install --editable .[test] - - - name: Sanity check with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings - python -m flake8 . --count --exit-zero --statistics - - - name: Debug environment - run: python -m pip freeze - - - name: Test with pytest - run: | - python -m pytest --cov=package_name --cov-report term --cov-report xml --cov-config .coveragerc --junitxml=testresults.xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - with: - flags: unittests - env_vars: OS,PYTHON - name: Python ${{ matrix.python-version }} on ${{ runner.os }} - - - name: Build HTML docs - run: | - python -m pip install --editable .[docs] - cd docs - make html - cd .. diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b46b365..5746db2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Changelog ========= -All notable changes to package_name will be documented here. 
+All notable changes to bioscan_dataset will be documented here. The format is based on `Keep a Changelog`_, and this project adheres to `Semantic Versioning`_. @@ -11,7 +11,7 @@ The format is based on `Keep a Changelog`_, and this project adheres to `Semanti Categories for changes are: Added, Changed, Deprecated, Removed, Fixed, Security. -Version `0.1.0 `__ +Version `0.1.0 `__ -------------------------------------------------------------------------------- Release date: YYYY-MM-DD. diff --git a/LICENSE b/LICENSE index 68a49da..78f38e9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,24 +1,22 @@ -This is free and unencumbered software released into the public domain. +MIT License -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. +Copyright (c) 2024, Scott C. Lowe -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. -For more information, please refer to diff --git a/README.rst b/README.rst index 9032e95..291f80d 100644 --- a/README.rst +++ b/README.rst @@ -1,530 +1,10 @@ -|GHA tests| |Codecov report| |pre-commit| |black| +|pre-commit| |black| -Python Template Repository -========================== +BIOSCAN Datasets +================ -This repository gives a fully-featured template or skeleton for new Python repositories. +PyTorch torchvision-style datasets for the BIOSCAN-1M and BIOSCAN-5M datasets. 
- -Quick start ------------ - -.. highlight:: bash - -When creating a new repository from this template, these are the steps to follow: - -#. *Don't click the fork button.* - The fork button is for making a new template based in this one, not for using the template to make a new repository. - -#. Creating repository. - - #. **New GitHub repository**. - - You can create a new repository on GitHub from this template by clicking the `Use this template `_ button. - - *Need to support Python 2.7?* - Make sure to check the "Include all branches" option while creating the new repository. - - Then clone your new repository to your local system [pseudocode]:: - - git clone git@github.com:your-org/your-repo.git - cd your-repo - - *If you need to support Python 2.7*, now move the reference for your default branch (master/main) to point to the python2.7 branch head:: - - git reset --hard origin/python2.7 - git push -f - - You can now delete the python2.7 branch from your remote. - - #. **New repository not on GitHub**. - - Alternatively, if your new repository is not going to be on GitHub, you can download `this repo as a zip `_ and work from there. - - *Need to support Python 2.7?* - Download the `python2.7 branch as a zip `_ instead. - - Either way, you should note that this zip does not include the .gitignore and .gitattributes files (because GitHub automatically omits them, which is usually helpful but is not for our purposes). - Thus you will also need to download the `.gitignore `__ and `.gitattributes `__ files. - - The following shell commands can be used for this purpose on \*nix systems:: - - git init your_repo_name - cd your_repo_name - wget https://github.com/scottclowe/python-template-repo/archive/master.zip - unzip master.zip - mv -n python-template-repo-master/* python-template-repo-master/.[!.]* . 
- rm -r python-template-repo-master/ - rm master.zip - wget https://raw.githubusercontent.com/scottclowe/python-template-repo/master/.gitignore - wget https://raw.githubusercontent.com/scottclowe/python-template-repo/master/.gitattributes - git add . - git commit -m "Initial commit" - git rm LICENSE - - Note that we are doing the move with ``mv -n``, which will prevent the template repository from clobbering your own files (in case you already made a README.rst file, for instance). - - You'll need to instruct your new local repository to synchronise with the remote ``your_repo_url``:: - - git remote set-url origin your_repo_url - git push -u origin master - -#. Remove the dummy files ``package_name/module.py`` and ``package_name/tests/test_module.py``:: - - rm package_name/module.py - rm package_name/tests/test_module.py - - If you prefer, you can keep them around as samples, but should note that they require numpy. - -#. Depending on your needs, some of the files may be superfluous to you. - You can remove any superfluous files, as follows. - - - *No GitHub Actions!* - Delete the .github directory:: - - rm -r .github/ - - - *No unit testing!* - Run the following commands to delete unit testing files:: - - rm -rf package_name/tests/ - rm -f .github/workflows/test*.yaml - rm -f .codecov.yml - rm -f .coveragerc - rm -f requirements-test.txt - -#. Delete the LICENSE file and replace it with a LICENSE file of your own choosing. - If the code is intended to be freely available for anyone to use, use an `open source license `_, such as `MIT License `__ or `GPLv3 `__. - If you don't want your code to be used by anyone else, add a LICENSE file which just says: - - .. code-block:: none - - Copyright (c) CURRENT_YEAR, YOUR_NAME - - All rights reserved. 
- - Note that if you don't include a LICENSE file, you will still have copyright over your own code (this copyright is automatically granted), and your code will be private source (technically nobody else will be permitted to use it, even if you make your code publicly available). - -#. Edit the file ``package_name/__meta__.py`` to contain your author and repo details. - - name - The name as it will/would be on PyPI (users will do ``pip install new_name_here``). - It is `recommended `__ to use a name all lowercase, runtogetherwords but if separators are needed hyphens are preferred over underscores. - - path - The path to the package. What you will rename the directory ``package_name``. - `Should be `__ the same as ``name``, but now hyphens are disallowed and should be swapped for underscores. - By default, this is automatically inferred from ``name``. - - license - Should be the name of the license you just picked and put in the LICENSE file (e.g. ``MIT`` or ``GPLv3``). - - Other fields to enter should be self-explanatory. - -#. Rename the directory ``package_name`` to be the ``path`` variable you just added to ``__meta__.py``.:: - - PACKAGE_NAME=your_actual_package_name - mv package_name "$PACKAGE_NAME" - -#. Change references to ``package_name`` to your path variable: - - This can be done with the sed command:: - - sed -i "s/package_name/$PACKAGE_NAME/" setup.py \ - docs/source/conf.py \ - CHANGELOG.rst \ - .github/workflows/test*.yaml - - Which will make changes in the following places. - - .. highlight:: python - - - In ``setup.py``, `L54 `__:: - - exec(read("package_name/__meta__.py"), meta) - - - In ``docs/source/conf.py``, `L27 `__:: - - from package_name import __meta__ as meta # noqa: E402 isort:skip - - - In ``.github/workflows/test.yaml``, `L64 `__, and ``.github/workflows/test-release-candidate.yaml``, `L90 `__:: - - python -m pytest --cov=package_name --cov-report term --cov-report xml --cov-config .coveragerc --junitxml=testresults.xml - - .. 
highlight:: bash - -#. Swap out the contents of ``requirements.txt`` for your project's current requirements. - If you don't have any requirements yet, delete the contents of ``requirements.txt``. - -#. Swap out the contents of ``README.rst`` with an initial description of your project. - If you are keeping all the badges, make sure to change the URLs from ``scottclowe/python-template-repo`` to ``your_username/your_repo``. - If you prefer, you can use markdown instead of rST. - -#. Commit and push your changes:: - - git add . - git commit -m "Initialise project from template repository" - git push - -When it comes time to make your first release, make sure you update the placeholder entry in CHANGELOG.rst to contain the correct details. -You'll need to change ``YYYY-MM-DD`` to the actual release date, and change the URL to point to your release. - - -Features --------- - -.gitignore -~~~~~~~~~~ - -A `.gitignore`_ file is used specify untracked files which Git should ignore and not try to commit. - -Our template's .gitignore file is based on the `GitHub defaults `_. -We use the default `Python .gitignore`_, `Windows .gitignore`_, `Linux .gitignore`_, and `Mac OSX .gitignore`_ concatenated together. -(Released under `CC0-1.0 `__.) - -The Python .gitignore specifications prevent compiled files, packaging and sphinx artifacts, test outputs, etc, from being accidentally committed. -Even though you may develop on one OS, you might find a helpful contributor working on a different OS suddenly issues you a new PR, hence we include the gitignore for all OSes. -This makes both their life and yours easier by ignoring their temporary files before they even start working on the project. - -.. _.gitignore: https://git-scm.com/docs/gitignore -.. _default-gitignores: https://github.com/github/gitignore -.. _Python .gitignore: https://github.com/github/gitignore/blob/master/Python.gitignore -.. 
_Windows .gitignore: https://github.com/github/gitignore/blob/master/Global/Windows.gitignore -.. _Linux .gitignore: https://github.com/github/gitignore/blob/master/Global/Linux.gitignore -.. _Mac OSX .gitignore: https://github.com/github/gitignore/blob/master/Global/macOS.gitignore - - -.gitattributes -~~~~~~~~~~~~~~ - -The most important reason to include a `.gitattributes`_ file is to ensure that line endings are normalised, no matter which OS the developer is using. -This is largely achieved by the line:: - - * text=auto - -which `ensures `__ that all files Git decides contain text have their line endings normalized to LF on checkin. -This can cause problems if Git misdiagnoses a file as text when it is not, so we overwrite automatic detection based on file endings for some several common file endings. - -Aside from this, we also gitattributes to tell git what kind of diff to generate. - -Our template .gitattributes file is based on the `defaults from Alexander Karatarakis `__. -We use the `Common .gitattributes`_ and `Python .gitattributes`_ concatenated together. -(Released under `MIT License `__.) - -.. _.gitattributes: https://git-scm.com/docs/gitattributes -.. _gitattributes-text: https://git-scm.com/docs/gitattributes#_text -.. _alexkaratarakis/gitattributes: https://github.com/alexkaratarakis/gitattributes -.. _Common .gitattributes: https://github.com/alexkaratarakis/gitattributes/blob/master/Common.gitattributes -.. _Python .gitattributes: https://github.com/alexkaratarakis/gitattributes/blob/master/Python.gitattributes - - -Black -~~~~~ - -Black_ is an uncompromising Python code formatter. -By using it, you cede control over minutiae of hand-formatting. -But in return, you no longer have to worry about formatting your code correctly, since black will handle it. -Blackened code looks the same for all authors, ensuring consistent code formatting within your project. - -The format used by Black makes code review faster by producing the smaller diffs. 
- -Black's output is always stable. -For a given block of code, a fixed version of black will always produce the same output. -However, you should note that different versions of black will produce different outputs. -If you want to upgrade to a newer version of black, you must change the version everywhere it is specified: - -- requirements-dev.txt, `L1 `__ -- .pre-commit-config.yaml, `L14 `__, - `L29 `__, and - `L48 `__ - -.. _black: https://github.com/psf/black - - -pre-commit -~~~~~~~~~~ - -The template repository comes with a pre-commit_ stack. -This is a set of git hooks which are executed every time you make a commit. -The hooks catch errors as they occur, and will automatically fix some of these errors. - -To set up the pre-commit hooks, run the following code from within the repo directory:: - - pip install -r requirements-dev.txt - pre-commit install - -Whenever you try to commit code which is flagged by the pre-commit hooks, the commit will not go through. -Some of the pre-commit hooks (such as black_, isort_) will automatically modify your code to fix the issues. -When this happens, you'll have to stage the changes made by the commit hooks and then try your commit again. -Other pre-commit hooks will not modify your code and will just tell you about issues which you'll then have to manually fix. - -You can also manually run the pre-commit stack on all the files at any time:: - - pre-commit run --all-files - -To force a commit to go through without passing the pre-commit hooks use the ``--no-verify`` flag:: - - git commit --no-verify - -The pre-commit stack which comes with the template is highly opinionated, and includes the following operations: - -- Code is reformatted to use the black_ style. - Any code inside docstrings will be formatted to black using blackendocs_. - All code cells in Jupyter notebooks are also formatted to black using black_nbconvert_. - -- All Jupyter notebooks are cleared using nbstripout_. 
- -- Imports are automatically sorted using isort_. - -- flake8_ is run to check for conformity to the python style guide PEP-8_, along with several other formatting issues. - -- setup-cfg-fmt_ is used to format any setup.cfg files. - -- Several `hooks from pre-commit `_ are used to screen for non-language specific git issues, such as incomplete git merges, overly large files being commited to the repo, bugged JSON and YAML files. - JSON files are also prettified automatically to have standardised indentation. - Entries in requirements.txt files are automatically sorted alphabetically. - -- Several `hooks from pre-commit specific to python `_ are used to screen for rST formatting issues, and ensure noqa flags always specify an error code to ignore. - -Once it is set up, the pre-commit stack will run locally on every commit. -The pre-commit stack will also run on github with one of the action workflows, which ensures PRs are checked without having to rely on contributors to enable the pre-commit locally. - -.. _black_nbconvert: https://github.com/dfm/black_nbconvert -.. _blackendocs: https://github.com/asottile/blacken-docs -.. _flake8: https://gitlab.com/pycqa/flake8 -.. _isort: https://github.com/timothycrosley/isort -.. _nbstripout: https://github.com/kynan/nbstripout -.. _PEP-8: https://www.python.org/dev/peps/pep-0008/ -.. _pre-commit: https://pre-commit.com/ -.. _pre-commit-hooks: https://github.com/pre-commit/pre-commit-hooks -.. _pre-commit-py-hooks: https://github.com/pre-commit/pygrep-hooks -.. _setup-cfg-fmt: https://github.com/asottile/setup-cfg-fmt - - -Automated documentation -~~~~~~~~~~~~~~~~~~~~~~~ - -The script ``docs/conf.py`` is based on the Sphinx_ default configuration. -It is set up to work well out of the box, with several features added in. - -GitHub Pages -^^^^^^^^^^^^ - -If your repository is publicly available, the docs workflow will automatically deploy your documentation to `GitHub Pages`_. 
-To enable the documentation, go to the ``Settings > Pages`` pane for your repository and set Source to be the ``gh-pages`` branch (root directory). -Your automatically compiled documentation will then be publicly available at https://USER.github.io/PACKAGE/. - -Since GitHub pages are always publicly available, the workflow will check whether your repository is public or private, and will not deploy the documentation to gh-pages if your repository is private. - -The gh-pages documentation is refreshed every time there is a push to your default branch. - -Note that only one copy of the documentation is served (the latest version). -For more mature projects, you may wish to host the documentation readthedocs_ instead, which supports hosting documentation for multiple package versions simultaneously. - -.. _GitHub Pages: https://pages.github.com/ -.. _readthedocs: https://readthedocs.org/ - -Building locally -^^^^^^^^^^^^^^^^ - -You can build the web documentation locally with:: - - make -C docs html - -And view the documentation like so:: - - sensible-browser docs/_build/html/index.html - -Or you can build pdf documentation:: - - make -C docs latexpdf - -On Windows, this becomes:: - - cd docs - make html - make latexpdf - cd .. - -Other documentation features -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Your README.rst will become part of the generated documentation (via a link file ``docs/source/readme.rst``). - Note that the first line of README.rst is not included in the documentation, since this is expected to contain badges which you want to render on GitHub, but not include in your documentation pages. - -- If you prefer, you can use a README.md file written in GitHub-Flavored Markdown instead of README.rst. - This will automatically be handled and incorporate into the generated documentation (via a generated file ``docs/source/readme.rst``). 
- As with a README.rst file, the first line of README.md is not included in the documentation, since this is expected to contain badges which you want to render on GitHub, but not include in your documentation pages. - -- Your docstrings to your modules, functions, classes and methods will be used to build a set of API documentation using autodoc_. - Our ``docs/conf.py`` is also set up to automatically call autodoc whenever it is run, and the output files which it generates are on the gitignore list. - This means you will automatically generate a fresh API description which exactly matches your current docstrings every time you generate the documentation. - -- Docstrings can be formatted in plain reST_, or using the `numpy format`_ (recommended), or `Google format`_. - Support for numpy and Google formats is through the napoleon_ extension (which we have enabled by default). - -- You can reference functions in the python core and common packages and they will automatically be hyperlinked to the appropriate documentation in your own documentation. - This is done using intersphinx_ mappings, which you can see (and can add to) at the bottom of the ``docs/conf.py`` file. - -- The documentation theme is sphinx-book-theme_. - Alternative themes can be found at sphinx-themes.org_, sphinxthemes.com_, and writethedocs_. - -.. _autodoc: http://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html -.. _Google format: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html#example-google -.. _intersphinx: http://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html -.. _napoleon: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html -.. _numpy format: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html#example-numpy-style-python-docstrings -.. _Sphinx: https://www.sphinx-doc.org/ -.. _sphinx-book-theme: https://sphinx-book-theme.readthedocs.io/ -.. _sphinx-themes.org: https://sphinx-themes.org -.. 
_sphinxthemes.com: https://sphinxthemes.com/ -.. _reST: http://docutils.sourceforge.net/rst.html -.. _writethedocs: https://www.writethedocs.org/guide/tools/sphinx-themes/ - - -Consolidated metadata -~~~~~~~~~~~~~~~~~~~~~ - -Package metadata is consolidated into one place, the file ``package_name/__meta__.py``. -You only have to write the metadata once in this centralised location, and everything else (packaging, documentation, etc) picks it up from there. -This is similar to `single-sourcing the package version`_, but for all metadata. - -This information is available to end-users with ``import package_name; print(package_name.__meta__)``. -The version information is also accessible at ``package_name.__version__``, as per PEP-396_. - -.. _PEP-396: https://www.python.org/dev/peps/pep-0396/#specification -.. _single-sourcing the package version: https://packaging.python.org/guides/single-sourcing-package-version/ - - -setup.py -~~~~~~~~ - -The ``setup.py`` script is used to build and install your package. - -Your package can be installed from source with:: - - pip install . - -or alternatively with:: - - python setup.py install - -But do remember that as a developer, you should install your package in editable mode, using either:: - - pip install --editable . - -or:: - - python setup.py develop - -which will mean changes to the source will affect your installed package immediately without you having to reinstall it. - -By default, when the package is installed only the main requirements, listed in ``requirements.txt`` will be installed with it. -Requirements listed in ``requirements-dev.txt``, ``requirements-docs.txt``, and ``requirements-test.txt`` are optional extras. -The ``setup.py`` script is configured to include these as extras named ``dev``, ``docs``, and ``test``. -They can be installed along with:: - - pip install .[dev] - -etc. 
-Any additional files named ``requirements-EXTRANAME.txt`` will also be collected automatically and made available with the corresponding name ``EXTRANAME``. -Another extra named ``all`` captures all of these optional dependencies. - -Your README file is automatically included in the metadata when you use setup.py build wheels for PyPI. -The rest of the metadata comes from ``package_name/__meta__.py``. - -Our template setup.py file is based on the `example from setuptools documentation `_, and the comprehensive example from `Kenneth Reitz `_ (released under `MIT License `__), with further features added. - -.. _kennethreitz/setup.py: https://github.com/kennethreitz/setup.py -.. _setuptools-setup.py: https://setuptools.readthedocs.io/en/latest/setuptools.html#basic-use - - -Unit tests -~~~~~~~~~~ - -The file ``package_name/tests/base_test.py`` provides a class for unit testing which provides easy access to all the numpy testing in one place (so you don't need to import a stack of testing functions in every test file, just import the ``BaseTestClass`` instead). - -If you aren't using doing numeric tests, you can delete this from the ``package_name/tests/base_test.py`` file. - - -GitHub Actions Workflows -~~~~~~~~~~~~~~~~~~~~~~~~ - -GitHub features the ability to run various workflows whenever code is pushed to the repo or a pull request is opened. -This is one service of several services that can be used to continually run the unit tests and ensure changes can be integrated together without issue. -It is also useful to ensure that style guides are adhered to - -Five workflows are included: - -docs - The docs workflow ensures the documentation builds correctly, and presents any errors and warnings nicely as annotations. - If your repository is public, publicly available html documentation is automatically deployed to the gh-pages branch and https://USER.github.io/PACKAGE/. - -pre-commit - Runs the pre-commit stack. 
- Ensures all contributions are compliant, even if a contributor has not set up pre-commit on their local machine. - -lint - Checks the code uses the black_ style and tests for flake8_ errors. - If you are using the pre-commit hooks, the lint workflow is superfluous and can be deleted. - -test - Runs the unit tests, and pushes coverage reports to Codecov_. - You'll need to sign up at Codecov_ with your GitHub account in order for this integration to work. - -release candidate tests - The release candidate tests workflow runs the unit tests on more Python versions and operating systems than the regular test workflow. - This runs on all tags, plus pushes and PRs to branches named like "v1.2.x", etc. - Wheels are built for all the tested systems, and stored as artifacts for your convenience when shipping a new distribution. - -If you enable the ``publish`` job on the release candidate tests workflow, you can also push built release candidates to the `Test PyPI `_ server. -For this to work, you'll also need to add your Test `PyPI API token `_ to your `GitHub secrets `_. -Checkout the `pypa/gh-action-pypi-publish `_ GitHub action, and `PyPI's guide on distributing from CI `_ for more information on this. -With minimal tweaks, this job can be changed to push to PyPI for real, but be careful with this since releases on PyPI can not easily be yanked. - -.. _Codecov: https://codecov.io/ -.. _ci-packaging: https://packaging.python.org/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ -.. _github-secrets: https://docs.github.com/en/actions/reference/encrypted-secrets -.. _pypi-api-token: https://pypi.org/help/#apitoken -.. _pypi-publish: https://github.com/pypa/gh-action-pypi-publish -.. _testpypi: https://test.pypi.org/ - - -Other CI/CD options -~~~~~~~~~~~~~~~~~~~ - -Alternative CI/CD services are also available for running tests. - -- `Travis CI `_ offers a free trial service. 
- -- `Circle CI `_ is another option with a limited `free option `_. - -- `Appveyor `_ useful for testing on Windows. - This offers an alternative to GitHub Actions if you need to `build Windows wheel files to submit to PyPI `_. - -- `Jenkins `_ is useful if you want to run your CI test suite locally or on your own private server instead of in the cloud. - - -Contributing ------------- - -Contributions are welcome! If you can see a way to improve this template: - -- Do click the fork button -- Make your changes and make a pull request. - -Or to report a bug or request something new, make an issue. - - -.. highlight:: python - - -.. |GHA tests| image:: https://github.com/scottclowe/python-template-repo/workflows/tests/badge.svg - :target: https://github.com/scottclowe/python-template-repo/actions?query=workflow%3Atests - :alt: GHA Status -.. |Codecov report| image:: https://codecov.io/github/scottclowe/python-template-repo/coverage.svg?branch=master - :target: https://codecov.io/github/scottclowe/python-template-repo?branch=master - :alt: Coverage .. |pre-commit| image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white :target: https://github.com/pre-commit/pre-commit :alt: pre-commit diff --git a/package_name/__init__.py b/bioscan_dataset/__init__.py similarity index 100% rename from package_name/__init__.py rename to bioscan_dataset/__init__.py diff --git a/bioscan_dataset/__meta__.py b/bioscan_dataset/__meta__.py new file mode 100644 index 0000000..2794fa4 --- /dev/null +++ b/bioscan_dataset/__meta__.py @@ -0,0 +1,10 @@ +name = "bioscan-dataset" +path = name.lower().replace("-", "_").replace(" ", "_") +version = "0.1.dev0" +author = "Scott C. Lowe" +author_email = "scott.code.lowe@gmail.com" +description = ( + "PyTorch torchvision-style datasets for the BIOSCAN-1M and BIOSCAN-5M datasets." 
+) +url = "" +license = "MIT" diff --git a/docs/source/conf.py b/docs/source/conf.py index 8e6a570..8d36549 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,7 +24,7 @@ sys.path.insert(0, REPO_DIR) -from package_name import __meta__ as meta # noqa: E402 isort:skip +from bioscan_dataset import __meta__ as meta # noqa: E402 isort:skip # -- Project information ----------------------------------------------------- diff --git a/package_name/__meta__.py b/package_name/__meta__.py deleted file mode 100644 index abd8a35..0000000 --- a/package_name/__meta__.py +++ /dev/null @@ -1,12 +0,0 @@ -# `name` is the name of the package as used for `pip install package` -name = "package-name" -# `path` is the name of the package for `import package` -path = name.lower().replace("-", "_").replace(" ", "_") -# Your version number should follow https://python.org/dev/peps/pep-0440 and -# https://semver.org -version = "0.1.dev0" -author = "Author Name" -author_email = "" -description = "" # One-liner -url = "" # your project homepage -license = "Unlicense" # See https://choosealicense.com diff --git a/package_name/module.py b/package_name/module.py deleted file mode 100644 index 3e6892f..0000000 --- a/package_name/module.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Module provides a simple cubic_rectification function. -""" - -import numpy as np - - -def cubic_rectification(x, verbose=False): - """ - Rectified cube of an array. - - Parameters - ---------- - x : numpy.ndarray - Input array. - verbose : bool, optional - Whether to print out details. Default is ``False``. - - Returns - ------- - numpy.ndarray - Elementwise, the cube of `x` where it is positive and ``0`` otherwise. - - Note - ---- - This is a sample function, using a numpy docstring format. - - Note - ---- - The use of intersphinx will cause :class:`numpy.ndarray` to link to - the numpy documentation page. 
- """ - if verbose: - print("Cubing and then rectifying {}".format(x)) - return np.maximum(0, x**3) diff --git a/package_name/tests/__init__.py b/package_name/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/package_name/tests/base_test.py b/package_name/tests/base_test.py deleted file mode 100644 index a11a640..0000000 --- a/package_name/tests/base_test.py +++ /dev/null @@ -1,243 +0,0 @@ -""" -Provides a base test class for other test classes to inherit from. - -Includes the numpy testing functions as methods. -""" - -import os.path -import sys -import unittest -from inspect import getsourcefile - -import numpy as np -import pytest -from numpy.testing import ( - assert_allclose, - assert_almost_equal, - assert_approx_equal, - assert_array_almost_equal, - assert_array_almost_equal_nulp, - assert_array_equal, - assert_array_less, - assert_array_max_ulp, - assert_equal, - assert_raises, - assert_string_equal, - assert_warns, -) - -TEST_DIRECTORY = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0))) - - -def assert_starts_with(actual, desired): - """ - Check that a string starts with a certain substring. - - Parameters - ---------- - actual : str-like - Actual string or string-like object. - desired : str - Desired initial string. 
- """ - try: - assert len(actual) >= len(desired) - except BaseException: - print( - "Actual string too short ({} < {} characters)".format( - len(actual), len(desired) - ) - ) - print("ACTUAL: {}".format(actual)) - raise - try: - return assert_string_equal(str(actual)[: len(desired)], desired) - except BaseException as err: - msg = "ACTUAL: {}".format(actual) - if isinstance(getattr(err, "args", None), str): - err.args += "\n" + msg - elif isinstance(getattr(err, "args", None), tuple): - if len(err.args) == 1: - err.args = (err.args[0] + "\n" + msg,) - else: - err.args += (msg,) - else: - print(msg) - raise - - -class BaseTestCase(unittest.TestCase): - """ - Superclass for test cases, including support for numpy. - """ - - # The attribute `test_directory` provides the path to the directory - # containing the file `base_test.py`, which is useful to obtain - # test resources - files which are needed to run tests. - test_directory = TEST_DIRECTORY - - def __init__(self, *args, **kwargs): - """Instance initialisation.""" - # First do the __init__ associated with parent class - super().__init__(*args, **kwargs) - # Add a test to automatically use when comparing objects of - # type numpy ndarray. This will be used for self.assertEqual(). - self.addTypeEqualityFunc(np.ndarray, self.assert_allclose) - - @pytest.fixture(autouse=True) - def capsys(self, capsys): - r""" - Pass-through for accessing pytest.capsys fixture with class methods. - - Returns - ------- - capture : pytest.CaptureFixture[str] - - Example - ------- - To use this fixture with your own subclass of ``BaseTestCase``:: - - class TestVerbose(BaseTestCase): - def test_output(self): - print("hello") - captured = self.capsys.readouterr() - self.assert_string_equal(captured.out, "hello\n") - - Note - ---- - capsys will capture all messages sent to stdout and stderr since the - last call to capsys (or since execution began on the test). 
To test the - output of a particular command, you may want to do a capture before the - command to clear stdout/stderr before running the command and then - capturing its output. - - See Also - -------- - - https://docs.pytest.org/en/stable/reference.html#capsys - - https://docs.pytest.org/en/stable/capture.html - """ - self.capsys = capsys - - def recapsys(self, *captures): - r""" - Capture stdout and stderr, then write them back to stdout and stderr. - - Capture is done using the :func:`pytest.capsys` fixture. Used on its - own, :func:`~pytest.capsys` captures outputs to stdout and stderr, - which prevents the output from appearing in the usual way when an - error occurs during testing. - - By chaining series of calls to ``capsys`` and ``recapsys`` around - commands whose outputs must be inspected, all output directed to stdout - and stderr will end up there and appear in the "Captured stdout call" - block in the event of a test failure, as well as being captured here - for the test. - - Parameters - ---------- - *captures : pytest.CaptureResult, optional - A series of extra captures to output. For each `capture` in - `captures`, `capture.out` and `capture.err` are written to stdout - and stderr, respectively. - - Returns - ------- - capture : NamedTuple - `capture.out` and `capture.err` contain all the outputs to stdout - and stderr since the previous capture with :func:`~pytest.capsys`. - - Example - ------- - To use this fixture with your own subclass of ``BaseTestCase``:: - - class TestVerbose(BaseTestCase): - def test_hello_world(self): - print("previous message here") - message = "Hello world!" 
- capture_pre = self.capsys.readouterr() # Clear stdout - print(message) - capture_post = self.recapsys(capture_pre) # Capture & output - self.assert_string_equal(capture_post.out, message + "\n") - """ - capture_now = self.capsys.readouterr() - for capture in captures + (capture_now,): - sys.stdout.write(capture.out) - sys.stderr.write(capture.err) - return capture_now - - # Add assertions provided by numpy to this class, so they will be - # available as methods to all subclasses when we do our tests. - def assert_almost_equal(self, *args, **kwargs): - """ - Check if two items are not equal up to desired precision. - """ - return assert_almost_equal(*args, **kwargs) - - def assert_approx_equal(self, *args, **kwargs): - """ - Check if two items are not equal up to significant digits. - """ - return assert_approx_equal(*args, **kwargs) - - def assert_array_almost_equal(self, *args, **kwargs): - """ - Check if two objects are not equal up to desired precision. - """ - return assert_array_almost_equal(*args, **kwargs) - - def assert_allclose(self, *args, **kwargs): - """ - Check if two objects are equal up to desired tolerance. - """ - return assert_allclose(*args, **kwargs) - - def assert_array_almost_equal_nulp(self, *args, **kwargs): - """ - Compare two arrays relatively to their spacing. - """ - return assert_array_almost_equal_nulp(*args, **kwargs) - - def assert_array_max_ulp(self, *args, **kwargs): - """ - Check that all items of arrays differ in at most N Units in the Last Place. - """ - return assert_array_max_ulp(*args, **kwargs) - - def assert_array_equal(self, *args, **kwargs): - """ - Check if two array_like objects are equal. - """ - return assert_array_equal(*args, **kwargs) - - def assert_array_less(self, *args, **kwargs): - """ - Check if two array_like objects are not ordered by less than. - """ - return assert_array_less(*args, **kwargs) - - def assert_equal(self, *args, **kwargs): - """ - Check if two objects are not equal. 
- """ - return assert_equal(*args, **kwargs) - - def assert_raises(self, *args, **kwargs): - """ - Check that an exception of class exception_class is thrown by callable. - """ - return assert_raises(*args, **kwargs) - - def assert_warns(self, *args, **kwargs): - """ - Check that the given callable throws the specified warning. - """ - return assert_warns(*args, **kwargs) - - def assert_string_equal(self, *args, **kwargs): - """ - Test if two strings are equal. - """ - return assert_string_equal(*args, **kwargs) - - def assert_starts_with(self, *args, **kwargs): - return assert_starts_with(*args, **kwargs) diff --git a/package_name/tests/test_module.py b/package_name/tests/test_module.py deleted file mode 100644 index 63a02da..0000000 --- a/package_name/tests/test_module.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Tests for module in package_name. -""" - -import math - -import numpy as np - -# from package_name.module import cubic_rectification -from ..module import cubic_rectification -from .base_test import BaseTestCase, unittest - - -class NumbersTest(BaseTestCase): - def test_even(self): - """ - Test that numbers between 0 and 5 are all even. - """ - for i in range(0, 6, 2): - with self.subTest(i=i): - self.assertEqual(i % 2, 0) - - -class TestVerbose(BaseTestCase): - """ - Test that things are printed to stdout correctly. - """ - - def test_hello_world(self): - """Test printing to stdout.""" - message = "Hello world!" 
- capture_pre = self.capsys.readouterr() # Clear stdout - print(message) # Execute method (verbose) - capture_post = self.recapsys(capture_pre) # Capture and then re-output - self.assert_string_equal(capture_post.out.strip(), message) - - def test_shakespeare(self): - # Clear stdout (in this case, an empty capture) - capture_pre = self.capsys.readouterr() - # Execute method (verbose) - print("To be, or not to be, that is the question:") - # Capture the output to stdout, then re-output - capture_post = self.recapsys(capture_pre) - # Compare output to target - self.assert_starts_with(capture_post.out, "To be, or not") - # Clear stdout (in this case, capturing the re-output first print statement) - capture_pre = self.capsys.readouterr() - # Execute method (verbose) - print("Whether 'tis nobler in the mind to suffer") - # Capture the output to stdout, then re-output. This now prints both - # lines to stdout at once, which otherwise would not appear due to our - # captures. - capture_post = self.recapsys(capture_pre) - # Compare output to target - self.assert_starts_with(capture_post.out.lower(), "whether 'tis nobler") - - -class TestCubicRectification(BaseTestCase): - """ - Tests for the cubic_rectification function. - """ - - def test_int(self): - """Test with integer inputs.""" - self.assertEqual(cubic_rectification(2), 8) - self.assertEqual(cubic_rectification(-2), 0) - self.assertEqual(cubic_rectification(3), 27) - - def test_float(self): - """Test with float inputs.""" - # Need to use assert_allclose due to the real possibility of a - # floating point inaccuracy. 
- self.assert_allclose(cubic_rectification(1.2), 1.728) - self.assert_allclose(cubic_rectification(-1.2), 0) - - def test_empty_array(self): - """Test with empty array.""" - self.assert_equal(cubic_rectification(np.array([])), np.array([])) - - def test_array(self): - """Test with numpy array inputs.""" - # Test with singleton array - self.assert_equal(cubic_rectification(np.array(3)), np.array(27)) - # Test with vector - self.assert_equal( - cubic_rectification(np.array([0, 2, -2])), np.array([0, 8, 0]) - ) - - def test_arange(self): - """Test with numpy array input generated with arange.""" - # Test with arange input - x = np.arange(-3, 4) - actual = cubic_rectification(x) - desired = np.array([0, 0, 0, 0, 1, 8, 27]) - self.assert_allclose(actual, desired) - - @unittest.expectedFailure - def test_nan_skipped(self): - """Test for NaN input with invalid comparison methods.""" - # We can't use the standard assertEquals for comparing two NaNs - self.assertEqual(cubic_rectification(float("nan")), float("nan")) - self.assertEqual(cubic_rectification(np.nan), np.nan) - - def test_nan(self): - """Test for NaN input with valid comparison methods.""" - # Can use the assert_equal from numpy.testing to compare NaNs - self.assert_equal(cubic_rectification(float("nan")), float("nan")) - self.assert_equal(cubic_rectification(np.nan), np.nan) - # Or we can use an isnan function from either math or numpy - self.assertTrue(math.isnan(cubic_rectification(float("nan")))) - self.assertTrue(np.isnan(cubic_rectification(np.nan))) - - def test_quiet(self): - capture_pre = self.capsys.readouterr() # Clear stdout - cubic_rectification(2) - capture_post = self.recapsys(capture_pre) # Capture and then re-output - self.assert_equal(capture_post.out, "") - - def test_verbose(self): - capture_pre = self.capsys.readouterr() # Clear stdout - cubic_rectification(2, verbose=True) # Execute method (verbose) - capture_post = self.recapsys(capture_pre) # Capture and then re-output - 
self.assert_starts_with(capture_post.out, "Cubing") # Test diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index 7bb576f..0000000 --- a/requirements-test.txt +++ /dev/null @@ -1,3 +0,0 @@ -pytest -pytest-cov -pytest-flake8 diff --git a/requirements.txt b/requirements.txt index 4d7f078..92ffd1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -numpy>=1.5.1 -scipy +torch +torchvision diff --git a/setup.py b/setup.py index d4b4ea9..786516b 100755 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ def read(fname): # are installed. # https://packaging.python.org/guides/single-sourcing-package-version/ meta = {} -exec(read("package_name/__meta__.py"), meta) +exec(read("bioscan_dataset/__meta__.py"), meta) # Import the README and use it as the long-description.