From 83e67220a42b98ba686c2d2bddd2cec6b2f2799d Mon Sep 17 00:00:00 2001 From: lizgzil Date: Fri, 26 Jul 2024 16:54:04 +0100 Subject: [PATCH] Add the file structure for this project, the basics of tests, github actions, documentation --- .github/workflows/docs.yaml | 27 ++++++++++++++ .github/workflows/pytest.yaml | 57 ++++++++++++++++++++++++++++++ .gitignore | 4 +++ README.md | 40 +++++++++++++++++++++ docs/README.md | 5 +++ docs/mkdocs.yaml | 44 +++++++++++++++++++++++ docs/page1.md | 1 + docs/site_assets/nesta_logo.png | Bin 0 -> 2737 bytes docs/site_assets/requirements.txt | 3 ++ docs/site_assets/style.css | 10 ++++++ nlp_link/__init__.py | 0 nlp_link/linker.py | 12 +++++++ pyproject.toml | 21 +++++++++++ tests/__init__.py | 0 tests/test_linker.py | 10 ++++++ 15 files changed, 234 insertions(+) create mode 100644 .github/workflows/docs.yaml create mode 100644 .github/workflows/pytest.yaml create mode 100644 .gitignore create mode 100644 docs/README.md create mode 100644 docs/mkdocs.yaml create mode 100644 docs/page1.md create mode 100644 docs/site_assets/nesta_logo.png create mode 100644 docs/site_assets/requirements.txt create mode 100644 docs/site_assets/style.css create mode 100644 nlp_link/__init__.py create mode 100644 nlp_link/linker.py create mode 100644 pyproject.toml create mode 100644 tests/__init__.py create mode 100644 tests/test_linker.py diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 0000000..6190c46 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,27 @@ +name: Deploy docs to gh-pages + +on: + workflow_dispatch: + push: + branches: + - dev + +jobs: + build: + runs-on: ubuntu-latest + name: Deploy docs to gh-pages + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Deploy + uses: mhausenblas/mkdocs-deploy-gh-pages@master + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CONFIG_FILE: 
docs/mkdocs.yaml + REQUIREMENTS: docs/site_assets/requirements.txt \ No newline at end of file diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 0000000..b8e2a70 --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,57 @@ +name: Unit Tests + +on: [push] + +jobs: + test: + runs-on: ${{ matrix.os }} + + strategy: + matrix: + os: ["ubuntu-latest", "macos-latest"] + python-version: ["3.9", "3.10"] + steps: + #---------------------------------------------- + # check-out repo and set-up python + #---------------------------------------------- + - name: Check out repository + uses: actions/checkout@v4 + - name: Set up python ${{ matrix.python-version }} + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + #---------------------------------------------- + # ----- install & configure poetry ----- + #---------------------------------------------- + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: true + virtualenvs-in-project: true + #---------------------------------------------- + # load cached venv if cache exists + #---------------------------------------------- + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} + #---------------------------------------------- + # install dependencies if cache does not exist + #---------------------------------------------- + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root + #---------------------------------------------- + # install your root project, if required + #---------------------------------------------- + - name: Install additional dependencies + run: | + poetry install --no-interaction + 
#---------------------------------------------- + # add matrix specifics and run test suite + #---------------------------------------------- + - name: Run tests + run: poetry run pytest tests/ --verbose diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2044a52 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# Mac OS-specific storage files +.DS_Store + +__pycache__/ \ No newline at end of file diff --git a/README.md b/README.md index fe82b75..62982ba 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,42 @@ # nlp-link A python package to semantically link two lists of texts. + + +## Set-up + +In setting up this project we ran: +``` +conda create --name nlp-link pip python=3.9 +conda activate nlp-link +pip install poetry +``` + +``` +poetry init + +``` + +``` +poetry install + +``` + +## Tests + +To run tests: + +``` +poetry run pytest tests/ +``` + +## Documentation + +Docs for this repo are automatically published to the gh-pages branch via GitHub Actions after a PR is merged into dev. We use Material for MkDocs for these. Nothing needs to be done to update these. + +However, if you are editing the docs you can test them out locally by running + +``` +cd docs +pip install -r site_assets/requirements.txt +mkdocs serve +``` diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..13cbf86 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,5 @@ +# nlp-link + +Documentation for NLP Link + +- [Page1](./page1.md) \ No newline at end of file diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml new file mode 100644 index 0000000..1bd0fb1 --- /dev/null +++ b/docs/mkdocs.yaml @@ -0,0 +1,44 @@ +site_name: NLP Link +site_description: A python package to semantically link two lists of texts. +site_url: https://nestauk.github.io/nlp-link +repo_name: nestauk/nlp-link +repo_url: https://github.com/nestauk/nlp-link +extra: + homepage: https://nestauk.github.io/nlp-link +docs_dir: . 
+extra_css: + - site_assets/style.css +theme: + name: material + # disable_nav_previous_next: true + # disable_footer: false + logo: site_assets/nesta_logo.png + favicon: site_assets/nesta_logo.png + features: + - navigation.instant + - navigation.tracking + # - navigation.tabs + - navigation.sections + - navigation.top + font: + text: Century Gothic + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + primary: custom + accent: custom + toggle: + icon: material/weather-night + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: amber + accent: pink + toggle: + icon: material/weather-sunny + name: Switch to light mode +nav: + - Home: README.md + - Page 1: page1.md +plugins: + - same-dir \ No newline at end of file diff --git a/docs/page1.md b/docs/page1.md new file mode 100644 index 0000000..127d3fc --- /dev/null +++ b/docs/page1.md @@ -0,0 +1 @@ +## Title \ No newline at end of file diff --git a/docs/site_assets/nesta_logo.png b/docs/site_assets/nesta_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..af179d6076f9e81e5aaba6fdf10c898887d4ce6b GIT binary patch literal 2737 zcmb7G3se(V8omjL5EKRJ3YfDEu~@B5G6_VI7_tp72@2s=jc&JPGMR*6l8G||LW^mq~A2$m| zkYuee(1xNVDvYp;Q4?Xr#V)&(K>@W(1xY(jqXL)R=AcxrEFr6*f;7XH2nDPPy)sK^ zC@K-;5F{>GAV$Q9P{R`l1ZvV`R+Z)E2jOs)C9I-pr%EEJtE&^&rHcvDB9SVUN(my9 z$Ydg@A);10Xw)TgP_qL@M(pI`6h>N|w3ToO7`v#EsG+ljLdMbP6NnRcS;ss%s9;(! 
zK?x&~NX3X`A{cEo{|tw(HJKzB$OATZ^BfxmA2s_m4WqPcd6D!hjT{+HY>AxRW{UN$r8FmCftnH*l3|f zXNPIg0-d84r)aj(Y6-I~ZPsu*0dBZ?wPb=w)R2dfI7$=bI17x~a(q=dcPu{5`0LC1g5ULt^;oNe*ZSF+ndC zL=9HO=rbLEj5pT4QS3-~IZU;1lSYQfq#~(IgpA=wL{C8HxoDk9gJT%7KxWJoA!b>+ zCk|7lDdRmmn($!lq$=BGi)c?4aX-zmU6>ZE%1KZ)yz>B1dk04bOe$UMBz% zdR?xz!o|6{^EYo^M5dpn9?lSOvMUOD>U{fhcklJ@-KR|PMZ|32s(8=wQc_frqO{#w zZtfS$e12|g`fcUAANP=*t)1u6p8d9+Smd3)A4_8}q%9 z#f_U^KKlcbb^N`3U#GV1D9t%@cu~PW&2ipl+e^#(Ev|Lh-N~t4?Map2TvHAZue>*J z*R5yuu5J8Z&-hHnf4Q$Mng3hU^`)-2e8|P_jft-tyYk{bB<3iF4j=m7E}!h~iMaYz zWXY7ce;hoWQak^|iG5^(qTcssUF+}-MOE?B6}?TAMX2a8lp4Hs=^o<`G2Nh7uc_4} zt~guJJ!xyX^YUJ4)pfP)yJBR zTjkv?Z#4hwK!eD$xjuU5^$RM?J9q7~huqP-4wv`dJoYHrpZ031yZJ#+3EFNi9_XuW z&)bvn@Y753&lYc`=3e|@-X*E~>&AGsyYt3VoPnX!Z@>C&{?oS(u6gawlGu62I(`+G zePU%|)3jYP{ymsn9AA|dbGLGRC3#Yp>HLn`^v>|2-UBTGFYNwwN!Rt z!!^(8E8F5O41VR8#G+@)`+L6Z(|`1d9Zbn2H{}^Kz8NeT_*2sg{ewsP^tG$5t*DIr zB(;D4Q&R4g>CTHAlh&?^R#fM;99Km0yM+0bd~WNC_}NGtXVC}i~}k&jQ6BV7n=P$>{WdiURc^CI%>f>&+Tn&n|8GJQA^*DSN);|>7Unjjht#UYlm+"] +readme = "README.md" +packages = [{include = "nlp_link"}] + +[tool.poetry.dependencies] +python = "^3.9" +scikit-learn = "^1.4.2" +pandas = "^2.2.2" +sentence-transformers = "^2.1.0" +torch = "^1.10.0" +pytest = "^8.2.0" +tqdm = "^4.64.1" +numpy = "^1.24.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_linker.py b/tests/test_linker.py new file mode 100644 index 0000000..df8c2a9 --- /dev/null +++ b/tests/test_linker.py @@ -0,0 +1,10 @@ +from nlp_link.linker import link_lists + +def test_link_lists(): + + list_1 = ["dog", "cat"] + list_2 = ["kitten", "puppy"] + linked = link_lists(list_1, list_2) + + assert len(linked) == len(list_1) +