Skip to content

Commit

Permalink
Add CR_CREATE_DATE and update CR_SUBLEVEL, CR_COU_COPYRIGHT_HOLDER (#11)
Browse files Browse the repository at this point in the history
* Add course start date

* Add sublevel function

* Enhance docstring- remove pylint character warning

* Remove cli.main from __init__.py and update README

* Update README; env variable instructions

* Update CR_COU_COPYRIGHT_HOLDER

* Use API_BASE_URL env variable

* Update poetry lock
  • Loading branch information
ibrahimjaved12 authored Feb 28, 2024
1 parent 320a840 commit b4e7ade
Show file tree
Hide file tree
Showing 12 changed files with 1,374 additions and 1,116 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,17 @@ Therefore, the above commands will generate `private/output/ocw_oer_export.csv`

If you want to change this, you will not only have to change the `output_path` in the function (`create_csv` or `create_json`) but also have to change the mapping in `docker-compose.yml`.

## Environment Variables

By default, this project uses MIT Open's Production API, as given in `ocw_oer_export/config.py`.
To use the RC API or a local instance instead, create an environment file named `.env` in the project's root directory and set the relevant base URL in it,
e.g. `API_BASE_URL=https://mitopen-rc.odl.mit.edu` or `API_BASE_URL=http://localhost:8063`.

## Requirements

For successful execution and correct output, ensure the [MIT Open's API](https://mit-open-rc.odl.mit.edu/api/v1/courses/?platform=ocw) contains the following fields:

`title`, `url`, `description`, `topics`, `course_feature`, `runs: instructors`

`title`, `url`, `runs: level`, `description`, `topics`, `runs: instructors`, `runs: semester`, `runs: year`, `course_feature`
Additionally, the files in `mapping_files` must be up to date. If new topics are added in OCW without corresponding mappings in `ocw_oer_export/mapping_files/ocw_topic_to_oer_subject.csv`, those topics will produce `null` entries in the CSV (`CR_SUBJECT`). Also make sure `fm_keywords_exports.csv` is present.

## Tests
Expand Down
3 changes: 1 addition & 2 deletions ocw_oer_export/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
__all__ = ["create_json", "create_csv", "main"]
__all__ = ["create_json", "create_csv"]

import logging

from .create_csv import create_csv
from .create_json import create_json
from .cli import main

logging.root.setLevel(logging.INFO)
2 changes: 1 addition & 1 deletion ocw_oer_export/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def main():
parser.add_argument(
"--input_path",
default="/private/output/ocw_api_data.json",
help="Output path for the CSV file",
help="Input path for the JSON file",
)
parser.add_argument(
"--output_path",
Expand Down
10 changes: 10 additions & 0 deletions ocw_oer_export/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""
Module for loading environment settings and setting API base URL based on the current environment.
"""
import os
from dotenv import load_dotenv

load_dotenv()

# Base URL of the API. An unset or empty API_BASE_URL falls back to the
# production default, so a blank `API_BASE_URL=` entry cannot yield a
# host-less API_URL.
API_BASE_URL = os.getenv("API_BASE_URL") or "https://mitopen.odl.mit.edu"
# Strip trailing slashes before appending the path so a base URL written as
# "http://host/" does not produce "http://host//api/v1/...".
API_URL = f"{API_BASE_URL.rstrip('/')}/api/v1/courses/?platform=ocw"
4 changes: 0 additions & 4 deletions ocw_oer_export/constants.py

This file was deleted.

56 changes: 45 additions & 11 deletions ocw_oer_export/create_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from .client import extract_data_from_api
from .data_handler import extract_data_from_json
from .constants import API_URL
from .config import API_URL
from .utilities import normalize_course_url, normalize_keywords, text_cleanup


Expand Down Expand Up @@ -56,6 +56,26 @@ def create_ocw_topic_to_oer_subject_mapping(path=None, file_name=None):
return {row["OCW Topic"]: row["OER Subject"] for row in reader}


def get_cr_sublevel(levels):
    """Set the value(s) of CR_SUBLEVEL based on the course levels.

    Args:
        levels: Iterable of level objects, each a mapping with a "name" key
            (e.g. ``{"name": "Undergraduate"}``). May be ``None`` or empty.

    Returns:
        The de-duplicated, alphabetically sorted sublevels joined with "|".
        An empty string is returned when there are no levels or when none of
        the level names has a mapping.
    """
    level_mappings = {
        "Undergraduate": ["Community College/Lower Division", "College/Upper Division"],
        "Graduate": ["Graduate/Professional"],
        "High School": ["High School", "Community College/Lower Division"],
        "Non-Credit": ["Career/Technical Education"],
    }
    # Unmapped level names contribute nothing; a bare `.get()` would return
    # None here and make the comprehension raise TypeError.
    sublevels = {
        sublevel
        for level in levels or ()
        for sublevel in level_mappings.get(level["name"], ())
    }
    return "|".join(sorted(sublevels))


def get_description_in_plain_text(description):
    """Return the course description as plain text.

    The description is passed through ``text_cleanup`` (per the original
    docstring, this cleans up markdown and HTML) and the result is returned
    unchanged.
    """
    return text_cleanup(description)


def get_cr_subjects(ocw_topics_mapping, ocw_course_topics):
"""
Get OER formatted Course Resource Subjects list.
Expand Down Expand Up @@ -89,6 +109,22 @@ def get_cr_keywords(fm_ocw_keywords_mapping, list_of_topics_objs, course_url):
return "|".join(topic["name"] for topic in list_of_topics_objs)


def get_cr_create_date(semester, year):
    """Approximate a course start date from its semester and year.

    Returns "<year>-<MM-DD>" using a fixed month/day per known semester,
    "<year>-01-01" when the semester is not one of the known names, and an
    empty string when no year is given.
    """
    # Semesters without an entry fall back to the first day of the year.
    month_day = {
        "Fall": "09-01",
        "Spring": "02-01",
        "Summer": "06-01",
        "January IAP": "01-01",
    }.get(semester, "01-01")
    return f"{year}-{month_day}" if year else ""


def get_cr_authors(list_of_authors_objs):
"""Get OER formatted Course Resource Authors list."""
return "|".join(
Expand Down Expand Up @@ -136,21 +172,15 @@ def get_cr_accessibility(ocw_course_feature_tags):
return "|".join(tags)


def get_description_in_plain_text(description):
    """Return the course description as plain text.

    The description is passed through ``text_cleanup`` (per the original
    docstring, this cleans up markdown and HTML) and the result is returned
    unchanged.
    """
    return text_cleanup(description)


def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping):
"""Transform a single course according to OER template."""
course_runs = course["runs"][0]
return {
"CR_TITLE": course["title"],
"CR_URL": course_runs["url"],
"CR_MATERIAL_TYPE": "Full Course",
"CR_Media_Formats": "Text/HTML",
"CR_SUBLEVEL": "null",
"CR_MEDIA_FORMATS": "Text/HTML",
"CR_SUBLEVEL": get_cr_sublevel(course_runs["level"]),
"CR_ABSTRACT": get_description_in_plain_text(course_runs["description"]),
"CR_LANGUAGE": "en",
"CR_COU_TITLE": "Creative Commons Attribution Non Commercial Share Alike 4.0",
Expand All @@ -159,11 +189,14 @@ def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping)
"CR_KEYWORDS": get_cr_keywords(
fm_ocw_keywords_mapping, course["topics"], course_runs["url"]
),
"CR_CREATE_DATE": get_cr_create_date(
course_runs["semester"], course_runs["year"]
),
"CR_AUTHOR_NAME": get_cr_authors(course_runs["instructors"]),
"CR_PROVIDER": "MIT",
"CR_PROVIDER_SET": "MIT OpenCourseWare",
"CR_COU_URL": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
"CR_COU_COPYRIGHT_HOLDER": "MIT",
"CR_COU_COPYRIGHT_HOLDER": get_cr_authors(course_runs["instructors"]),
"CR_EDUCATIONAL_USE": get_cr_educational_use(course["course_feature"]),
"CR_ACCESSIBILITY": get_cr_accessibility(course["course_feature"]),
}
Expand Down Expand Up @@ -209,14 +242,15 @@ def create_csv(
"CR_TITLE",
"CR_URL",
"CR_MATERIAL_TYPE",
"CR_Media_Formats",
"CR_MEDIA_FORMATS",
"CR_SUBLEVEL",
"CR_ABSTRACT",
"CR_LANGUAGE",
"CR_COU_TITLE",
"CR_PRIMARY_USER",
"CR_SUBJECT",
"CR_KEYWORDS",
"CR_CREATE_DATE",
"CR_AUTHOR_NAME",
"CR_PROVIDER",
"CR_PROVIDER_SET",
Expand Down
2 changes: 1 addition & 1 deletion ocw_oer_export/create_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json
import logging

from .constants import API_URL
from .config import API_URL
from .client import extract_data_from_api

logging.basicConfig(level=logging.INFO)
Expand Down
17 changes: 15 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ nodeenv = "1.8.0"
platformdirs = "4.2.0"
pre-commit = "3.6.2"
py = "1.11.0"
python-dotenv = "^1.0.1"
PyYAML = "6.0.1"
requests = "2.31.0"
retry = "0.9.2"
Expand Down
Loading

0 comments on commit b4e7ade

Please sign in to comment.