Skip to content

Commit

Permalink
feat: create dataset for tagging
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian2012 committed Aug 28, 2024
1 parent 4aabd90 commit 985a0af
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 0 deletions.
39 changes: 39 additions & 0 deletions models/courses/course_tags.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
-- Course tags model: one row per (course, taxonomy, tag) taken from the most
-- recently modified course overview, joined to object_tag to pick up the
-- tag lineage. Output columns: course_key, course_name, taxonomy_name, tag,
-- lineage.
with
    -- Latest `modified` value recorded for each (org, course_key).
    most_recent_overviews as (
        select org, course_key, max(modified) as last_modified
        from {{ source("event_sink", "course_overviews") }}
        group by org, course_key
    ),
    -- Overview rows whose `modified` equals that latest value. Every column
    -- is qualified with `co.` because both sides of the join expose `org`
    -- and `course_key`; only the columns used downstream are projected.
    -- NOTE(review): if several rows share the latest `modified` for a course
    -- (e.g. repeated dumps), all of them pass this join -- confirm whether
    -- deduplication is needed.
    most_recent_course_tags as (
        select
            co.course_key as course_key,
            co.display_name as course_name,
            -- The `tags` member of the course JSON, kept as a string so it
            -- can be re-parsed below.
            JSONExtract(co.course_data_json, 'tags', 'String') as tags_str
        from {{ source("event_sink", "course_overviews") }} as co
        inner join
            most_recent_overviews as mro
            on co.org = mro.org
            and co.course_key = mro.course_key
            and co.modified = mro.last_modified
    ),
    -- Explodes the tags JSON twice: first into one row per top-level key
    -- (exposed as taxonomy_name), then into one row per element of that
    -- key's array (exposed as tag).
    parsed_tags as (
        select
            course_key,
            course_name,
            JSONExtractKeysAndValues(tags_str, 'String') as tags_keys,
            arrayJoin(tags_keys) as taxonomy_tuple,
            tupleElement(taxonomy_tuple, 1) as taxonomy_name,
            tupleElement(taxonomy_tuple, 2) as tags_array_str,
            JSONExtractArrayRaw(tags_array_str) as tags,
            -- Raw array elements keep their surrounding double quotes;
            -- strip them to get the bare tag text.
            -- NOTE(review): JSON escape sequences inside a tag are not
            -- decoded by this trim -- confirm tags never contain them.
            trim(BOTH '\"\"' from arrayJoin(tags)) as tag
        from most_recent_course_tags
    )
-- Keep only tags that have a matching object_tag row for the same course.
-- Columns are qualified and explicitly aliased so name resolution does not
-- depend on which table happens to define a given column.
select
    parsed_tags.course_key as course_key,
    parsed_tags.course_name as course_name,
    parsed_tags.taxonomy_name as taxonomy_name,
    parsed_tags.tag as tag,
    ot.lineage as lineage
from parsed_tags
inner join
    {{ source("event_sink", "object_tag") }} as ot
    on parsed_tags.course_key = ot.object_id
    and parsed_tags.tag = ot._value
12 changes: 12 additions & 0 deletions models/courses/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,15 @@ sources:
- name: modified
- name: dump_id
- name: time_last_dumped

# Source table `object_tag`. models/courses/course_tags.sql reads it via
# source("event_sink", "object_tag"), joining on object_id and _value and
# selecting lineage.
- name: object_tag
columns:
- name: id
- name: object_id  # compared with course_key in course_tags.sql
- name: taxonomy
- name: tag
- name: _value  # compared with the parsed tag text in course_tags.sql
- name: _export_id
- name: lineage  # returned as-is by course_tags.sql
- name: dump_id
- name: time_last_dumped

0 comments on commit 985a0af

Please sign in to comment.