Skip to content

Commit

Permalink
Merge pull request #98 from openedx/bmtcril/update_instance_mvs
Browse files Browse the repository at this point in the history
feat: Use LowCardinality for smaller columns
  • Loading branch information
bmtcril authored May 30, 2024
2 parents a20f45e + 84a6531 commit 188fb23
Show file tree
Hide file tree
Showing 14 changed files with 71 additions and 53 deletions.
5 changes: 4 additions & 1 deletion models/base/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ models:
description: "The xAPI object identifier"
- name: course_id
data_type: string
description: "The course identifier"
description: "The fully-qualified course identifier URL"
- name: course_key
data_type: String
description: "The course key for the course"
- name: org
data_type: string
description: "The organization that the course belongs to"
Expand Down
41 changes: 25 additions & 16 deletions models/base/xapi_events_all_parsed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

select
event_id as event_id,
JSON_VALUE(event::String, '$.verb.id') as verb_id,
toLowCardinality(JSON_VALUE(event::String, '$.verb.id')) as verb_id,
COALESCE(
NULLIF(JSON_VALUE(event::String, '$.actor.account.name'), ''),
NULLIF(JSON_VALUE(event::String, '$.actor.mbox'), ''),
Expand All @@ -23,23 +23,32 @@ select
-- If the contextActivities parent is a course, use that. It can be a "course"
-- type, or a "cmi.interaction" type for multiple question problem submissions.
-- Otherwise use the object id for the course id.
multiIf(
-- If the contextActivities parent is a course, use that
JSON_VALUE(
event::String, '$.context.contextActivities.parent[0].definition.type'
toLowCardinality(
multiIf(
-- If the contextActivities parent is a course, use that
JSON_VALUE(
event::String, '$.context.contextActivities.parent[0].definition.type'
)
= 'http://adlnet.gov/expapi/activities/course',
JSON_VALUE(event::String, '$.context.contextActivities.parent[0].id'),
-- Else if the contextActivities parent's objectType is Activity or
-- GroupActivity, it's a multi-question problem and we use the grouping id
JSON_VALUE(
event::String, '$.context.contextActivities.parent[0].objectType'
)
in ('Activity', 'GroupActivity'),
JSON_VALUE(event::String, '$.context.contextActivities.grouping[0].id'),
-- Otherwise use the object id
JSON_VALUE(event::String, '$.object.id')
)
= 'http://adlnet.gov/expapi/activities/course',
JSON_VALUE(event::String, '$.context.contextActivities.parent[0].id'),
-- Else if the contextActivities parent is a GroupActivity, it's a multi
-- question problem and we use the grouping id
JSON_VALUE(event::String, '$.context.contextActivities.parent[0].objectType')
in ('Activity', 'GroupActivity'),
JSON_VALUE(event::String, '$.context.contextActivities.grouping[0].id'),
-- Otherwise use the object id
JSON_VALUE(event::String, '$.object.id')
) as course_id,
coalesce(
get_org_from_course_url(course_id), get_org_from_ccx_course_url(course_id), ''
toLowCardinality(splitByString('/', course_id)[-1]) as course_key,
toLowCardinality(
coalesce(
get_org_from_course_url(course_id),
get_org_from_ccx_course_url(course_id),
''
)
) as org,
emission_time as emission_time,
event::String as event
Expand Down
2 changes: 1 addition & 1 deletion models/completion/completion_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(
Expand Down
10 changes: 6 additions & 4 deletions models/enrollment/enrollment_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(
event,
'$.object.definition.extensions."https://w3id.org/xapi/acrossx/extensions/type"'
toLowCardinality(
JSON_VALUE(
event,
'$.object.definition.extensions."https://w3id.org/xapi/acrossx/extensions/type"'
)
) as enrollment_mode
from {{ ref("xapi_events_all_parsed") }}
where
Expand Down
2 changes: 1 addition & 1 deletion models/forum/forum_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ select
event_id,
CAST(emission_time, 'DateTime') as emission_time,
org,
splitByString('/', course_id)[-1] as course_key,
course_key,
object_id,
actor_id,
verb_id
Expand Down
2 changes: 1 addition & 1 deletion models/grading/grading_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSONExtractFloat(event, 'result', 'score', 'scaled') as scaled_score
Expand Down
8 changes: 4 additions & 4 deletions models/instance/fact_instance_actors.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("AggregatingMergeTree()"),
order_by="(emission_hour)",
partition_by="(toYYYYMM(emission_hour))",
order_by="(emission_day)",
partition_by="(toYYYYMM(emission_day))",
)
}}

select
date_trunc('hour', emission_time) as emission_hour,
date_trunc('day', emission_time) as emission_day,
uniqCombinedState(actor_id) as actors_cnt
from {{ ref("xapi_events_all_parsed") }}
group by emission_hour
group by emission_day
11 changes: 5 additions & 6 deletions models/instance/fact_instance_enrollments.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("SummingMergeTree()"),
order_by="(emission_hour)",
partition_by="(toYYYYMM(emission_hour))",
order_by="(emission_day, course_key, enrollment_mode, enrollment_status)",
partition_by="(toYYYYMM(emission_day))",
)
}}

Expand All @@ -19,11 +19,10 @@ with
)

select
date_trunc('hour', emission_time) as emission_hour,
courses.course_name as course_name,
date_trunc('day', emission_time) as emission_day,
enrollments.course_key,
enrollments.enrollment_mode as enrollment_mode,
enrollments.enrollment_status as enrollment_status,
count() as course_enrollment_mode_status_cnt
from enrollments
join {{ ref("course_names") }} courses on enrollments.course_key = courses.course_key
group by emission_hour, course_name, enrollment_mode, enrollment_status
group by emission_day, course_key, enrollment_mode, enrollment_status
8 changes: 4 additions & 4 deletions models/instance/fact_instance_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("AggregatingMergeTree()"),
order_by="(emission_hour)",
partition_by="(toYYYYMM(emission_hour))",
order_by="(emission_day)",
partition_by="(toYYYYMM(emission_day))",
)
}}

select
date_trunc('hour', emission_time) as emission_hour,
date_trunc('day', emission_time) as emission_day,
uniqCombinedState(event_id) as events_cnt
from {{ ref("xapi_events_all_parsed") }}
group by emission_hour
group by emission_day
21 changes: 12 additions & 9 deletions models/instance/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,35 @@ models:
- name: fact_instance_events
description: "A materialized view summarizing site-wide xAPI event activity"
columns:
- name: emission_hour
data_type: datetime(64)
description: "Time of summary, rounded to the nearest hour"
- name: emission_day
data_type: datetime
description: "Time of summary, truncated to the day"
- name: events_cnt
data_type: int
description: "The number of xAPI events that occurred in the given day"

- name: fact_instance_actors
description: "A materialized view summarizing site-wide user activity"
columns:
- name: emission_hour
data_type: datetime(64)
description: "Time of summary, rounded to the nearest hour"
- name: emission_day
data_type: datetime
description: "Time of summary, truncated to the day"
- name: actors_cnt
data_type: int
description: "The number of xAPI actors active in the given day"

- name: fact_instance_enrollments
description: "A materialized view for summarizing site-wide enrollment activity"
columns:
- name: emission_hour
data_type: datetime(64)
description: "Time of summary, rounded to the nearest hour"
- name: emission_day
data_type: datetime
description: "Time of summary, truncated to the day"
- name: course_name
data_type: String
description: "The name of the course"
- name: course_key
data_type: String
description: "The course key for the course"
- name: enrollment_mode
data_type: string
description: "The name of the enrollment mode (ex: audit, honor)"
Expand Down
2 changes: 1 addition & 1 deletion models/navigation/navigation_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
splitByString('/xblock/', object_id)[-1] as block_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSONExtractString(event, 'object', 'definition', 'type') as object_type,
Expand Down
6 changes: 4 additions & 2 deletions models/problems/problem_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(event, '$.result.response') as responses,
Expand All @@ -25,7 +25,9 @@ select
cast(JSON_VALUE(event, '$.result.success') as Bool),
false
) as success,
JSON_VALUE(event, '$.object.definition.interactionType') as interaction_type,
toLowCardinality(
JSON_VALUE(event, '$.object.definition.interactionType')
) as interaction_type,
if(
verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated',
cast(
Expand Down
2 changes: 1 addition & 1 deletion models/video/video_playback_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
ceil(
Expand Down
4 changes: 2 additions & 2 deletions models/video/video_transcript_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ select
event_id,
CAST(emission_time, 'DateTime') as emission_time,
org,
splitByString('/', course_id)[-1] as course_key,
course_key,
splitByString('/xblock/', object_id)[2] as video_id,
actor_id,
JSONExtractBool(
Expand All @@ -25,7 +25,7 @@ select
) as cc_enabled
from {{ ref("xapi_events_all_parsed") }}
where
verb_id in ('http://adlnet.gov/expapi/verbs/interacted')
verb_id = 'http://adlnet.gov/expapi/verbs/interacted'
and JSONHas(
event,
'result',
Expand Down

0 comments on commit 188fb23

Please sign in to comment.