/* Get events for courses */ /* Runtime: 28s ; 884.64 GBs processed; 284,848,363 observations returned */ CREATE OR REPLACE TABLE `iu-uits-pti-elearninglab.boost_fa19.events` AS SELECT SUBSTR(JSON_EXTRACT_SCALAR(event,"$[actor][id]"),-7,7) AS user_id, SUBSTR(JSON_EXTRACT_SCALAR(event,"$[session][id]"),-32,32) AS session_id, SUBSTR(JSON_EXTRACT_SCALAR(event,"$[group][extensions]['com.instructure.canvas'][entity_id]"),-7,7) AS course_id, event_time, JSON_EXTRACT_SCALAR(event,"$[type]") AS eventType, action, SUBSTR(JSON_EXTRACT_SCALAR(event,"$[object][id]"),24,40) as event_object, SUBSTR(JSON_EXTRACT_SCALAR(event,"$[object][name]"),-7,32) as object_name FROM `udp-iu-prod.event_store`.expanded WHERE event_time BETWEEN '2019-08-26' AND '2019-12-20' /* Create consenting enrollment table */ /* Runtmie 3s ; 1.6 MBs processed; 18,102 observations returned */ CREATE OR REPLACE TABLE `iu-uits-pti-elearninglab.boost_fa19.student_enrollments_consent` AS SELECT u.id as boost_user_id, u.username, CAST(u.canvas_id AS STRING) AS user_canvas_id, e.course_id, CAST(c.canvas_course_id AS STRING) as canvas_course_id, e.enrollment_state FROM `iu-uits-pti-elearninglab.boost_fa19.users` u INNER JOIN `iu-uits-pti-elearninglab.boost_fa19.student_enrollments` e ON u.id = e.user_id INNER JOIN `iu-uits-pti-elearninglab.boost_fa19.courses` c ON e.course_id = c.id WHERE u.consented_at IS NOT NULL AND c.canvas_course_name like 'FA19%' /* Get submission information */ /* Runtime 6s; 10.93 GBs processed ; 250,200 observations returned */ CREATE OR REPLACE TABLE `iu-uits-pti-elearninglab.boost_fa19.submissions` AS SELECT boost_user_id, username, user_canvas_id AS user_canvas_id_str, CAST(user_canvas_id AS INT64) AS user_canvas_id_int, sc.course_id as boost_course_id, canvas_course_id as canvas_course_id_str, CAST(canvas_course_id AS INt64) as canvas_course_id_int, a.id as assignment_id, a.canvas_id as assignment_canvas_id, a.title as assignment_title, a.due_at as assignment_due_at, a.points_possible as assignment_points_possible, sd.id as submission_id, sd.submitted_at, sf.published_score FROM `iu-uits-pti-elearninglab.boost_fa19.student_enrollments_consent` sc INNER JOIN `iu-uits-tlt-la.canvas_data.course_dim` c ON CAST(canvas_course_id AS INT64) = c.canvas_id INNER JOIN `iu-uits-tlt-la.canvas_data.assignment_dim` a ON c.id = a.course_id INNER JOIN `iu-uits-tlt-la.canvas_data.user_dim` u ON CAST(user_canvas_id AS INT64) = u.canvas_id INNER JOIN `iu-uits-tlt-la.canvas_data.submission_dim` sd ON a.id = sd.assignment_id AND u.id = sd.user_id INNER JOIN `iu-uits-tlt-la.canvas_data.submission_fact` sf ON sd.id = sf.submission_id WHERE a.submission_types NOT IN ('on_paper','Assignments', 'none') AND a.due_at IS NOT NULL and a.points_possible > 0 ORDER BY canvas_course_id, username, due_at /* Create dataset for assignment views, scores, and submission details */ /* Runtime 4s; 12.07 GBs processed; 142,923 observations */ CREATE OR REPLACE TABLE `iu-uits-pti-elearninglab.boost_fa19.dataset_1` AS WITH assignment_visits AS ( SELECT DISTINCT a.boost_user_id, a.username, a.user_canvas_id_str, a.boost_course_id, a.canvas_course_id_str, assignment_id, a.assignment_title, a.assignment_points_possible, count(b.entity_id) as assignment_views FROM `iu-uits-pti-elearninglab.boost_fa19.submissions` a INNER JOIN `iu-uits-pti-elearninglab.boost_fa19.events` b ON a.user_canvas_id_str = b.user_id AND a.canvas_course_id_str = b.course_id AND CAST(a.assignment_id AS STRING) = b.entity_id WHERE b.action = 'NavigatedTo' AND b.course_id IS NOT NULL GROUP BY a.boost_user_id, a.user_canvas_id_str,a.username, a.canvas_course_id_str, a.boost_course_id, assignment_id, a.assignment_title, a.assignment_points_possible), subm_agg AS ( select a.boost_user_id, a.username, a.user_canvas_id_str, a.canvas_course_id_str, a.boost_course_id, a.assignment_id, a.assignment_title, a.assignment_points_possible, SUM(CASE WHEN a.submitted_at <= a.assignment_due_at THEN 1 ELSE 0 END) as ontime_subm_cnt, AVG(DATETIME_DIFF(a.assignment_due_at, a.submitted_at, MINUTE)) avg_subm_due_diff, AVG(a.published_score) AS avg_pub_scr FROM `iu-uits-pti-elearninglab.boost_fa19.submissions` a WHERE a.submitted_at IS NOT NULL GROUP BY a.boost_user_id, a.user_canvas_id_str,a.username, a.canvas_course_id_str,a.boost_course_id, a.assignment_id, a.assignment_title, a.assignment_points_possible ) SELECT a.boost_user_id, a.boost_course_id, a.assignment_id, a.assignment_title, a.assignment_points_possible, b.assignment_views, a.ontime_subm_cnt, a.avg_subm_due_diff, a.avg_pub_scr as pub_scr FROM subm_agg a LEFT JOIN assignment_visits b ON a.boost_user_id = b.boost_user_id AND a.canvas_course_id_str = b.canvas_course_id_str AND a.assignment_id = b.assignment_id ORDER BY boost_user_id, assignment_title /* Get Canvas Course Scores */ /* Runtime: 4s ; 860.81 MBs processed; 17,731 observations returned */ CREATE OR REPLACE TABLE `iu-uits-pti-elearninglab.boost_fa19.fa19_canvas_scr_data` AS SELECT a.boost_user_id, a.username, a.user_canvas_id, sha256(a.username) as hashed_username, a.course_id as boost_course_id, a.canvas_course_id, f.current_score as canvas_est_scr FROM `iu-uits-pti-elearninglab.boost_fa19.student_enrollments_consent` a INNER JOIN `iu-uits-tlt-la.canvas_data.user_dim` b ON CAST(a.user_canvas_id AS INT64) = b.canvas_id INNER JOIN `iu-uits-tlt-la.canvas_data.course_dim` c ON CAST(canvas_course_id AS INT64) = c.canvas_id INNER JOIN `iu-uits-tlt-la.canvas_data.enrollment_dim` d ON c.id = d.course_id AND b.id = d.user_id LEFT JOIN `iu-uits-tlt-la.canvas_data.course_score_dim` e ON d.id = e.enrollment_id LEFT JOIN `iu-uits-tlt-la.canvas_data.course_score_fact` f ON e.id = f.score_id WHERE e.workflow_state = 'active' ORDER BY canvas_course_id, username /* Create dataset for Canvas Course Scores */ /* Runtime: 1s ; 407.86 KBs processed; 17731 observations returned */ CREATE OR REPLACE TABLE `iu-uits-pti-elearninglab.boost_fa19.dataset_2` AS SELECT boost_user_id, boost_course_id, canvas_est_scr FROM `iu-uits-pti-elearninglab.boost_fa19.fa19_canvas_scr_data`