/* All SQL scripts were executed within IU's GCP Bigquery environment */ /* Get Course Assignment List */ CREATE OR REPLACE TABLE `iu-uits-pti-elearninglab.Hossain21.course_assignment_list` AS SELECT a.id as course_id, a.canvas_id as canvas_course_id, a.sis_source_id as course_sis_id, b.id as assignment_id, b.canvas_id as canvas_assignment_id, b.title as assignment_title, b.due_at as assignment_deadline, b.points_possible as assignment_points FROM `iu-uits-tlt-la.canvas_data.course_dim` a INNER JOIN `iu-uits-tlt-la.canvas_data.assignment_dim` b ON a.id = b.course_id WHERE a.canvas_id IN (1929867,1885883, 1969078, 2017562 ) /* Runtime 2 seconds; processed 520.11 MB */ /* Gets submission details from course assignments */ create or replace table `iu-uits-pti-elearninglab.Hossain21.submission_details` as select a.course_id, a.canvas_course_id, a.course_sis_id, a.assignment_id, a.canvas_assignment_id, a.assignment_title, a.assignment_deadline, a.assignment_points, b.user_id, u.canvas_id as canvas_user_id, b.id as submission_id, b.canvas_id as canvas_submission_id, b.submitted_at, b.attempt, b.quiz_submission_id, timestamp_diff(a.assignment_deadline, b.submitted_at, MINUTE) submission_time_from_deadline, b.grade, c.score from `iu-uits-pti-elearninglab.Hossain21.course_assignment_list` a inner join `iu-uits-tlt-la.canvas_data.submission_dim` b on a.assignment_id = b.assignment_id inner join `iu-uits-tlt-la.canvas_data.submission_fact` c on b.id = c.submission_id inner join `iu-uits-tlt-la.canvas_data.user_dim` u ON b.user_id = u.id order by a.course_id, b.user_id, a.assignment_deadline /* Runtime 4 seconds; 11.53 GB processed */ /* Computes summaries of assignment submission records */ create or replace table `iu-uits-pti-elearninglab.Hossain21.course_submission_summaries` AS with course_measures as ( select canvas_course_id, course_sis_id, assignment_id, assignment_title, assignment_deadline, assignment_points, date_from_unix_date(CAST(avg(unix_date(extract(DATE FROM submitted_at))) AS INT64)) as avg_subm_date, timestamp_seconds(CAST(avg(unix_seconds(submitted_at)) AS INT64 )) as avg_subm_timestamp, round(avg(score),2) as avg_subm_score from `iu-uits-pti-elearninglab.Hossain21.submission_details` group by canvas_course_id, course_sis_id,assignment_id, assignment_title, assignment_deadline, assignment_points ) select a.canvas_course_id, a.course_sis_id, a.assignment_title, a.assignment_deadline, a.assignment_points, a.avg_subm_date, a.avg_subm_timestamp, a.avg_subm_score, c.name as student, p.sis_user_id as prsn_univ_id, b.submitted_at, b.score, b.submission_time_from_deadline, from course_measures a left join `iu-uits-pti-elearninglab.Hossain21.submission_details` b ON a.canvas_course_id = b.canvas_course_id and a.assignment_id = b.assignment_id inner join `iu-uits-tlt-la.canvas_data.user_dim` c ON b.user_id = c.id inner join `iu-uits-tlt-la.canvas_data.pseudonym_dim` p ON c.id = p.user_id where p.sis_user_id not like '8000%' and c.name <> 'Test Student' order by canvas_course_id, assignment_title, assignment_deadline, c.name /* Runtime 5 seconds; 1.17 GB processed */ /* Gets final exam scores for specified courses */ create or replace table `iu-uits-pti-elearninglab.Hossain21.course_final_scores` AS select c.canvas_id as canvas_course_id, c.sis_source_id as course_sis_id, u.name as student_name, p.sis_user_id as prsn_univ_id, csf.current_score as course_final_score from `iu-uits-tlt-la.canvas_data.course_dim` c inner join `iu-uits-tlt-la.canvas_data.enrollment_dim` e ON c.id = e.course_id inner join `iu-uits-tlt-la.canvas_data.user_dim` u ON e.user_id = u.id inner join `iu-uits-tlt-la.canvas_data.pseudonym_dim` p ON u.id = p.user_id inner join `iu-uits-tlt-la.canvas_data.course_score_dim` csd ON e.id = csd.enrollment_id inner join `iu-uits-tlt-la.canvas_data.course_score_fact` csf ON csd.id = csf.score_id where c.sis_source_id in ('SP20-KO-INFO-I101-28614', 'SP21-KO-INFO-I101-28793','FA20-KO-CSCI-C101-30816','FA21-KO-CSCI-C101-31533') and e.type = 'StudentEnrollment' and e.workflow_state = 'active' and csd.workflow_state = 'active' and p.unique_name not like '8000%' /* Runtime 8 seconds; processed 1.18 GB */ ##################################################################################### R Transformation/Pivot Script ######################################################################### ## This script takes the flat files developed from the above queries, splits the courses, transposes the courses, and merges the final exam score to the transposed file. # Load in tidyr library(tidyverse) # Read in data data = read.csv("Hossain22_submisission_details.csv") final_scores = read.csv("hossain22_final_scores.csv") # Seperate out courses fa20_csci101 = subset(data, canvas_course_id == 1929867 ) fa21_csci101 = subset(data, canvas_course_id == 2017562 ) sp20_info101 = subset(data, canvas_course_id == 1885883 ) sp21_info101 = subset(data, canvas_course_id == 1969078 ) # pivot_wider taking values from specified columns and ids from student-course fields fa20_csci101_wide = pivot_wider(fa20_csci101,id_cols = c(student, prsn_univ_id, canvas_course_id, course_sis_id), names_from = assignment_title, names_sort = TRUE, values_from = c(assignment_deadline, assignment_points, avg_subm_date, avg_subm_timestamp, avg_subm_score, submitted_at, score, submission_time_from_deadline)) suffixes = unique(fa20_csci101$assignment_title) names_to_order <- map(suffixes, ~ names(fa20_csci101_wide)[grep(paste0("_",.x), names(fa20_csci101_wide))]) %>% unlist names_id <- setdiff(names(fa20_csci101_wide), names_to_order) names_id <- names_id[1:4] fa20_csci101_wide_sorted = fa20_csci101_wide %>% select(names_id, names_to_order) fa20_csci_final = merge(fa20_csci101_wide_sorted, final_scores, by = c("canvas_course_id", "prsn_univ_id")) fa20_csci_final$course_sis_id.y <- NULL fa20_csci_final$student_name <- NULL names(fa20_csci_final)[4] <- "course_sis_id" write.csv(fa20_csci_final, "Hossain22_fa20_csci101_wide_sorted.csv",row.names = FALSE) # Repeat for each class; fa21_csci101 fa21_csci101_wide = pivot_wider(fa21_csci101,id_cols = c(student, prsn_univ_id, canvas_course_id, course_sis_id), names_from = assignment_title,values_from = c(assignment_deadline, assignment_points, avg_subm_date, avg_subm_timestamp, avg_subm_score, submitted_at, score, submission_time_from_deadline) ) suffixes = unique(fa21_csci101$assignment_title) names_to_order <- map(suffixes, ~ names(fa21_csci101_wide)[grep(paste0("_",.x), names(fa21_csci101_wide))]) %>% unlist names_id <- setdiff(names(fa21_csci101_wide), names_to_order) names_id <- names_id[1:4] fa21_csci101_wide_sorted = fa21_csci101_wide %>% select(names_id, names_to_order) fa21_csci_final = merge(fa21_csci101_wide_sorted, final_scores, by = c("canvas_course_id", "prsn_univ_id")) fa21_csci_final$course_sis_id.y <- NULL fa21_csci_final$student_name <- NULL names(fa21_csci_final)[4] <- "course_sis_id" write.csv(fa21_csci_final, "Hossain22_fa21_csci101_wide_sorted.csv",row.names = FALSE) # sp20_info101 sp20_info101_wide = pivot_wider(sp20_info101,id_cols = c(student, prsn_univ_id, canvas_course_id, course_sis_id), names_from = assignment_title, values_from = c(assignment_deadline, assignment_points, avg_subm_date, avg_subm_timestamp, avg_subm_score, submitted_at, score, submission_time_from_deadline)) suffixes = unique(sp20_info101$assignment_title) names_to_order <- map(suffixes, ~ names(sp20_info101_wide)[grep(paste0("_",.x), names(sp20_info101_wide))]) %>% unlist names_id <- setdiff(names(sp20_info101_wide), names_to_order) names_id <- names_id[1:4] sp20_info101_wide_sorted = sp20_info101_wide %>% select(names_id, names_to_order) sp20_info_final = merge(sp20_info101_wide_sorted, final_scores, by = c("canvas_course_id", "prsn_univ_id")) sp20_info_final$course_sis_id.y <- NULL sp20_info_final$student_name <- NULL names(sp20_info_final)[4] <- "course_sis_id" write.csv(sp20_info_final, "Hossain22_sp20_info101_wide_sorted.csv",row.names = FALSE) # sp21_info101 sp21_info101_wide = pivot_wider(sp21_info101,id_cols = c(student, prsn_univ_id, canvas_course_id, course_sis_id), names_from = assignment_title, values_from = c(assignment_deadline, assignment_points, avg_subm_date, avg_subm_timestamp, avg_subm_score, submitted_at, score, submission_time_from_deadline)) suffixes = unique(sp21_info101$assignment_title) names_to_order <- map(suffixes, ~ names(sp21_info101_wide)[grep(paste0("_",.x), names(sp21_info101_wide))]) %>% unlist names_id <- setdiff(names(sp21_info101_wide), names_to_order) names_id = names_id[1:4] sp21_info101_wide_sorted = sp21_info101_wide %>% select(names_id, names_to_order) sp21_info_final = merge(sp21_info101_wide_sorted, final_scores, by = c("canvas_course_id", "prsn_univ_id")) sp21_info_final$course_sis_id.y <- NULL sp21_info_final$student_name <- NULL names(sp21_info_final)[4] <- "course_sis_id" write.csv(sp21_info_final, "Hossain22_sp21_info101_wide_sorted.csv",row.names = FALSE)