diff --git a/.DS_Store b/.DS_Store index 651b7e0..2522aab 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/COVID-19-Analysis/.DS_Store b/COVID-19-Analysis/.DS_Store index ca32dc6..a1ad077 100644 Binary files a/COVID-19-Analysis/.DS_Store and b/COVID-19-Analysis/.DS_Store differ diff --git a/COVID-19-Analysis/COVID19-Analysis.tar.gz b/COVID-19-Analysis/COVID19-Analysis.tar.gz new file mode 100644 index 0000000..ade71da Binary files /dev/null and b/COVID-19-Analysis/COVID19-Analysis.tar.gz differ diff --git a/COVID-19-Analysis/Covid-19-analysis Query.zip b/COVID-19-Analysis/Covid-19-analysis Query.zip index a748193..1186458 100644 Binary files a/COVID-19-Analysis/Covid-19-analysis Query.zip and b/COVID-19-Analysis/Covid-19-analysis Query.zip differ diff --git a/COVID-19-Analysis/covid19.tar.gz b/COVID-19-Analysis/covid19.tar.gz deleted file mode 100644 index 6f76d96..0000000 Binary files a/COVID-19-Analysis/covid19.tar.gz and /dev/null differ diff --git a/COVID-19-Analysis/data/data.zip b/COVID-19-Analysis/data/covid-19-data.zip similarity index 100% rename from COVID-19-Analysis/data/data.zip rename to COVID-19-Analysis/data/covid-19-data.zip diff --git a/COVID-19-Analysis/db_scripts/queries/age_distribution.gsql b/COVID-19-Analysis/db_scripts/queries/age_distribution.gsql index ab5027b..87f7ce4 100644 --- a/COVID-19-Analysis/db_scripts/queries/age_distribution.gsql +++ b/COVID-19-Analysis/db_scripts/queries/age_distribution.gsql @@ -9,9 +9,9 @@ CREATE QUERY age_distribution() FOR GRAPH MyGraph SYNTAX V2 { MapAccum @@age_map; start = {Patient.*}; - start = select s from start:s - where s.state != "deceased" - accum @@age_map += ((year(now()) - s.birth_year) -> 1); + start = SELECT s FROM start:s + WHERE s.state != "deceased" + ACCUM @@age_map += ((year(now()) - s.birth_year) -> 1); - print @@age_map; + PRINT @@age_map; } \ No newline at end of file diff --git a/COVID-19-Analysis/db_scripts/queries/edge_crawl.gsql 
b/COVID-19-Analysis/db_scripts/queries/edge_crawl.gsql index fe5c242..27a3977 100644 --- a/COVID-19-Analysis/db_scripts/queries/edge_crawl.gsql +++ b/COVID-19-Analysis/db_scripts/queries/edge_crawl.gsql @@ -1,4 +1,4 @@ -CREATE QUERY edge_crawl()FOR GRAPH MyGraph SYNTAX V2 { +CREATE QUERY edge_crawl() FOR GRAPH MyGraph SYNTAX V2 { /************************************** * S1 = Grabbing all Patients that a Patient Infected * S2 = Grabbing all Patients belonging to a well known Case diff --git a/COVID-19-Analysis/db_scripts/queries/infection_subgraph.gsql b/COVID-19-Analysis/db_scripts/queries/infection_subgraph.gsql index 2768df5..acf2326 100644 --- a/COVID-19-Analysis/db_scripts/queries/infection_subgraph.gsql +++ b/COVID-19-Analysis/db_scripts/queries/infection_subgraph.gsql @@ -15,31 +15,31 @@ CREATE QUERY infection_subgraph(vertex p) FOR GRAPH MyGraph SYNTAX V2 { ENDTIME = p.released_date; infected (ANY) = {p}; - infected = select s from infected:s - accum s.@visited = true; + infected = SELECT s FROM infected:s + ACCUM s.@visited = TRUE; - while infected.size() > 0 limit 100 do - places = select t from infected:s-(PATIENT_TRAVELED:e)-TravelEvent:t - where (t.visited_date >= STARTTIME and - t.visited_date <= ENDTIME and + WHILE infected.size() > 0 LIMIT 100 DO + places = SELECT t FROM infected:s-(PATIENT_TRAVELED:e)-TravelEvent:t + WHERE (t.visited_date >= STARTTIME AND + t.visited_date <= ENDTIME AND t.outdegree("PATIENT_TRAVELED") > 1) - and t.@visited == false - accum t.@visited = true, + AND t.@visited == FALSE + ACCUM t.@visited = TRUE, @@edge_set += e; - same_place = select t from places:s-(PATIENT_TRAVELED:e)-Patient:t - where t.@visited == false - accum t.@visited = true, + same_place = SELECT t FROM places:s-(PATIENT_TRAVELED:e)-Patient:t + WHERE t.@visited == FALSE + ACCUM t.@visited = TRUE, @@edge_set += e; - people = select t from infected:s-(reverse_INFECTED_BY>:e)-:t - where (STARTTIME <= t.symptom_onset_date and ENDTIME >= t.symptom_onset_date) - 
and t.@visited == false - accum t.@visited = true, + people = SELECT t FROM infected:s-(reverse_INFECTED_BY>:e)-:t + WHERE (STARTTIME <= t.symptom_onset_date AND ENDTIME >= t.symptom_onset_date) + AND t.@visited == FALSE + ACCUM t.@visited = TRUE, @@edge_set += e; infected = same_place UNION people; - end; + END; - print @@edge_set; + PRINT @@edge_set; } \ No newline at end of file diff --git a/COVID-19-Analysis/db_scripts/queries/most_direct_infections.gsql b/COVID-19-Analysis/db_scripts/queries/most_direct_infections.gsql index 8ded0d2..f476493 100644 --- a/COVID-19-Analysis/db_scripts/queries/most_direct_infections.gsql +++ b/COVID-19-Analysis/db_scripts/queries/most_direct_infections.gsql @@ -8,10 +8,10 @@ CREATE QUERY most_direct_infections() FOR GRAPH MyGraph SYNTAX V2 { start (ANY) = {Patient.*}; //while Start.size() > 0 do - start = select s from start:s-(reverse_INFECTED_BY>:e)-Patient:t - accum s.@num_direct_infections += 1 - order by s.outdegree("reverse_INFECTED_BY") desc - limit 1; + start = SELECT s FROM start:s-(reverse_INFECTED_BY>:e)-Patient:t + ACCUM s.@num_direct_infections += 1 + ORDER BY s.outdegree("reverse_INFECTED_BY") DESC + LIMIT 1; - print start as Answer; + PRINT start AS Answer; } \ No newline at end of file diff --git a/COVID-19-Analysis/db_scripts/queries/use_map.gsql b/COVID-19-Analysis/db_scripts/queries/use_map.gsql index 772deed..1a299ad 100644 --- a/COVID-19-Analysis/db_scripts/queries/use_map.gsql +++ b/COVID-19-Analysis/db_scripts/queries/use_map.gsql @@ -7,9 +7,9 @@ CREATE QUERY use_map(/* Parameters here */) FOR GRAPH MyGraph SYNTAX v2{ //OrAccum @visted; C1 = {City.*}; S1 = SELECT v1 FROM C1:v1-(TRAVEL_EVENT_IN:e1)-TravelEvent:v2-(PATIENT_TRAVELED:e2)-Patient:v3 - //WHERE v3.@visted == false + //WHERE v3.@visted == FALSE ACCUM @@patients_by_city_map += (v1.city -> v3.patient_id); - //POST-ACCUM v3.@visted = true; + //POST-ACCUM v3.@visted = TRUE; - PRINT @@patients_by_city_map As Patient_Count_In_City; + PRINT 
@@patients_by_city_map AS Patient_Count_In_City; } \ No newline at end of file diff --git a/Customer-360-Attribution-and-Engagement-Graph/.DS_Store b/Customer-360-Attribution-and-Engagement-Graph/.DS_Store index 1a5aafb..e1a5a0f 100644 Binary files a/Customer-360-Attribution-and-Engagement-Graph/.DS_Store and b/Customer-360-Attribution-and-Engagement-Graph/.DS_Store differ diff --git a/Customer-360-Attribution-and-Engagement-Graph/Customer-360 Query.zip b/Customer-360-Attribution-and-Engagement-Graph/Customer-360 Query.zip index d8ad50b..e15a354 100644 Binary files a/Customer-360-Attribution-and-Engagement-Graph/Customer-360 Query.zip and b/Customer-360-Attribution-and-Engagement-Graph/Customer-360 Query.zip differ diff --git a/Customer-360-Attribution-and-Engagement-Graph/Customer-Attribution.tar.gz b/Customer-360-Attribution-and-Engagement-Graph/Customer-Attribution.tar.gz new file mode 100644 index 0000000..3c55ddf Binary files /dev/null and b/Customer-360-Attribution-and-Engagement-Graph/Customer-Attribution.tar.gz differ diff --git a/Customer-360-Attribution-and-Engagement-Graph/cust-attribution.tar.gz b/Customer-360-Attribution-and-Engagement-Graph/cust-attribution.tar.gz deleted file mode 100644 index de2b6c8..0000000 Binary files a/Customer-360-Attribution-and-Engagement-Graph/cust-attribution.tar.gz and /dev/null differ diff --git a/Customer-360-Attribution-and-Engagement-Graph/data/data.zip b/Customer-360-Attribution-and-Engagement-Graph/data/customer-360-data.zip similarity index 100% rename from Customer-360-Attribution-and-Engagement-Graph/data/data.zip rename to Customer-360-Attribution-and-Engagement-Graph/data/customer-360-data.zip diff --git a/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_contacts.gsql b/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_contacts.gsql index e9f536c..fab67c9 100644 --- a/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_contacts.gsql +++ 
b/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_contacts.gsql @@ -32,25 +32,26 @@ CREATE QUERY similar_contacts(VERTEX source_customer, FROM A:s -(is_connected_to>:e)- CampaignMember:t; PRINT campaign_members_set.size(); - // From campaign_members_set, traverse 'is_part_of' edges to Campaigns, for all - // desired campaign_types (e.g. Webinar, Website Direct, Demo Signup/Trial) + /* From campaign_members_set, traverse 'is_part_of' edges to Campaigns, for all + desired campaign_types (e.g. Webinar, Website Direct, Demo Signup/Trial) */ campaign_set = SELECT t FROM campaign_members_set:s -(is_part_of>:e)- Campaign:t WHERE campaign_types.size() == 0 OR (t.Campaign_Type IN campaign_types); PRINT campaign_set.size(); - // From campaign_set, traverse reverse_is_part_of edges back to all - // CampaignMembers + /* From campaign_set, traverse reverse_is_part_of edges back to all + CampaignMembers */ rev_campaign_members_set = SELECT t FROM campaign_set:s -(reverse_is_part_of>:e)- CampaignMember:t; PRINT rev_campaign_members_set.size(); - // From CampaignMemberSet, traverse 'reverse_is_connected_to' edges back to - // Contacts, (set B). For each Contact in set B, accumulate the intersection - // size of the shared Campaigns, and ompute its Jaccard Similarity score as - // intersection_size / (size_A + size_B - intersection_size) + /* From CampaignMemberSet, traverse 'reverse_is_connected_to' edges back to + Contacts, (set B). 
For each Contact in set B, accumulate the intersection + size of the shared Campaigns, and compute its Jaccard Similarity score as + intersection_size / (size_A + size_B - intersection_size) + */ B = SELECT t FROM rev_campaign_members_set:s -(reverse_is_connected_to>:e)- Contact:t WHERE t != source_customer diff --git a/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_customers.gsql b/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_customers.gsql index 96dfcd0..1d10733 100644 --- a/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_customers.gsql +++ b/Customer-360-Attribution-and-Engagement-Graph/db_scripts/queries/similar_customers.gsql @@ -38,8 +38,8 @@ CREATE QUERY similar_customers(VERTEX source_customer, campaign_members_set = SELECT t FROM A:s -(is_connected_to:e)-> CampaignMember:t; - // From CampaignMember s, traverse 'is_part_of' edges to Campaign s, for all - // desired campaign_type_set (eg. Webinar, Website Direct, Demo Signup/Trial) + /* From CampaignMember s, traverse 'is_part_of' edges to Campaign s, for all + desired campaign_type_set (eg. 
Webinar, Website Direct, Demo Signup/Trial) */ campaign_set = SELECT t FROM campaign_members_set -(is_part_of:e)-> Campaign:t WHERE campaign_type_set.size() == 0 OR (t.Campaign_Type in campaign_type_set); @@ -48,11 +48,11 @@ CREATE QUERY similar_customers(VERTEX source_customer, campaign_members_set = SELECT t FROM campaign_set:s -(reverse_is_part_of:e)-> CampaignMember:t; - // From CampaignMember s, traverse 'reverse_is_connected_to' edges back to Contacts (B set) - // For each Contact in B set, accumulate the intersection size of the shared Campaigns, and - // compute it's Jaccard Similarity score as - // Jaccard similarity = intersection_size / size of the Union of (A set + B set) - // = intersection_size / (size_A + size_B - intersection_size) + /* From CampaignMember s, traverse 'reverse_is_connected_to' edges back to Contacts (B set) + For each Contact in B set, accumulate the intersection size of the shared Campaigns, and + compute its Jaccard Similarity score as + Jaccard similarity = intersection_size / size of the Union of (A set + B set) + = intersection_size / (size_A + size_B - intersection_size) */ B = SELECT t FROM campaign_members_set:s -(reverse_is_connected_to:e)-> Contact:t WHERE t != source_customer diff --git a/Cybersecurity-Threat-Detection-IT/.DS_Store b/Cybersecurity-Threat-Detection-IT/.DS_Store index 4bd29e0..40a7ffb 100644 Binary files a/Cybersecurity-Threat-Detection-IT/.DS_Store and b/Cybersecurity-Threat-Detection-IT/.DS_Store differ diff --git a/Cybersecurity-Threat-Detection-IT/Cyber-Security-Threat-Detectiontar.gz.tar.gz b/Cybersecurity-Threat-Detection-IT/Cyber-Security-Threat-Detectiontar.gz.tar.gz new file mode 100644 index 0000000..c92d538 Binary files /dev/null and b/Cybersecurity-Threat-Detection-IT/Cyber-Security-Threat-Detectiontar.gz.tar.gz differ diff --git a/Cybersecurity-Threat-Detection-IT/Cybersecurity Threat Detection Query.zip b/Cybersecurity-Threat-Detection-IT/Cybersecurity Threat Detection Query.zip index 
6ee5162..441d588 100644 Binary files a/Cybersecurity-Threat-Detection-IT/Cybersecurity Threat Detection Query.zip and b/Cybersecurity-Threat-Detection-IT/Cybersecurity Threat Detection Query.zip differ diff --git a/Cybersecurity-Threat-Detection-IT/cyber-security.tar.gz b/Cybersecurity-Threat-Detection-IT/cyber-security.tar.gz deleted file mode 100644 index 06a6d2f..0000000 Binary files a/Cybersecurity-Threat-Detection-IT/cyber-security.tar.gz and /dev/null differ diff --git a/Cybersecurity-Threat-Detection-IT/data/data.zip b/Cybersecurity-Threat-Detection-IT/data/cybersecurity-threat-data.zip similarity index 100% rename from Cybersecurity-Threat-Detection-IT/data/data.zip rename to Cybersecurity-Threat-Detection-IT/data/cybersecurity-threat-data.zip diff --git a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/alert_source_tracking.gsql b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/alert_source_tracking.gsql index 5dc90a8..32cf0b2 100644 --- a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/alert_source_tracking.gsql +++ b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/alert_source_tracking.gsql @@ -14,40 +14,40 @@ CREATE QUERY alert_source_tracking(int days=7, int k=3) FOR GRAPH MyGraph SYNTAX start = {@@alert_type_set}; - file_corrupted_alerts = select alert from start-(Alert_Has_Type:e)-:alert - accum alert.@path_list += e; + file_corrupted_alerts = SELECT alert FROM start-(Alert_Has_Type:e)-:alert + ACCUM alert.@path_list += e; - service = select serv from file_corrupted_alerts:alert-(Service_Alert:e)-:serv - accum serv.@alert_date += alert.Alert_Date, serv.@path_list += alert.@path_list, serv.@path_list += e; + service = SELECT serv FROM file_corrupted_alerts:alert-(Service_Alert:e)-:serv + ACCUM serv.@alert_date += alert.Alert_Date, serv.@path_list += alert.@path_list, serv.@path_list += e; - read_events = select event from service:s-(From_Service:e)-:event + read_events = SELECT event FROM service:s-(From_Service:e)-:event // the 
read date is within one day before the alert - where datetime_diff(s.@alert_date,event.Start_Date) between 0 and 3600*24*days - and event.Event_Type == "read" - accum event.@path_list += s.@path_list, event.@path_list += e; + WHERE datetime_diff(s.@alert_date,event.Start_Date) BETWEEN 0 AND 3600*24*days + AND event.Event_Type == "read" + ACCUM event.@path_list += s.@path_list, event.@path_list += e; - resource = select res from read_events:s-(Read_From_Resource:e)-:res + resource = SELECT res FROM read_events:s-(Read_From_Resource:e)-:res // keep the most recent read date of the resource - accum res.@read_date += s.Start_Date, res.@path_list += s.@path_list, res.@path_list += e; + ACCUM res.@read_date += s.Start_Date, res.@path_list += s.@path_list, res.@path_list += e; - write_events = select event from resource:s-(Output_To_Resource:e)-:event + write_events = SELECT event FROM resource:s-(Output_To_Resource:e)-:event // the write date is within one day before the read - where datetime_diff(s.@read_date,event.Start_Date) between 0 and 3600*24*days - and event.Event_Type == "write" - accum event.@path_list += s.@path_list, event.@path_list += e; + WHERE datetime_diff(s.@read_date,event.Start_Date) BETWEEN 0 AND 3600*24*days + AND event.Event_Type == "write" + ACCUM event.@path_list += s.@path_list, event.@path_list += e; // get the users who behaved the file writing operation - users = select user from write_events:s-(User_Event:e)-:user - accum user.@count += 1, user.@path_list += s.@path_list, user.@path_list += e; + users = SELECT user FROM write_events:s-(User_Event:e)-:user + ACCUM user.@count += 1, user.@path_list += s.@path_list, user.@path_list += e; - login_events = select event from users:s-(User_Event:e)-:event + login_events = SELECT event FROM users:s-(User_Event:e)-:event where event.Event_Type == "login" - accum event.@count += s.@count, event.@path_list += s.@path_list, event.@path_list += e; + ACCUM event.@count += s.@count, event.@path_list += 
s.@path_list, event.@path_list += e; - login_IP = select ip from login_events:s-(Has_IP:e)-:ip - accum ip.@count += s.@count, ip.@path_list += s.@path_list, ip.@path_list += e - order by ip.@count desc - limit k; + login_IP = SELECT ip FROM login_events:s-(Has_IP:e)-:ip + ACCUM ip.@count += s.@count, ip.@path_list += s.@path_list, ip.@path_list += e + ORDER BY ip.@count DESC + LIMIT k; - print login_IP [login_IP.@count, login_IP.@path_list]; + PRINT login_IP [login_IP.@count, login_IP.@path_list]; } \ No newline at end of file diff --git a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/firewall_bypass_detection.gsql b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/firewall_bypass_detection.gsql index fa64ef1..e8cd2af 100644 --- a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/firewall_bypass_detection.gsql +++ b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/firewall_bypass_detection.gsql @@ -9,25 +9,25 @@ CREATE QUERY firewall_bypass_detection(/* Parameters here */) FOR GRAPH MyGraph resources = {Resource.*}; - resources = select res from resources:res - where res.Firewall_Required == true; + resources = SELECT res FROM resources:res + WHERE res.Firewall_Required == TRUE; - events = select event from resources-(Read_From_Resource)-:event - where event.Event_Type == "read"; + events = SELECT event FROM resources-(Read_From_Resource)-:event + WHERE event.Event_Type == "read"; - IP_userID = select t from events:s-((Has_IP|User_Event):e)-:t - accum t.@read_time += s.Start_Date; + IP_userID = SELECT t FROM events:s-((Has_IP|User_Event):e)-:t + ACCUM t.@read_time += s.Start_Date; - IP_userID_firewall = select s from IP_userID:s-((Has_IP|User_Event):e)-:event - where event.Event_Type == "firewall" - accum s.@firewall_time += event.Start_Date; + IP_userID_firewall = SELECT s FROM IP_userID:s-((Has_IP|User_Event):e)-:event + WHERE event.Event_Type == "firewall" + ACCUM s.@firewall_time += event.Start_Date; - IP_userID_no_firewall = IP_userID minus 
IP_userID_firewall; + IP_userID_no_firewall = IP_userID MINUS IP_userID_firewall; - print IP_userID_no_firewall; + PRINT IP_userID_no_firewall; - IP_userID_bypass_firewall = select s from IP_userID_firewall:s - where s.@read_time.size() > s.@firewall_time.size(); + IP_userID_bypass_firewall = SELECT s FROM IP_userID_firewall:s + WHERE s.@read_time.size() > s.@firewall_time.size(); - print IP_userID_bypass_firewall; + PRINT IP_userID_bypass_firewall; } \ No newline at end of file diff --git a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/flooding_detection.gsql b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/flooding_detection.gsql index 8acef66..2e83686 100644 --- a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/flooding_detection.gsql +++ b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/flooding_detection.gsql @@ -1,10 +1,10 @@ CREATE QUERY flooding_detection(/* Parameters here */) FOR GRAPH MyGraph { - // this query detects the addresses that flooded one service with large number of requests. - // and possibly the ip addresses have logged in to many accounts - // [IP] -> [login event] -> [user id] -> [request event] -> [service] + /* this query detects the addresses that flooded one service with large number of requests. 
+ and possibly the ip addresses have logged in to many accounts + [IP] -> [login event] -> [user id] -> [request event] -> [service] */ - TypeDef tuple result_tuple; + TYPEDEF TUPLE result_tuple; AvgAccum @mean; SumAccum @std; MapAccum> @count_map; @@ -12,40 +12,40 @@ CREATE QUERY flooding_detection(/* Parameters here */) FOR GRAPH MyGraph { IPs = {IP.*}; - login_events = select event from IPs-(Has_IP)-:event - where event.Event_Type == "login" - accum event.@count_map += (IPs->1); + login_events = SELECT event FROM IPs-(Has_IP)-:event + WHERE event.Event_Type == "login" + ACCUM event.@count_map += (IPs->1); - users = select user from login_events-(User_Event)-:user - accum user.@count_map += login_events.@count_map; + users = SELECT user FROM login_events-(User_Event)-:user + ACCUM user.@count_map += login_events.@count_map; - events = select event from users-(User_Event:e)-:event + events = SELECT event FROM users-(User_Event:e)-:event where event.Event_Type == "request" - accum event.@count_map += users.@count_map; + ACCUM event.@count_map += users.@count_map; - services = select s from events-(To_Service)-:s - accum s.@count_map += events.@count_map - post-accum - case when s.@count_map.size() > 1 then + services = SELECT s FROM events-(To_Service)-:s + ACCUM s.@count_map += events.@count_map + POST-ACCUM + CASE WHEN s.@count_map.size() > 1 THEN // calculate the mean - foreach (user,cnt) in s.@count_map do + FOREACH (user,cnt) in s.@count_map DO s.@mean += cnt - end, + END, // calculate the standard deviation - foreach (user,cnt) in s.@count_map do + FOREACH (user,cnt) in s.@count_map DO s.@std += pow(cnt - s.@mean, 2) - end, + END, s.@std = sqrt(s.@std/(s.@count_map.size()-1)), - case when s.@std != 0 then + CASE WHEN s.@std != 0 THEN // calculate the out lier - foreach (user,cnt) in s.@count_map do - case when cnt-s.@mean > 3*s.@std then + FOREACH (user,cnt) IN s.@count_map DO + CASE WHEN cnt-s.@mean > 3*s.@std THEN @@result_list += 
result_tuple(user,s,cnt,s.@mean,s.@std) - end - end - end - end; + END + END + END + END; - print @@result_list; - print services; + PRINT @@result_list; + PRINT services; } \ No newline at end of file diff --git a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/footprinting_detection.gsql b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/footprinting_detection.gsql index 9ba910e..51dba75 100644 --- a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/footprinting_detection.gsql +++ b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/footprinting_detection.gsql @@ -1,9 +1,9 @@ CREATE QUERY footprinting_detection(datetime start_date = to_datetime("2019-05-01 11:10:00"), datetime end_date = to_datetime("2019-05-01 11:15:00")) FOR GRAPH MyGraph SYNTAX V2 { - // this query detects user id that calls many endpoints of a service in short period of time in order to find its vulnerability - // [user id] -> [request events] -> [service] + /* this query detects user id that calls many endpoints of a service in short period of time in order to find its vulnerability + [user id] -> [request events] -> [service] */ - TypeDef tuple result_tuple; + TYPEDEF TUPLE result_tuple; AvgAccum @mean; SumAccum @std; MapAccum> @api_map; @@ -11,36 +11,36 @@ CREATE QUERY footprinting_detection(datetime start_date = to_datetime("2019-05-0 events = {Event.*}; - events = select s from events:s - where s.Start_Date > start_date and s.Start_Date < end_date - and s.Event_Type == "request"; + events = SELECT s FROM events:s + WHERE s.Start_Date > start_date AND s.Start_Date < end_date + AND s.Event_Type == "request"; - events = select event from events:event-(User_Event)-:user - accum event.@api_map += (user -> event.Endpoint); + events = SELECT event FROM events:event-(User_Event)-:user + ACCUM event.@api_map += (user -> event.Endpoint); - services = select s from events:event-(To_Service)-:s - accum s.@api_map += event.@api_map - post-accum - case when s.@api_map.size() > 1 then + 
services = SELECT s from events:event-(To_Service)-:s + ACCUM s.@api_map += event.@api_map + POST-ACCUM + CASE WHEN s.@api_map.size() > 1 THEN // calculate the mean - foreach (user,cnt) in s.@api_map do + FOREACH (user,cnt) in s.@api_map DO s.@mean += cnt.size() - end, + END, // calculate the standard deviation - foreach (user,cnt) in s.@api_map do + FOREACH (user,cnt) in s.@api_map DO s.@std += pow(cnt.size() - s.@mean, 2) - end, + END, s.@std = sqrt(s.@std/(s.@api_map.size()-1)), - case when s.@std != 0 then + CASE WHEN s.@std != 0 THEN // calculate the out lier - foreach (user,cnt) in s.@api_map do - case when cnt.size()-s.@mean > 3*s.@std then + FOREACH (user,cnt) in s.@api_map DO + CASE WHEN cnt.size()-s.@mean > 3*s.@std THEN @@result_list += result_tuple(user,s,cnt.size(),s.@mean,s.@std) - end - end - end - end; + END + END + END + END; - print @@result_list; - print services; + PRINT @@result_list; + PRINT services; } \ No newline at end of file diff --git a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/suspicious_IP_detection.gsql b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/suspicious_IP_detection.gsql index 8962211..53c3f4a 100644 --- a/Cybersecurity-Threat-Detection-IT/db_scripts/queries/suspicious_IP_detection.gsql +++ b/Cybersecurity-Threat-Detection-IT/db_scripts/queries/suspicious_IP_detection.gsql @@ -10,20 +10,20 @@ CREATE QUERY suspicious_IP_detection(vertex inputIP, int k) FOR GRAPH MyGrap start (ANY) = {inputIP}; - start = select s from start:s post-accum s.@number_of_path = 1; + start = SELECT s from start:s POST-ACCUM s.@number_of_path = 1; - while start.size() > 0 limit k do - start = select t from start:s-(:e)-:t - where t.@number_of_path == 0 - accum t.@number_of_path += s.@number_of_path, + WHILE start.size() > 0 LIMIT k DO + start = SELECT t FROM start:s-(:e)-:t + WHERE t.@number_of_path == 0 + ACCUM t.@number_of_path += s.@number_of_path, t.@edges += e, t.@edges += s.@edges - post-accum case when t.banned == true then + 
POST-ACCUM CASE WHEN t.banned == TRUE THEN @@shortest_path_to_banned_ip += t.@number_of_path, @@banned_IP_set += t - end; - end; + END; + END; - print @@shortest_path_to_banned_ip; + PRINT @@shortest_path_to_banned_ip; start = @@banned_IP_set; - print start; + PRINT start; } \ No newline at end of file diff --git a/Data-Lineage/.DS_Store b/Data-Lineage/.DS_Store index d4eeb2c..8f3f084 100644 Binary files a/Data-Lineage/.DS_Store and b/Data-Lineage/.DS_Store differ diff --git a/Data-Lineage/Data-Lineage Query.zip b/Data-Lineage/Data-Lineage Query.zip index cb5cca0..f30f755 100644 Binary files a/Data-Lineage/Data-Lineage Query.zip and b/Data-Lineage/Data-Lineage Query.zip differ diff --git a/Data-Lineage/data-lineage.tar.gz b/Data-Lineage/data-lineage.tar.gz index 533f6dc..1628286 100644 Binary files a/Data-Lineage/data-lineage.tar.gz and b/Data-Lineage/data-lineage.tar.gz differ diff --git a/Data-Lineage/data/data.zip b/Data-Lineage/data/data-lineage-data.zip similarity index 100% rename from Data-Lineage/data/data.zip rename to Data-Lineage/data/data-lineage-data.zip diff --git a/Data-Lineage/db_scripts/queries/all_updates.gsql b/Data-Lineage/db_scripts/queries/all_updates.gsql index e9a1b4a..d2fb69d 100644 --- a/Data-Lineage/db_scripts/queries/all_updates.gsql +++ b/Data-Lineage/db_scripts/queries/all_updates.gsql @@ -9,9 +9,9 @@ CREATE QUERY all_updates(vertex input) FOR GRAPH MyGraph { start = {input}; - get_updates = SELECT t from start:s-(wasUpdated:e)-ContactOnDate:t - accum @@specific_person_set += s, @@specific_person_set += t; + get_updates = SELECT t FROM start:s-(wasUpdated:e)-ContactOnDate:t + ACCUM @@specific_person_set += s, @@specific_person_set += t; print_set = @@specific_person_set; - print print_set; + PRINT print_set; } \ No newline at end of file diff --git a/Data-Lineage/db_scripts/queries/contact_when.gsql b/Data-Lineage/db_scripts/queries/contact_when.gsql index fd8b018..664634d 100644 --- a/Data-Lineage/db_scripts/queries/contact_when.gsql 
+++ b/Data-Lineage/db_scripts/queries/contact_when.gsql @@ -5,64 +5,64 @@ CREATE QUERY contact_when(vertex contact, datetime time) FOR GRAPH MyGr datetime: 2019-06-05 */ - typedef tuple updates; + TYPEDEF TUPLE updates; ListAccum @@earliest_first; - HeapAccum(1,dtime desc) @email_update; - HeapAccum(1,dtime desc) @phone_update; - HeapAccum(1,dtime desc) @title_update; + HeapAccum(1,dtime DESC) @email_update; + HeapAccum(1,dtime DESC) @phone_update; + HeapAccum(1,dtime DESC) @title_update; - Start = {contact}; + start = {contact}; - get_updates = SELECT t FROM Start:s-(wasUpdated:e)-ContactOnDate:t - where t.modifiedDate <= time and s.outdegree("wasUpdated") > 1 - order by t.modifiedDate asc; + get_updates = SELECT t FROM start:s-(wasUpdated:e)-ContactOnDate:t + WHERE t.modifiedDate <= time AND s.outdegree("wasUpdated") > 1 + ORDER BY t.modifiedDate ASC; - print get_updates; + PRINT get_updates; updated_contacts = SELECT t FROM get_updates:s-(wasUpdated:e)-Contact:t - accum + ACCUM @@earliest_first += get_updates, - if s.email != "" then + IF s.email != "" THEN t.@email_update += updates(s.modifiedDate,s.email,s.source) - end, - if s.phone != "" then + END, + IF s.phone != "" THEN t.@phone_update += updates(s.modifiedDate,s.phone,s.source) - end, - if s.title != "" then + END, + IF s.title != "" THEN t.@title_update += updates(s.modifiedDate,s.title,s.source) - end; + END; - updated_contacts2 = SELECT t from get_updates:s-(wasUpdated:e)-Contact:t - post-accum - if get_updates.size() > 0 then - if t.@email_update.size() > 0 then + updated_contacts2 = SELECT t FROM get_updates:s-(wasUpdated:e)-Contact:t + POST-ACCUM + IF get_updates.size() > 0 THEN + IF t.@email_update.size() > 0 THEN t.LastModifiedDate = t.@email_update.top().dtime, t.LastModifiedByid = t.@email_update.top().source, t.Email = t.@email_update.top().attribute, - t.Updated = true - end, - if t.@phone_update.size() > 0 then - if t.@phone_update.top().dtime > t.@email_update.top().dtime then + t.Updated = 
TRUE + END, + IF t.@phone_update.size() > 0 THEN + IF t.@phone_update.top().dtime > t.@email_update.top().dtime THEN t.LastModifiedDate = t.@phone_update.top().dtime, t.LastModifiedByid = t.@phone_update.top().source, t.Phone = t.@phone_update.top().attribute, - t.Updated = true - else + t.Updated = TRUE + ELSE t.Phone = t.@phone_update.top().attribute, - t.Updated = true - end - end, - if t.@title_update.size() > 0 then - if t.@title_update.top().dtime > t.@email_update.top().dtime and t.@title_update.top().dtime > t.@phone_update.top().dtime then + t.Updated = TRUE + END + END, + IF t.@title_update.size() > 0 THEN + IF t.@title_update.top().dtime > t.@email_update.top().dtime AND t.@title_update.top().dtime > t.@phone_update.top().dtime THEN t.LastModifiedDate = t.@title_update.top().dtime, t.LastModifiedByid = t.@title_update.top().source, t.Title = t.@title_update.top().attribute, - t.Updated = true - else + t.Updated = TRUE + ELSE t.Title = t.@title_update.top().attribute, - t.Updated = true - end - end - end; + t.Updated = TRUE + END + END + END; } \ No newline at end of file diff --git a/Data-Lineage/db_scripts/queries/cust_journey_subgraph.gsql b/Data-Lineage/db_scripts/queries/cust_journey_subgraph.gsql index da99366..fe02822 100644 --- a/Data-Lineage/db_scripts/queries/cust_journey_subgraph.gsql +++ b/Data-Lineage/db_scripts/queries/cust_journey_subgraph.gsql @@ -10,24 +10,24 @@ CREATE QUERY cust_journey_subgraph(vertex customer, cust = { customer }; - acct = select t from cust:c -(belongs_to:e)-> Account:t - accum @@display_set += e, @@vertex_set += t; + acct = SELECT t FROM cust:c -(belongs_to:e)-> Account:t + ACCUM @@display_set += e, @@vertex_set += t; - opp = select t from cust -(Has_Role:e)-> Opportunity:t - accum @@display_set += e, @@vertex_set += t; + opp = SELECT t FROM cust -(Has_Role:e)-> Opportunity:t + ACCUM @@display_set += e, @@vertex_set += t; campaign_members = - select t - from cust -(is_connected_to:e)-> CampaignMember:t - accum 
@@vertex_set += cust, @@vertex_set += t, @@display_set += e; + SELECT t + FROM cust -(is_connected_to:e)-> CampaignMember:t + ACCUM @@vertex_set += cust, @@vertex_set += t, @@display_set += e; - campaigns = select t from campaign_members -(is_part_of:e)-> Campaign:t - accum @@vertex_set += t, @@display_set += e; + campaigns = SELECT t FROM campaign_members -(is_part_of:e)-> Campaign:t + ACCUM @@vertex_set += t, @@display_set += e; verts = @@vertex_set; - print verts; + PRINT verts; //print@@vertex_set; - print @@display_set; + PRINT @@display_set; } \ No newline at end of file diff --git a/Data-Lineage/db_scripts/queries/customer_journey.gsql b/Data-Lineage/db_scripts/queries/customer_journey.gsql index abdd3e0..fc9e2f6 100644 --- a/Data-Lineage/db_scripts/queries/customer_journey.gsql +++ b/Data-Lineage/db_scripts/queries/customer_journey.gsql @@ -10,18 +10,18 @@ CREATE QUERY customer_journey(vertex customer, set campaign_typ SumAccum @cam_type, @cam_name, @cam_desc; start = { customer }; - print start; + PRINT start; - Company = select t from start -(belongs_to)-> Account:t; - print Company; - campaign = select c - from start-(is_connected_to)-> CampaignMember:c - where c.CreatedDate >= start_time and c.CreatedDate <= end_time + Company = SELECT t FROM start -(belongs_to)-> Account:t; + PRINT Company; + campaign = SELECT c + FROM start-(is_connected_to)-> CampaignMember:c + WHERE c.CreatedDate >= start_time AND c.CreatedDate <= end_time ; - campaign = select c from campaign:c -(is_part_of)-> Campaign:t - where campaign_types.size() == 0 or t.Campaign_Type in campaign_types - accum c.@cam_type = t.Campaign_Type, + campaign = SELECT c FROM campaign:c -(is_part_of)-> Campaign:t + WHERE campaign_types.size() == 0 OR t.Campaign_Type IN campaign_types + ACCUM c.@cam_type = t.Campaign_Type, c.@cam_name = t.Name, c.@cam_desc = t.Description; - print campaign as Campaign; + PRINT campaign AS Campaign; } \ No newline at end of file diff --git 
a/Data-Lineage/db_scripts/queries/most_updated_contacts.gsql b/Data-Lineage/db_scripts/queries/most_updated_contacts.gsql index 74b07d2..01519b2 100644 --- a/Data-Lineage/db_scripts/queries/most_updated_contacts.gsql +++ b/Data-Lineage/db_scripts/queries/most_updated_contacts.gsql @@ -2,65 +2,65 @@ CREATE QUERY most_updated_contacts() FOR GRAPH MyGraph { /* * Updates Contacts to most recent information using ContactOnDate. */ - typedef tuple updates; + TYPEDEF TUPLE updates; ListAccum @@earliest_first; - HeapAccum(1,dtime desc) @email_update; - HeapAccum(1,dtime desc) @phone_update; - HeapAccum(1,dtime desc) @title_update; + HeapAccum(1,dtime DESC) @email_update; + HeapAccum(1,dtime DESC) @phone_update; + HeapAccum(1,dtime DESC) @title_update; start = {ContactOnDate.*}; get_updates = SELECT s FROM start:s-(wasUpdated:e)-Contact:t - where t.outdegree("wasUpdated") > 1 - order by s.modifiedDate asc; + WHERE t.outdegree("wasUpdated") > 1 + ORDER BY s.modifiedDate ASC; updated_contacts = SELECT t FROM get_updates:s-(wasUpdated:e)-Contact:t - accum + ACCUM @@earliest_first += get_updates, - if s.email != "" then + IF s.email != "" THEN t.@email_update += updates(s.modifiedDate,s.email,s.source) - end, - if s.phone != "" then + END, + IF s.phone != "" THEN t.@phone_update += updates(s.modifiedDate,s.phone,s.source) end, - if s.title != "" then + IF s.title != "" THEN t.@title_update += updates(s.modifiedDate,s.title,s.source) - end; + END; - updated_contacts2 = SELECT t from get_updates:s-(wasUpdated:e)-Contact:t - post-accum - if t.@email_update.size() > 0 then - if t.@email_update.top().dtime > t.LastModifiedDate then + updated_contacts2 = SELECT t FROM get_updates:s-(wasUpdated:e)-Contact:t + POST-ACCUM + IF t.@email_update.size() > 0 THEN + if t.@email_update.top().dtime > t.LastModifiedDate THEN t.LastModifiedDate = t.@email_update.top().dtime, t.LastModifiedByid = t.@email_update.top().source, t.Email = t.@email_update.top().attribute, - t.Updated = true - else + 
t.Updated = TRUE + ELSE t.Email = t.@email_update.top().attribute, - t.Updated = true - end - end, - if t.@phone_update.size() > 0 then - if t.@phone_update.top().dtime > t.LastModifiedDate then + t.Updated = TRUE + END + END, + IF t.@phone_update.size() > 0 THEN + if t.@phone_update.top().dtime > t.LastModifiedDate THEN t.LastModifiedDate = t.@phone_update.top().dtime, t.LastModifiedByid = t.@phone_update.top().source, t.Phone = t.@phone_update.top().attribute, - t.Updated = true - else + t.Updated = TRUE + ELSE t.Phone = t.@phone_update.top().attribute, - t.Updated = true - end - end, - if t.@title_update.size() > 0 then - if t.@title_update.top().dtime > t.LastModifiedDate then + t.Updated = TRUE + END + END, + IF t.@title_update.size() > 0 THEN + IF t.@title_update.top().dtime > t.LastModifiedDate THEN t.LastModifiedDate = t.@title_update.top().dtime, t.LastModifiedByid = t.@title_update.top().source, t.Title = t.@title_update.top().attribute, - t.Updated = true - else + t.Updated = TRUE + ELSE t.Title = t.@title_update.top().attribute, - t.Updated = true - end - end; + t.Updated = TRUE + END + END; } \ No newline at end of file diff --git a/Data-Lineage/db_scripts/queries/updated_contacts.gsql b/Data-Lineage/db_scripts/queries/updated_contacts.gsql index b53e81d..47efb8f 100644 --- a/Data-Lineage/db_scripts/queries/updated_contacts.gsql +++ b/Data-Lineage/db_scripts/queries/updated_contacts.gsql @@ -4,8 +4,8 @@ CREATE QUERY updated_contacts() FOR GRAPH MyGraph { */ start = {Contact.*}; - updated = SELECT s from start:s - where s.Updated == true; + updated = SELECT s FROM start:s + WHERE s.Updated == TRUE; - print updated; + PRINT updated; } \ No newline at end of file diff --git a/Enterprise-Knowledge-Graph-Corporate/.DS_Store b/Enterprise-Knowledge-Graph-Corporate/.DS_Store index 70c113b..1463866 100644 Binary files a/Enterprise-Knowledge-Graph-Corporate/.DS_Store and b/Enterprise-Knowledge-Graph-Corporate/.DS_Store differ diff --git 
a/Fraud-and-Money-Laundering-Detection-Fin-Services/antifraud.tar.gz b/Enterprise-Knowledge-Graph-Corporate/Enterprise-Corporate-Solution.tar.gz similarity index 53% rename from Fraud-and-Money-Laundering-Detection-Fin-Services/antifraud.tar.gz rename to Enterprise-Knowledge-Graph-Corporate/Enterprise-Corporate-Solution.tar.gz index fc744dd..59f6af2 100644 Binary files a/Fraud-and-Money-Laundering-Detection-Fin-Services/antifraud.tar.gz and b/Enterprise-Knowledge-Graph-Corporate/Enterprise-Corporate-Solution.tar.gz differ diff --git a/Enterprise-Knowledge-Graph-Corporate/Enterprise-Corportae-Queries.zip b/Enterprise-Knowledge-Graph-Corporate/Enterprise-Corportae-Queries.zip new file mode 100644 index 0000000..fd09241 Binary files /dev/null and b/Enterprise-Knowledge-Graph-Corporate/Enterprise-Corportae-Queries.zip differ diff --git a/Enterprise-Knowledge-Graph-Corporate/EnterpriseGraph_Queries.zip b/Enterprise-Knowledge-Graph-Corporate/EnterpriseGraph_Queries.zip deleted file mode 100644 index 81dbf14..0000000 Binary files a/Enterprise-Knowledge-Graph-Corporate/EnterpriseGraph_Queries.zip and /dev/null differ diff --git a/Enterprise-Knowledge-Graph-Corporate/data/data.zip.gz b/Enterprise-Knowledge-Graph-Corporate/data/enterprise-knowledge-corporate-data.zip.gz similarity index 100% rename from Enterprise-Knowledge-Graph-Corporate/data/data.zip.gz rename to Enterprise-Knowledge-Graph-Corporate/data/enterprise-knowledge-corporate-data.zip.gz diff --git a/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/company_holders.gsql b/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/company_holders.gsql index 936cfb3..5a1e594 100644 --- a/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/company_holders.gsql +++ b/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/company_holders.gsql @@ -1,20 +1,20 @@ CREATE QUERY company_holders(vertex company, uint step) FOR GRAPH EnterpriseGraph syntax v1 { /** - * company_holders query finds all key investors of a 
company within several steps. - * --- Dataset has been shrunk on Nov.20th,2020 --- - * Some interesting input parameters you can try: - * 1. company: Hospice Mocha Frame, step: 5 - * 2. company: Psychoanalyst Purse Prior, step: 4 - * 3. company: Hospice Loyalty Decongestant, step: 2 - * 4. company: Discipline Base Perfume, step 1 - * 5. company: Discipline Base Perfume, step 2 - * 6. company: Discipline Base Perfume, step 3 - * 7. company: Discipline Base Perfume, step 4 - */ + * company_holders query finds all key investors of a company within several steps. + * --- Dataset has been shrunk on Nov.20th,2020 --- + * Some interesting input parameters you can try: + * 1. company: Hospice Mocha Frame, step: 5 + * 2. company: Psychoanalyst Purse Prior, step: 4 + * 3. company: Hospice Loyalty Decongestant, step: 2 + * 4. company: Discipline Base Perfume, step 1 + * 5. company: Discipline Base Perfume, step 2 + * 6. company: Discipline Base Perfume, step 3 + * 7. company: Discipline Base Perfume, step 4 + */ // @visited is used to mark visited vertices - OrAccum @visited = false; + OrAccum @visited = FALSE; // @@edge_set is used to hold all touched edges SetAccum @@edge_set; @@ -36,7 +36,7 @@ CREATE QUERY company_holders(vertex company, uint step) // Mark input company as visited start = SELECT s FROM start:s - ACCUM s.@visited = true; + ACCUM s.@visited = TRUE; // Traverse multiple steps WHILE (true) LIMIT loop_step DO diff --git a/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/key_relationship.gsql b/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/key_relationship.gsql index 74f6986..ec9fd01 100644 --- a/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/key_relationship.gsql +++ b/Enterprise-Knowledge-Graph-Corporate/db_scripts/queries/key_relationship.gsql @@ -13,7 +13,7 @@ CREATE QUERY key_relationship(vertex company, int step) * 7. 
company: Discipline Base Perfume, step 4 */ - OrAccum @visited = false; + OrAccum @visited = FALSE; SetAccum @@edge_set; int loop_step; @@ -32,7 +32,7 @@ CREATE QUERY key_relationship(vertex company, int step) start = SELECT s FROM start:s - ACCUM s.@visited = true; + ACCUM s.@visited = TRUE; WHILE (true) LIMIT loop_step DO start = SELECT tgt diff --git a/Enterprise-Knowledge-Graph-Corporate/enterprise-solution.tar.gz b/Enterprise-Knowledge-Graph-Corporate/enterprise-solution.tar.gz deleted file mode 100644 index ce80525..0000000 Binary files a/Enterprise-Knowledge-Graph-Corporate/enterprise-solution.tar.gz and /dev/null differ diff --git a/Enterprise-Knowledge-Graph-Crunchbase/.DS_Store b/Enterprise-Knowledge-Graph-Crunchbase/.DS_Store index 839eb6c..3e7df17 100644 Binary files a/Enterprise-Knowledge-Graph-Crunchbase/.DS_Store and b/Enterprise-Knowledge-Graph-Crunchbase/.DS_Store differ diff --git a/Enterprise-Knowledge-Graph-Crunchbase/Enterprise-Crunchbase-Solution.tar.gz b/Enterprise-Knowledge-Graph-Crunchbase/Enterprise-Crunchbase-Solution.tar.gz new file mode 100644 index 0000000..e4bb67b Binary files /dev/null and b/Enterprise-Knowledge-Graph-Crunchbase/Enterprise-Crunchbase-Solution.tar.gz differ diff --git a/Enterprise-Knowledge-Graph-Crunchbase/CrunchBase_Queries.zip b/Enterprise-Knowledge-Graph-Crunchbase/Enterprise-Crunchbase_Queries.zip similarity index 81% rename from Enterprise-Knowledge-Graph-Crunchbase/CrunchBase_Queries.zip rename to Enterprise-Knowledge-Graph-Crunchbase/Enterprise-Crunchbase_Queries.zip index 73aaec5..b970ecc 100644 Binary files a/Enterprise-Knowledge-Graph-Crunchbase/CrunchBase_Queries.zip and b/Enterprise-Knowledge-Graph-Crunchbase/Enterprise-Crunchbase_Queries.zip differ diff --git a/Enterprise-Knowledge-Graph-Crunchbase/crunchbase-solution.tar.gz b/Enterprise-Knowledge-Graph-Crunchbase/crunchbase-solution.tar.gz deleted file mode 100644 index 2fd8213..0000000 Binary files 
a/Enterprise-Knowledge-Graph-Crunchbase/crunchbase-solution.tar.gz and /dev/null differ diff --git a/Enterprise-Knowledge-Graph-Crunchbase/data/data.zip.gz b/Enterprise-Knowledge-Graph-Crunchbase/data/enterprise-knowledge-crunchbase-data.zip.gz similarity index 100% rename from Enterprise-Knowledge-Graph-Crunchbase/data/data.zip.gz rename to Enterprise-Knowledge-Graph-Crunchbase/data/enterprise-knowledge-crunchbase-data.zip.gz diff --git a/Enterprise-Knowledge-Graph-Crunchbase/db_scripts/queries/investor_successful_exits.gsql b/Enterprise-Knowledge-Graph-Crunchbase/db_scripts/queries/investor_successful_exits.gsql index 4b85544..da19b61 100644 --- a/Enterprise-Knowledge-Graph-Crunchbase/db_scripts/queries/investor_successful_exits.gsql +++ b/Enterprise-Knowledge-Graph-Crunchbase/db_scripts/queries/investor_successful_exits.gsql @@ -86,7 +86,7 @@ CREATE QUERY investor_successful_exits(STRING investor_name="Accel Partners", // Trace back from exits to investor, to get output vertices & edges children = {@@result_vertex_set}; - PRINT children.size() as Num_Successful_Exits; + PRINT children.size() AS Num_Successful_Exits; WHILE(children.size() > 0) DO start = SELECT s FROM children :s diff --git a/Entity-Resolution-MDM/.DS_Store b/Entity-Resolution-MDM/.DS_Store index e3251de..180f901 100644 Binary files a/Entity-Resolution-MDM/.DS_Store and b/Entity-Resolution-MDM/.DS_Store differ diff --git a/Entity-Resolution-MDM/Entity-Resolution-Queries.zip b/Entity-Resolution-MDM/Entity-Resolution-Queries.zip new file mode 100644 index 0000000..9653619 Binary files /dev/null and b/Entity-Resolution-MDM/Entity-Resolution-Queries.zip differ diff --git a/Entity-Resolution-MDM/Entity_Resolution_Queries.zip b/Entity-Resolution-MDM/Entity_Resolution_Queries.zip deleted file mode 100644 index 1b4290b..0000000 Binary files a/Entity-Resolution-MDM/Entity_Resolution_Queries.zip and /dev/null differ diff --git a/Entity-Resolution-MDM/data/data.zip.gz 
b/Entity-Resolution-MDM/data/entity-resolution-data.zip.gz similarity index 100% rename from Entity-Resolution-MDM/data/data.zip.gz rename to Entity-Resolution-MDM/data/entity-resolution-data.zip.gz diff --git a/Entity-Resolution-MDM/db_scripts/queries/get_account_interest.gsql b/Entity-Resolution-MDM/db_scripts/queries/get_account_interest.gsql index 848118b..c2ddce7 100644 --- a/Entity-Resolution-MDM/db_scripts/queries/get_account_interest.gsql +++ b/Entity-Resolution-MDM/db_scripts/queries/get_account_interest.gsql @@ -15,8 +15,8 @@ CREATE QUERY get_account_interest(vertex input_acc, int k) FOR GRAPH En WHILE start.size() > 0 DO start = SELECT t FROM start:s-(Same_Owner:e)-:t - WHERE t.@visited == false - POST-ACCUM @@connected_accounts += t, t.@visited = true; + WHERE t.@visited == FALSE + POST-ACCUM @@connected_accounts += t, t.@visited = TRUE; END; start = @@connected_accounts; @@ -33,10 +33,10 @@ CREATE QUERY get_account_interest(vertex input_acc, int k) FOR GRAPH En ACCUM t.@cnt += s.@cnt, t.@map += (s->s.@cnt) ORDER BY t.@cnt DESC LIMIT k; - print videos; + PRINT videos; videos = SELECT s from videos:s-((Has_Genre|Has_Keyword):e)-:t ACCUM @@list += e; - print @@list; + PRINT @@list; } \ No newline at end of file diff --git a/Entity-Resolution-MDM/db_scripts/queries/output_file.gsql b/Entity-Resolution-MDM/db_scripts/queries/output_file.gsql index 9d1b70a..549da59 100644 --- a/Entity-Resolution-MDM/db_scripts/queries/output_file.gsql +++ b/Entity-Resolution-MDM/db_scripts/queries/output_file.gsql @@ -1,25 +1,25 @@ CREATE QUERY output_file(/* Parameters here */) FOR GRAPH Entity_Resolution { - File f1("/tmp/video_genre.csv"); - File f2("/tmp/video_keyword.csv"); - File f3("/tmp/video.csv"); + FILE f1("/tmp/video_genre.csv"); + FILE f2("/tmp/video_keyword.csv"); + FILE f3("/tmp/video.csv"); f1.println("genre,video"); start (ANY) = {Genre.*}; - start = select s from start:s-(Has_Genre)-:t accum f1.println(s,t); + start = SELECT s FROM start:s-(Has_Genre)-:t 
ACCUM f1.println(s,t); f2.println("keyword,video"); start = {Keyword.*}; - start = select s from start:s-(Has_Keyword)-:t accum f2.println(s,t); + start = SELECT s FROM start:s-(Has_Keyword)-:t ACCUM f2.println(s,t); start = {Video.*}; f3.println("ID,runtime,title,release_date"); - start = select s from start:s where s.title != "" accum f3.println(s,s.runtime,s.title,s.release_date); + start = SELECT s FROM start:s WHERE s.title != "" ACCUM f3.println(s,s.runtime,s.title,s.release_date); } \ No newline at end of file diff --git a/Entity-Resolution-MDM/entity-resolution.tar.gz b/Entity-Resolution-MDM/entity-resolution.tar.gz index 1826170..5aed34d 100644 Binary files a/Entity-Resolution-MDM/entity-resolution.tar.gz and b/Entity-Resolution-MDM/entity-resolution.tar.gz differ diff --git a/Financial-Services-Payments-Fraud-Detection/.DS_Store b/Financial-Services-Payments-Fraud-Detection/.DS_Store index a2fc99d..1795f08 100644 Binary files a/Financial-Services-Payments-Fraud-Detection/.DS_Store and b/Financial-Services-Payments-Fraud-Detection/.DS_Store differ diff --git a/Financial-Services-Payments-Fraud-Detection/Financial-Service-Queries.zip b/Financial-Services-Payments-Fraud-Detection/Financial-Service-Queries.zip new file mode 100644 index 0000000..a97f2fb Binary files /dev/null and b/Financial-Services-Payments-Fraud-Detection/Financial-Service-Queries.zip differ diff --git a/Financial-Services-Payments-Fraud-Detection/Financial_Service_Query.zip b/Financial-Services-Payments-Fraud-Detection/Financial_Service_Query.zip deleted file mode 100644 index 9e866bc..0000000 Binary files a/Financial-Services-Payments-Fraud-Detection/Financial_Service_Query.zip and /dev/null differ diff --git a/Financial-Services-Payments-Fraud-Detection/db_scripts/queries/Common_Customers.gsql b/Financial-Services-Payments-Fraud-Detection/db_scripts/queries/Common_Customers.gsql index a22b503..1382b3c 100644 --- 
a/Financial-Services-Payments-Fraud-Detection/db_scripts/queries/Common_Customers.gsql +++ b/Financial-Services-Payments-Fraud-Detection/db_scripts/queries/Common_Customers.gsql @@ -7,19 +7,19 @@ CREATE QUERY common_customers(vertex Merchant1, vertex payment:t - ACCUM t.@visited += true; + ACCUM t.@visited += TRUE; //PRINT payments1; /* Step 2 – For those payments, find all the linked customers. */ customer1 = SELECT t FROM payments1:s -(reverse_sends:e)-> user_account:t - ACCUM t.@visited += true; + ACCUM t.@visited += TRUE; //PRINT customer1; /* Step 3 Start graph traversal from second merchant to find all payments */ payments2 = SELECT t FROM Mer2:s -(reverse_receives_pmnt:e)-> payment:t - ACCUM t.@visited += true; + ACCUM t.@visited += TRUE; //PRINT payments2; /* Step 4 – Find common customers by starting from payments in Step 3 */ diff --git a/Financial-Services-Payments-Fraud-Detection/financial-services.tar.gz b/Financial-Services-Payments-Fraud-Detection/financial-services.tar.gz index f1fb01a..d31d629 100644 Binary files a/Financial-Services-Payments-Fraud-Detection/financial-services.tar.gz and b/Financial-Services-Payments-Fraud-Detection/financial-services.tar.gz differ diff --git a/Fraud-and-Money-Laundering-Detection-Fin-Services/.DS_Store b/Fraud-and-Money-Laundering-Detection-Fin-Services/.DS_Store index 3a883d6..e5b9b5a 100644 Binary files a/Fraud-and-Money-Laundering-Detection-Fin-Services/.DS_Store and b/Fraud-and-Money-Laundering-Detection-Fin-Services/.DS_Store differ diff --git a/Fraud-and-Money-Laundering-Detection-Fin-Services/AntiFraud_Queries.zip b/Fraud-and-Money-Laundering-Detection-Fin-Services/Fraud-and-Money-Laundering-Queries.zip similarity index 69% rename from Fraud-and-Money-Laundering-Detection-Fin-Services/AntiFraud_Queries.zip rename to Fraud-and-Money-Laundering-Detection-Fin-Services/Fraud-and-Money-Laundering-Queries.zip index 7d93c3c..ec404e9 100644 Binary files 
a/Fraud-and-Money-Laundering-Detection-Fin-Services/AntiFraud_Queries.zip and b/Fraud-and-Money-Laundering-Detection-Fin-Services/Fraud-and-Money-Laundering-Queries.zip differ diff --git a/Fraud-and-Money-Laundering-Detection-Fin-Services/Fraud-and-Money-Laundering-Solution.tar.gz b/Fraud-and-Money-Laundering-Detection-Fin-Services/Fraud-and-Money-Laundering-Solution.tar.gz new file mode 100644 index 0000000..8362f92 Binary files /dev/null and b/Fraud-and-Money-Laundering-Detection-Fin-Services/Fraud-and-Money-Laundering-Solution.tar.gz differ diff --git a/Fraud-and-Money-Laundering-Detection-Fin-Services/data/data.zip.gz b/Fraud-and-Money-Laundering-Detection-Fin-Services/data/fraud-and-money-data.zip.gz similarity index 100% rename from Fraud-and-Money-Laundering-Detection-Fin-Services/data/data.zip.gz rename to Fraud-and-Money-Laundering-Detection-Fin-Services/data/fraud-and-money-data.zip.gz diff --git a/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/jobs/load_job_payment.gsql b/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/jobs/load_job_payment.gsql index db8cc00..fc422b0 100644 --- a/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/jobs/load_job_payment.gsql +++ b/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/jobs/load_job_payment.gsql @@ -21,7 +21,7 @@ CREATE OR REPLACE QUERY circleDetection (vertex srcId) FOR GRAPH AntiFrau MinAccum @@minSrcSendTime = GSQL_INT_MAX; MaxAccum @@maxSrcReceiveTime = 0; - OrAccum @isValid = false;//flag used to record valid vertices in the subgraph + OrAccum @isValid = FALSE;//flag used to record valid vertices in the subgraph int stepLowLimit = 3; int stepHighLimit = 6; @@ -35,7 +35,7 @@ CREATE OR REPLACE QUERY circleDetection (vertex srcId) FOR GRAPH AntiFrau ListAccum> @edgeTupleList; ListAccum> @newEdgeTupleList; ListAccum> @@circleEdgeTuples; - OrAccum @receiveNewPath = false; + OrAccum @receiveNewPath = FALSE; //The following is used for printing edges and vertices 
SetAccum @@vSet; @@ -261,7 +261,7 @@ CREATE OR REPLACE QUERY circleDetection (vertex srcId) FOR GRAPH AntiFrau END END, //reset receiveNewPath as false - tgt.@receiveNewPath = false + tgt.@receiveNewPath = FALSE POST-ACCUM CASE WHEN tgt.@newEdgeTupleList.size() > 0 @@ -286,7 +286,7 @@ CREATE OR REPLACE QUERY circleDetection (vertex srcId) FOR GRAPH AntiFrau END END, //reset receiveNewPath as false - tgt.@receiveNewPath = false + tgt.@receiveNewPath = FALSE POST-ACCUM CASE WHEN tgt.@newEdgeTupleList.size() > 0 @@ -347,10 +347,10 @@ CREATE OR REPLACE QUERY SameRecieverSender(vertex transaction) FOR ; // traverse for 4 steps, or the paths of sender and reciever meets each other - WHILE Start.size() > 0 AND @@isSame == false LIMIT 4 DO + WHILE Start.size() > 0 AND @@isSame == FALSE LIMIT 4 DO Start = SELECT t FROM Start:s-((User_to_Device|User_to_Payment):e)-:t // do not traverse the vertexes that were visited - WHERE t.@fromReciever == false AND t.@fromSender == false + WHERE t.@fromReciever == FALSE AND t.@fromSender == FALSE ACCUM t.@fromReciever += s.@fromReciever, t.@fromSender += s.@fromSender ,@@edgeSet += e @@ -414,7 +414,7 @@ CREATE OR REPLACE QUERY MultiTransaction (VERTEx transaction) FOR G //users to the receiver set WHILE Start.size() > 0 LIMIT 4 DO Start = SELECT t FROM Start:s-((User_to_Device|User_to_Payment):e)-:t - WHERE t.@fromReciever == false AND t.@fromSender == false + WHERE t.@fromReciever == FALSE AND t.@fromSender == FALSE ACCUM t.@fromReciever += s.@fromReciever, t.@fromSender += s.@fromSender, @@ -475,7 +475,7 @@ CREATE OR REPLACE QUERY fraudConnectivity (VERTEX inputUser, FLOAT trustSc FROM Start:s-(:e)-:t // sample clause for better visualization result SAMPLE 15 EDGE WHEN s.outdegree() >= 20 - WHERE t.@visited == false AND t != inputUser + WHERE t.@visited == FALSE AND t != inputUser ACCUM @@visResult += e POST-ACCUM @@ -564,7 +564,7 @@ CREATE OR REPLACE QUERY TransferredAmount (vertex sender, dateTime startDa // from the input user, go 4 
steps with a while loop to find her connected users. WHILE (Start.size() > 0) limit 4 DO Start = select t from Start:s-((User_to_Device|User_to_Payment):e)-:t - where t.@visited == false AND (t.type != "User" OR t != sender) + where t.@visited == FALSE AND (t.type != "User" OR t != sender) ACCUM @@edgeSet += e POST-ACCUM diff --git a/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/queries/repeated_user.gsql b/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/queries/repeated_user.gsql index 7a2688c..8f4cee9 100644 --- a/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/queries/repeated_user.gsql +++ b/Fraud-and-Money-Laundering-Detection-Fin-Services/db_scripts/queries/repeated_user.gsql @@ -14,7 +14,7 @@ FOR GRAPH AntiFraud syntax v2 { and received by the input user. Sample input: - receiver = 1223 + receiver = 1223 (recommended), or try any integer between 1 and 500. */ SumAccum @num_messages_received; diff --git a/GSQL-101/.DS_Store b/GSQL-101/.DS_Store index 2283bb2..9733981 100644 Binary files a/GSQL-101/.DS_Store and b/GSQL-101/.DS_Store differ diff --git a/GSQL-101/GSQL 101 Query.zip b/GSQL-101/GSQL101-Queries.zip similarity index 100% rename from GSQL-101/GSQL 101 Query.zip rename to GSQL-101/GSQL101-Queries.zip diff --git a/GSQL-101/gsql101.tar.gz b/GSQL-101/GSQL101-Solution.tar.gz similarity index 100% rename from GSQL-101/gsql101.tar.gz rename to GSQL-101/GSQL101-Solution.tar.gz diff --git a/Graph-Analytics-Centrality-Algorithms/.DS_Store b/Graph-Analytics-Centrality-Algorithms/.DS_Store index a9d494f..e599c9b 100644 Binary files a/Graph-Analytics-Centrality-Algorithms/.DS_Store and b/Graph-Analytics-Centrality-Algorithms/.DS_Store differ diff --git a/Graph-Analytics-Centrality-Algorithms/Centrality_Query.zip b/Graph-Analytics-Centrality-Algorithms/Centrality_Query.zip deleted file mode 100644 index ce8ce6c..0000000 Binary files a/Graph-Analytics-Centrality-Algorithms/Centrality_Query.zip and /dev/null differ
diff --git a/Graph-Analytics-Centrality-Algorithms/Graph-Centrality-Queries.zip b/Graph-Analytics-Centrality-Algorithms/Graph-Centrality-Queries.zip new file mode 100644 index 0000000..66e666d Binary files /dev/null and b/Graph-Analytics-Centrality-Algorithms/Graph-Centrality-Queries.zip differ diff --git a/Graph-Analytics-Centrality-Algorithms/Graph-Centrality-Solution.tar.gz b/Graph-Analytics-Centrality-Algorithms/Graph-Centrality-Solution.tar.gz new file mode 100644 index 0000000..4864c87 Binary files /dev/null and b/Graph-Analytics-Centrality-Algorithms/Graph-Centrality-Solution.tar.gz differ diff --git a/Graph-Analytics-Centrality-Algorithms/centrality.tar.gz b/Graph-Analytics-Centrality-Algorithms/centrality.tar.gz deleted file mode 100644 index f1812e2..0000000 Binary files a/Graph-Analytics-Centrality-Algorithms/centrality.tar.gz and /dev/null differ diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/A_README.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/A_README.gsql index b5dff14..3e7e8c6 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/A_README.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/A_README.gsql @@ -1,5 +1,5 @@ CREATE QUERY A_README() FOR GRAPH MyGraph SYNTAX V2 { - /************************************************************ + /************************************************************ * IMPORTANT : PLEASE INSTALL AND RUN THE add_weights QUERY * BEFORE RUNNING OTHER QUERIES. 
***********************************************************/ diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/add_weights.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/add_weights.gsql index ddb16d4..a7a0aaf 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/add_weights.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/add_weights.gsql @@ -1,4 +1,4 @@ -CREATE QUERY add_weights(STRING e_type, BOOL overwrite=True) FOR GRAPH MyGraph SYNTAX V2 { +CREATE QUERY add_weights(STRING e_type, BOOL overwrite=TRUE) FOR GRAPH MyGraph SYNTAX V2 { /* This query uses the haversine formula to calculate the distances between airports by using their latitude and longitude coordinates. The calculated distances are measured in miles and are added as edge weights. @@ -15,7 +15,7 @@ The calculated distances are measured in miles and are added as edge weights. heavy = SELECT t FROM start:s-(e_type:e)-Airport:t ACCUM - IF overwrite == False AND e.miles != 0 THEN + IF overwrite == FALSE AND e.miles != 0 THEN @@dont_change_list += e ELSE double lat1 = s.latitude * pi / 180, // lat1 to radians diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/bc_subquery.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/bc_subquery.gsql index f5159e4..cf6cafa 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/bc_subquery.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/bc_subquery.gsql @@ -1,4 +1,5 @@ -CREATE QUERY bc_subquery (VERTEX source, INT max_hops) FOR GRAPH MyGraph RETURNS(MapAccum) SYNTAX V2 { +CREATE QUERY bc_subquery (VERTEX source, INT max_hops) FOR GRAPH MyGraph + RETURNS(MapAccum) SYNTAX V2 { /* The TigerGraph implementation is based on A Faster Algorithm for Betweenness Centrality by Ulrik Brandes, Journal of Mathematical Sociology 25(2):163-177, (2001). 
According to the algorithm, sigma is the number of shortest paths from source; delta is the pair dependency from source; and dist is the shortest distance from source. The subquery returns a map of (s.id->s.@delta) */ SumAccum @sigma; @@ -12,8 +13,8 @@ CREATE QUERY bc_subquery (VERTEX source, INT max_hops) FOR GRAPH MyGraph RETURNS start = SELECT s FROM start:s ACCUM s.@sigma = 1, s.@dist = 0; -# traverse in the order of increasing distance and calculate @sigma and @dist - WHILE (start.size()>0) LIMIT max_hops DO # explore up to (max_hops) hops FROM s +// traverse in the order of increasing distance and calculate @sigma and @dist + WHILE (start.size()>0) LIMIT max_hops DO // explore up to (max_hops) hops from s @@curr_dist += 1; start = SELECT t FROM start:s-(flight_to>:e)-:t WHERE t.@dist < 0 @@ -25,7 +26,7 @@ CREATE QUERY bc_subquery (VERTEX source, INT max_hops) FOR GRAPH MyGraph RETURNS start = SELECT s FROM all:s WHERE s.@dist == @@curr_dist; -# traverse in the order of non-increasing distance and calculate @delta +// traverse in the order of non-increasing distance and calculate @delta WHILE (@@curr_dist>0) DO @@curr_dist += -1; start = SELECT s FROM start:s -(flight_to>:e)-:t diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/betweenness_cent.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/betweenness_cent.gsql index d4cd32c..85bcd72 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/betweenness_cent.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/betweenness_cent.gsql @@ -1,5 +1,5 @@ CREATE QUERY betweenness_cent (INT max_hops, INT maxItems, STRING country) FOR GRAPH MyGraph SYNTAX V2 { -# Betweenness Centrality main query +// Betweenness Centrality main query MapAccum> @@BC; SumAccum @cent; @@ -13,7 +13,7 @@ CREATE QUERY betweenness_cent (INT max_hops, INT maxItems, STRING country) FOR G start = SELECT s FROM start:s ACCUM @@BC += bc_subquery(s, max_hops); - # Write scores to local
accumulators of vertices. + // Write scores to local accumulators of vertices. start = SELECT s FROM start:s POST-ACCUM s.@cent += @@BC.get(s) ORDER BY s.@cent DESC diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc.gsql index ac47720..fe02576 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc.gsql @@ -1,20 +1,20 @@ CREATE QUERY cc (BOOL display, INT output_limit, INT max_hops) FOR GRAPH MyGraph SYNTAX V2 { -# Closeness Centrality main query +// Closeness Centrality main query TYPEDEF TUPLE Vertex_Score; HeapAccum(output_limit, score DESC) @@top_scores; SumAccum @score; - SetAccum @@edge_set; # list of all edges, if display is needed + SetAccum @@edge_set; // list of all edges, if display is needed INT num_vert; - #INT max_hops = 10; # measure distance for vertices up to 10 hops away + // INT max_hops = 10; // measure distance for vertices up to 10 hops away start = {Airport.*}; - #Total number of vertices considered in graph + // Total number of vertices considered in graph num_vert = start.size(); - # get closeness centrality for each vertex + // get closeness centrality for each vertex start = SELECT s FROM start:s POST-ACCUM s.@score = cc_subquery(s,num_vert,max_hops), @@top_scores += Vertex_Score(s, cc_subquery(s,num_vert,max_hops)); diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_by_country.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_by_country.gsql index 28d3f10..d53540e 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_by_country.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_by_country.gsql @@ -1,13 +1,13 @@ CREATE QUERY cc_by_country (BOOL display, INT output_limit, INT maxHops, STRING country) FOR GRAPH MyGraph SYNTAX V2 { -# Closeness Centrality main query +// Closeness Centrality 
main query TYPEDEF TUPLE Vertex_Score; HeapAccum(output_limit, score DESC) @@top_scores; SumAccum @score; - SetAccum @@edge_set; # list of all edges, if display is needed + SetAccum @@edge_set; // list of all edges, if display is needed INT num_vert; - #INT maxHops = 10; # measure distance for vertices up to 10 hops away + // INT maxHops = 10; // measure distance for vertices up to 10 hops away start = {Airport.*}; IF country != "" THEN @@ -16,10 +16,10 @@ CREATE QUERY cc_by_country (BOOL display, INT output_limit, INT maxHops, STRING WHERE v.country == country; END; - #Total number of vertices considered in graph + // Total number of vertices considered in graph num_vert = start.size(); - # get closeness centrality for each vertex + // get closeness centrality for each vertex start = SELECT s FROM start:s POST-ACCUM s.@score = cc_subquery(s,num_vert,maxHops), @@top_scores += Vertex_Score(s, s.name, cc_subquery(s,num_vert,maxHops)); diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_subquery.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_subquery.gsql index 379d95d..13e056c 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_subquery.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/cc_subquery.gsql @@ -1,20 +1,20 @@ CREATE QUERY cc_subquery (VERTEX source, INT num_vert, INT maxhops) FOR GRAPH MyGraph RETURNS(FLOAT) SYNTAX V2 { -# Subquery returns closeness centrality for vertex source in graph with num_vert vertices +// Subquery returns closeness centrality for vertex source in graph with num_vert vertices SumAccum @@curr_dist, @@total_dist; OrAccum @visited; -# Initialize: Set the input vertex source as the starting point +// Initialize: Set the input vertex source as the starting point start = {source}; start = SELECT s FROM start:s - ACCUM s.@visited += true; + ACCUM s.@visited += TRUE; -# total_dist = sum (distance between vertex s and all connected neighbors) - WHILE 
(start.size() > 0) LIMIT maxhops DO # explore up to (maxhops) hops FROM s +// total_dist = sum (distance between vertex s and all connected neighbors) + WHILE (start.size() > 0) LIMIT maxhops DO // explore up to (maxhops) hops FROM s @@curr_dist += 1; - # Move FROM the current start set to the neighboring set of (unvisited) vertices + // Move FROM the current start set to the neighboring set of (unvisited) vertices start = SELECT t FROM start:s -(flight_to>:e)- :t - WHERE t.@visited == false AND t != s - POST-ACCUM t.@visited += true; + WHERE t.@visited == FALSE AND t != s + POST-ACCUM t.@visited += TRUE; @@total_dist += start.size() * @@curr_dist; END; diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank.gsql index 96d6186..3ed6139 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank.gsql @@ -1,20 +1,20 @@ CREATE QUERY page_rank(FLOAT max_change=0.001, INT max_iter=20, FLOAT damping=0.85, - BOOL display=false, INT output_limit=100) FOR GRAPH MyGraph SYNTAX V2 { -# Compute the page_rank score for each vertex in the GRAPH -# In each iteration, compute a score for each vertex: -# score = (1-damping) + damping*sum(received scores FROM its neighbors). -# The page_rank algorithm stops when either of the following is true: -# a) it reaches max_iter iterations; -# b) the max score change for any vertex compared to the last iteration <= max_change. + BOOL display=FALSE, INT output_limit=100) FOR GRAPH MyGraph SYNTAX V2 { +/* Compute the page_rank score for each vertex in the GRAPH + In each iteration, compute a score for each vertex: + score = (1-damping) + damping*sum(received scores FROM its neighbors). 
+ The page_rank algorithm stops when either of the following is true: + a) it reaches max_iter iterations; + b) the max score change for any vertex compared to the last iteration <= max_change. */ TYPEDEF TUPLE Vertex_Score; HeapAccum(output_limit, score DESC) @@top_scores; - MaxAccum @@max_diff = 9999; # max score change in an iteration - SumAccum @received_score = 0; # sum of scores each vertex receives FROM neighbors - SumAccum @score = 1; # Initial score for every vertex is 1. - SetAccum @@edge_set; # list of all edges, if display is needed + MaxAccum @@max_diff = 9999; // max score change in an iteration + SumAccum @received_score = 0; // sum of scores each vertex receives FROM neighbors + SumAccum @score = 1; // Initial score for every vertex is 1. + SetAccum @@edge_set; // list of all edges, if display is needed - start = {Airport.*}; # start with all vertices of specified type(s) + start = {Airport.*}; // start with all vertices of specified type(s) WHILE @@max_diff > max_change LIMIT max_iter DO @@max_diff = 0; V = SELECT s @@ -23,7 +23,7 @@ CREATE QUERY page_rank(FLOAT max_change=0.001, INT max_iter=20, FLOAT damping=0. 
POST-ACCUM s.@score = (1.0-damping) + damping * s.@received_score, s.@received_score = 0, @@max_diff += abs(s.@score - s.@score'); - END; # END WHILE loop + END; // END WHILE loop IF output_limit > 0 THEN diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_by_country.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_by_country.gsql index 96d2440..b41043b 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_by_country.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_by_country.gsql @@ -1,21 +1,21 @@ CREATE QUERY page_rank_by_country(FLOAT max_change=0.001, INT max_iter=20, - FLOAT damping=0.85, STRING country, BOOL display=False, INT output_limit=100) + FLOAT damping=0.85, STRING country, BOOL display=FALSE, INT output_limit=100) FOR GRAPH MyGraph SYNTAX V2 { -# Compute the pageRank score for each vertex in the GRAPH -# In each iteration, compute a score for each vertex: -# score = (1-damping) + damping*sum(received scores FROM its neighbors). -# The pageRank algorithm stops when either of the following is true: -# a) it reaches max_iter iterations; -# b) the max score change for any vertex compared to the last iteration <= max_change. +/* Compute the pageRank score for each vertex in the GRAPH + In each iteration, compute a score for each vertex: + score = (1-damping) + damping*sum(received scores FROM its neighbors). + The pageRank algorithm stops when either of the following is true: + a) it reaches max_iter iterations; + b) the max score change for any vertex compared to the last iteration <= max_change. */ TYPEDEF TUPLE Vertex_Score; HeapAccum(output_limit, score DESC) @@top_scores; - MaxAccum @@max_diff = 9999; # max score change in an iteration - SumAccum @received_score = 0; # sum of scores each vertex receives FROM neighbors - SumAccum @score = 1; # Initial score for every vertex is 1. 
- SetAccum @@edge_set; # list of all edges, if display is needed + MaxAccum @@max_diff = 9999; // max score change in an iteration + SumAccum @received_score = 0; // sum of scores each vertex receives FROM neighbors + SumAccum @score = 1; // Initial score for every vertex is 1. + SetAccum @@edge_set; // list of all edges, if display is needed - start = {Airport.*}; # start with all vertices of specified type(s) + start = {Airport.*}; // start with all vertices of specified type(s) start = SELECT v FROM start:v WHERE v.country == country; @@ -27,7 +27,7 @@ CREATE QUERY page_rank_by_country(FLOAT max_change=0.001, INT max_iter=20, POST-ACCUM s.@score = (1.0-damping) + damping * s.@received_score, s.@received_score = 0, @@max_diff += abs(s.@score - s.@score'); - END; # END WHILE loop + END; // END WHILE loop IF output_limit > 0 THEN diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_pers.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_pers.gsql index b4da29c..70e1d9d 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_pers.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/page_rank_pers.gsql @@ -1,45 +1,45 @@ CREATE QUERY page_rank_pers (SET source, FLOAT max_change, INT max_iter, FLOAT damping, INT output_limit) FOR GRAPH MyGraph SYNTAX V2 { -# Compute the pageRank score for each vertex in the GRAPH, given a set of source vertices -# In each iteration, compute a score for activated vertices if they are source vertices: -# score = (1-damping) + damping*sum(received scores FROM its neighbors). -# If they are not source vertices, then score = damping*sum(received scores FROM its neighbors). -# The personalized pageRank algorithm stops when either of the following is true: -# a) it reaches max_iter iterations; -# b) the max score change for any vertex compared to the last iteration <= max_change. 
+/* Compute the pageRank score for each vertex in the GRAPH, given a set of source vertices + In each iteration, compute a score for activated vertices if they are source vertices: + score = (1-damping) + damping*sum(received scores FROM its neighbors). + If they are not source vertices, then score = damping*sum(received scores FROM its neighbors). + The personalized pageRank algorithm stops when either of the following is true: + a) it reaches max_iter iterations; + b) the max score change for any vertex compared to the last iteration <= max_change.*/ TYPEDEF TUPLE Vertex_Score; HeapAccum(output_limit, score DESC) @@top_scores; - MaxAccum @@max_diff = 9999; # max score change in an iteration - SumAccum @received_score = 0; # sum of scores each vertex receives FROM neighbors - SumAccum @score = 0; # Initial score for every vertex is 0. - SetAccum @@edge_set; # list of all edges, if display is needed + MaxAccum @@max_diff = 9999; // max score change in an iteration + SumAccum @received_score = 0; // sum of scores each vertex receives FROM neighbors + SumAccum @score = 0; // Initial score for every vertex is 0. 
+ SetAccum @@edge_set; // list of all edges, if display is needed OrAccum @is_source; - start = {source}; # start with a set of vertices + start = {source}; // start with a set of vertices start = SELECT s FROM start:s - ACCUM s.@score = 1, # Only set score of source vertices to 1 - s.@is_source = true; + ACCUM s.@score = 1, // Only set score of source vertices to 1 + s.@is_source = TRUE; total = start; WHILE @@max_diff > max_change LIMIT max_iter DO @@max_diff = 0; - V_tmp = SELECT t # Only update score for activated vertices + V_tmp = SELECT t // Only update score for activated vertices FROM start:s -(flight_to>:e)- :t ACCUM t.@received_score += s.@score/(s.outdegree("flight_to")); T = start UNION V_tmp; start = SELECT s FROM T:s POST-ACCUM - # For source vertices, if it's activated, then add damping; if not activated, do not need to update - IF s.@is_source == true + // For source vertices, if it's activated, then add damping; if not activated, do not need to update + IF s.@is_source == TRUE THEN s.@score = (1.0-damping) + damping * s.@received_score ELSE s.@score = damping * s.@received_score END, s.@received_score = 0, @@max_diff += abs(s.@score - s.@score'); total = total UNION T; - END; # END WHILE loop + END; // END WHILE loop IF output_limit > 0 THEN total = SELECT s FROM total:s diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql index ffc5853..6d20ee6 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql @@ -9,27 +9,27 @@ The attribute version only store the distance into attribute, not the path. 
ListAccum @path; SetAccum @@edge_set; - ##### Initialization ##### + // Initialization source_set = {source}; source_set = SELECT s FROM source_set:s - ACCUM s.@visited += true, + ACCUM s.@visited += TRUE, s.@dis = 0, s.@path = s; result_set = {source}; - ##### Calculate distances and paths ##### + // Calculate distances and paths WHILE(source_set.size()>0) DO source_set = SELECT t FROM source_set:s -(:e)- :t - WHERE t.@visited == false + WHERE t.@visited == FALSE ACCUM t.@dis += s.@dis + 1, t.@path = s.@path + [t], - t.@visited += true; + t.@visited += TRUE; result_set = result_set UNION source_set; END; - ##### Print the results ##### + // Print the results PRINT result_set[result_set.@dis, result_set.@path]; IF display THEN diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql index ed8cc1b..aba6815 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql @@ -5,17 +5,17 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph */ TYPEDEF TUPLE Path_Tuple; HeapAccum(1, dist ASC) @min_path; - ListAccum @path; # shortest path FROM source - SetAccum @@edge_set; # list of all edges, if display is needed + ListAccum @path; // shortest path FROM source + SetAccum @@edge_set; // list of all edges, if display is needed OrAccum @visited; STRING source_name; INT iter; BOOL negative_cycle; - total = {source}; # the connected vertices + total = {source}; // the connected vertices start = {source}; - ##### Get the connected vertices + // Get the connected vertices start = SELECT s FROM start:s ACCUM s.@min_path += Path_Tuple(0, s), @@ -29,8 +29,8 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph total = total UNION start; END; - ##### Do V-1 iterations: Consider whether each 
edge lowers the best-known distance. - iter = total.size() - 1; # the max iteration is V-1 + // Do V-1 iterations: Consider whether each edge lowers the best-known distance. + iter = total.size() - 1; // the max iteration is V-1 WHILE TRUE LIMIT iter DO tmp = SELECT s FROM total:s -(flight_route:e)- :t @@ -40,13 +40,13 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph END; END; - ##### Calculate the paths ##### + // Calculate the paths start = {source}; tmp = SELECT s FROM total:s WHERE s != source ACCUM s.@visited = FALSE; - WHILE start.size() > 0 LIMIT iter DO # Limit the number of hops + WHILE start.size() > 0 LIMIT iter DO // Limit the number of hops start = SELECT t FROM start:s -(flight_route:e)- :t WHERE NOT t.@visited @@ -57,7 +57,7 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph END; END; - ##### Print the results ##### + // Print the results PRINT total[total.@min_path.top().dist, total.@path]; IF display THEN tmp = SELECT s diff --git a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql index e612522..6672dd1 100644 --- a/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql +++ b/Graph-Analytics-Centrality-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql @@ -6,17 +6,17 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT max_hop */ TYPEDEF TUPLE Path_Tuple; HeapAccum(1, dist ASC) @min_path; - ListAccum @path; # shortest path FROM source - SetAccum @@edge_set; # list of all edges, if display is needed + ListAccum @path; // shortest path FROM source + SetAccum @@edge_set; // list of all edges, if display is needed OrAccum @visited; STRING source_name; INT iter; BOOL negative_cycle; - total = {source}; # the connected vertices + total = {source}; // the connected vertices start = {source}; - 
##### Get the connected vertices + // Get the connected vertices start = SELECT s FROM start:s ACCUM s.@min_path += Path_Tuple(0, s), @@ -30,8 +30,8 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT max_hop total = total UNION start; END; - ##### Do V-1 iterations: Consider whether each edge lowers the best-known distance. - iter = 1; # the max iteration is V-1 + // Do V-1 iterations: Consider whether each edge lowers the best-known distance. + iter = 1; // the max iteration is V-1 WHILE iter < total.size() LIMIT max_hops DO tmp = SELECT s FROM total:s -(flight_route:e)- :t @@ -42,13 +42,13 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT max_hop iter = iter + 1; END; - ##### Calculate the paths ##### + // Calculate the paths start = {source}; tmp = SELECT s FROM total:s WHERE s != source ACCUM s.@visited = FALSE; - WHILE start.size() > 0 LIMIT iter DO # Limit the number of hops + WHILE start.size() > 0 LIMIT iter DO // Limit the number of hops start = SELECT t FROM start:s -(flight_route:e)- :t WHERE NOT t.@visited @@ -59,8 +59,8 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT max_hop END; END; - ##### Print the results ##### - total = SELECT s # Sort VSET + // Print the results + total = SELECT s // Sort VSET FROM total:s ORDER BY s.@min_path.top().dist ASC LIMIT maxDest; #Limit the size of the output set diff --git a/Graph-Analytics-Community-Detection-Algorithms/.DS_Store b/Graph-Analytics-Community-Detection-Algorithms/.DS_Store index a181bc6..0737137 100644 Binary files a/Graph-Analytics-Community-Detection-Algorithms/.DS_Store and b/Graph-Analytics-Community-Detection-Algorithms/.DS_Store differ diff --git a/Graph-Analytics-Community-Detection-Algorithms/Community Queries.zip b/Graph-Analytics-Community-Detection-Algorithms/Community Queries.zip deleted file mode 100644 index 210d7ee..0000000 Binary files a/Graph-Analytics-Community-Detection-Algorithms/Community Queries.zip and 
/dev/null differ diff --git a/Graph-Analytics-Community-Detection-Algorithms/Community-Detection-Queries.zip b/Graph-Analytics-Community-Detection-Algorithms/Community-Detection-Queries.zip new file mode 100644 index 0000000..227c416 Binary files /dev/null and b/Graph-Analytics-Community-Detection-Algorithms/Community-Detection-Queries.zip differ diff --git a/Graph-Analytics-Community-Detection-Algorithms/Community-Detection-Solutions.tar.gz b/Graph-Analytics-Community-Detection-Algorithms/Community-Detection-Solutions.tar.gz new file mode 100644 index 0000000..4646eac Binary files /dev/null and b/Graph-Analytics-Community-Detection-Algorithms/Community-Detection-Solutions.tar.gz differ diff --git a/Graph-Analytics-Community-Detection-Algorithms/community-detection.tar.gz b/Graph-Analytics-Community-Detection-Algorithms/community-detection.tar.gz deleted file mode 100644 index 1828aab..0000000 Binary files a/Graph-Analytics-Community-Detection-Algorithms/community-detection.tar.gz and /dev/null differ diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/.DS_Store b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/.DS_Store index 908df4d..8402b39 100644 Binary files a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/.DS_Store and b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/.DS_Store differ diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain.gsql index b373b5d..4f28907 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain.gsql @@ -12,21 +12,21 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp */ TYPEDEF TUPLE Cluster_Num; TYPEDEF TUPLE V_Delta_Q; - HeapAccum(1, deltaQ DESC, cid ASC) @largest_delta_Q; # if deltaQ is the same, select 
the one with mininal vid MapAccum @@tot_incident_cluster; # sun of weight incident to clusters - MapAccum @@cluster_sizes; # size of a cluster - MapAccum @weight_to_cluster; # weight from one vertex incident to that cluster - SumAccum @@total_weight; # total weight of all edges - SumAccum @weight; # total weight incident to this vertex - SumAccum @cweight; # total weight incident to this aggregate vertex - SumAccum @uid; # which vertex it belongs to - SumAccum @cid; # which cluster it belongs to - SumAccum @vid; # internal id - SumAccum @deltaQ; # contribution to the modularity + HeapAccum(1, deltaQ DESC, cid ASC) @largest_delta_Q; // if deltaQ is the same, select the one with minimal vid + MapAccum @@tot_incident_cluster; // sum of weight incident to clusters + MapAccum @@cluster_sizes; // size of a cluster + MapAccum @weight_to_cluster; // weight from one vertex incident to that cluster + SumAccum @@total_weight; // total weight of all edges + SumAccum @weight; // total weight incident to this vertex + SumAccum @cweight; // total weight incident to this aggregate vertex + SumAccum @uid; // which vertex it belongs to + SumAccum @cid; // which cluster it belongs to + SumAccum @vid; // internal id + SumAccum @deltaQ; // contribution to the modularity SumAccum @@modularity; SumAccum @@modularity2; - MapAccum> @@weight_to_cluster_map; # calculate edges between communities - MapAccum> @@move_comm; # map of communities that changed its community id + MapAccum> @@weight_to_cluster_map; // calculate edges between communities + MapAccum> @@move_comm; // map of communities that changed its community id MapAccum> @@represent_map; SetAccum @@represent_set; MapAccum @@vertex_map; @@ -42,7 +42,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp INT iteration2; INT partitions; INT loop; - INT debug = 0; # debug: 0, no modularity info; 1, show debug log; 2, modularity for each iteration + INT debug = 0; // debug: 0, no modularity info; 1, show
debug log; 2, modularity for each iteration partitions = split; CASE WHEN split < 1 THEN @@ -50,7 +50,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp END; -# Initialize: count edges and set a unique cluster ID for each vertex +// Initialize: count edges and set a unique cluster ID for each vertex start (ANY) = {Prescriber.*}; S (ANY) = SELECT s FROM start:s -((referral>|reverse_referral>):e)- :t @@ -58,16 +58,16 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp s.@weight += e.num_patient POST-ACCUM s.@vid = getvid(s), s.@uid = s.@vid, - s.@cid = s.@vid # Label each vertex with its own internal ID + s.@cid = s.@vid // Label each vertex with its own internal ID ; -# Special first iteration of Phase 1 +// Special first iteration of Phase 1 iteration = 1; S = SELECT s FROM start:s -((referral>|reverse_referral>):e)- :t WHERE s.@cid > t.@cid ACCUM s.@largest_delta_Q += V_Delta_Q(t, t.@cid, e.num_patient - 2 * s.@weight * s.@weight/ @@total_weight) - # weight_to_cluster is just e.num_patient + // weight_to_cluster is just e.num_patient POST-ACCUM INT bestCluster = s.@largest_delta_Q.top().cid, IF s.@largest_delta_Q.size() > 0 and s.@largest_delta_Q.top().deltaQ > 0 and s.@cid != bestCluster THEN s.@cid = bestCluster @@ -84,10 +84,10 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp log(debug > 0, "[redrain]#move", iteration, @@modularity); -# Phase 1 -- Move -# For each vertex, calculate the change in modularity FROM adding it to each of the nearby clusters -# Add vertex to cluster with highest positive change in modularity -# Repeat the above until no vertices change cluster anymore +// Phase 1 -- Move +// For each vertex, calculate the change in modularity FROM adding it to each of the nearby clusters +// Add vertex to cluster with highest positive change in modularity +// Repeat the above until no vertices change cluster anymore S = SELECT s FROM start:s ACCUM 
@@tot_incident_cluster += (s.@cid -> s.@weight) @@ -101,13 +101,13 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp WHILE (loop < partitions) DO S = SELECT s FROM start:s -((referral>|reverse_referral>):e)- :t - WHERE s.@uid % partitions == loop # for different split - # At least one cluster not singlet(a cluster on its own). If both clusters are singlets, consider only when the label of target is smaller to avoid swap - AND (( abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon # s is not a singlet - OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon ) # or t is not a singlet - OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon # s is a singlet - AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon # t is also a singlet - AND s.@cid > t.@cid) ) # consider only when target label is smaller + WHERE s.@uid % partitions == loop // for different split + // At least one cluster not singlet(a cluster on its own). 
If both clusters are singlets, consider only when the label of target is smaller to avoid swap + AND (( abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon // s is not a singlet + OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon ) // or t is not a singlet + OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon // s is a singlet + AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon // t is also a singlet + AND s.@cid > t.@cid) ) // consider only when target label is smaller ACCUM s.@weight_to_cluster += (t.@cid -> e.num_patient) POST-ACCUM INT bestCluster = s.@cid, FLOAT max_delta_Q = 0.0, @@ -115,7 +115,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp FOREACH (cluster, weightToC) IN s.@weight_to_cluster DO #would be better if this can be distributed FLOAT incident = @@tot_incident_cluster.get(cluster), delta_Q_new = weightToC - 2 * incident * s.@weight/ @@total_weight, - IF delta_Q_new > max_delta_Q OR (abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < bestCluster) THEN # when delta_Q_new is equal to max_delta_Q, and the cluster label is smaller, also change + IF delta_Q_new > max_delta_Q OR (abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < bestCluster) THEN // when delta_Q_new is equal to max_delta_Q, and the cluster label is smaller, also change max_delta_Q = delta_Q_new, bestCluster = cluster END @@ -141,7 +141,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp log(debug > 0, "[redrain]#move", iteration, @@modularity); END; // outer WHILE -# Phase 2 -- Merge +// Phase 2 -- Merge iteration2 = 0; WHILE (iteration2 < 2 OR @@modularity2 - last_modularity2 > epsilon) LIMIT iter2 DO iteration2 = iteration2 + 1; @@ -149,7 +149,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp FROM start:s ACCUM s.@uid = s.@cid ; - # Select the vertices with minimal internal id to represent the coarsened graph + // 
Select the vertices with minimal internal id to represent the coarsened graph start = SELECT s FROM start:s ACCUM @@represent_map += (s.@cid -> s) @@ -162,7 +162,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp @@represent_set.clear(); log(debug > 0, "[redrain]#2_merge", represent.size()); #@@cluster_sizes.size()); - # Get @cweight from totalIncident + // Get @cweight from totalIncident represent = SELECT s FROM represent:s ACCUM s.@cweight = @@tot_incident_cluster.get(s.@uid), @@ -174,12 +174,12 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp @@modularity = 0; WHILE (iteration < 2 OR @@modularity - last_modularity > epsilon) limit iter1 DO iteration = iteration + 1; - # Calculate.num_patient incident from vertex to cluster in coarsened graph; change every interation + // Calculate.num_patient incident from vertex to cluster in coarsened graph; change every interation S = SELECT s FROM start:s -((referral>|reverse_referral>):e)-:t - # @tot_incident_cluster keeps changing, can be 0 + // @tot_incident_cluster keeps changing, can be 0 WHERE s.@cid != t.@cid AND @@tot_incident_cluster.get(s.@uid) > 0 AND @@tot_incident_cluster.get(t.@cid) > 0 - # from s, incident to some clusters. Not consider the same cluster + // from s, incident to some clusters. 
Not consider the same cluster ACCUM @@weight_to_cluster_map += (s.@uid -> (t.@cid -> e.num_patient)) ; represent = SELECT s @@ -193,7 +193,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp CONTINUE END, delta_Q_new = weightToC - 2 * incident * s.@cweight/ @@total_weight, #total weight should be the same - IF delta_Q_new > max_delta_Q OR abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < bestCluster THEN # new cluster is smaller then the current best cluster + IF delta_Q_new > max_delta_Q OR abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < bestCluster THEN // new cluster is smaller then the current best cluster max_delta_Q = delta_Q_new, bestCluster = cluster END @@ -210,7 +210,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp log(debug > 1, "[redrain]#2_merge", @@weight_to_cluster_map.size()); @@weight_to_cluster_map.clear(); log(debug > 1, "[redrain]#2_move:", @@move_comm.size()); - # move nodes + // move nodes S = SELECT s FROM start:s WHERE @@move_comm.containsKey(s.@uid) @@ -243,14 +243,14 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp END; // outer WHILE -# Phase 3 -- Refinement +// Phase 3 -- Refinement iteration = 0; @@modularity = 0; WHILE (iteration < 2 OR @@modularity - last_modularity > epsilon) LIMIT iter3 DO iteration = iteration + 1; S = SELECT s FROM start:s -((referral>|reverse_referral>):e)- :t - WHERE abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon AND s.@cid > t.@cid) # at least one cluster not only itself, or use smaller label + WHERE abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < 
epsilon AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon AND s.@cid > t.@cid) // at least one cluster not only itself, or use smaller label ACCUM s.@weight_to_cluster += (t.@cid -> e.num_patient) POST-ACCUM INT bestCluster = s.@cid, @@ -259,7 +259,7 @@ CREATE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, INT iter3 = 10, INT sp FOREACH (cluster, weightToC) IN s.@weight_to_cluster DO #would be better if this can be distributed FLOAT incident = @@tot_incident_cluster.get(cluster), delta_Q_new = weightToC - 2 * incident * s.@weight/ @@total_weight, - IF delta_Q_new > max_delta_Q OR (abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < bestCluster) THEN # when delta_Q_new is equal to max_delta_Q, and the cluster label is smaller, also change + IF delta_Q_new > max_delta_Q OR (abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < bestCluster) THEN // when delta_Q_new is equal to max_delta_Q, and the cluster label is smaller, also change max_delta_Q = delta_Q_new, bestCluster = cluster END diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain_enhanced.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain_enhanced.gsql index dcb0cfc..51c259b 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain_enhanced.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_louvain_enhanced.gsql @@ -14,21 +14,21 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, */ TYPEDEF TUPLE Cluster_Num; TYPEDEF TUPLE V_Delta_Q; - HeapAccum(1, delta_Q DESC, cid ASC) @largest_delta_Q; # if delta_Q is the same, select the one with mininal vid - MapAccum @@tot_incident_cluster; # sun of weight incident to clusters - MapAccum @@cluster_sizes; # size of a cluster - MapAccum @weight_to_cluster; # weight from one vertex incident to that cluster - SumAccum @@total_weight; # total weight of all edges - SumAccum @weight; # total 
weight incident to this vertex - SumAccum @cweight; # total weight incident to this aggregate vertex - SumAccum @uid; # which vertex it belongs to - SumAccum @cid; # which cluster it belongs to - SumAccum @vid; # internal id - SumAccum @delta_Q; # contribution to the modularity + HeapAccum(1, delta_Q DESC, cid ASC) @largest_delta_Q; // if delta_Q is the same, select the one with mininal vid + MapAccum @@tot_incident_cluster; // sun of weight incident to clusters + MapAccum @@cluster_sizes; // size of a cluster + MapAccum @weight_to_cluster; // weight from one vertex incident to that cluster + SumAccum @@total_weight; // total weight of all edges + SumAccum @weight; // total weight incident to this vertex + SumAccum @cweight; // total weight incident to this aggregate vertex + SumAccum @uid; // which vertex it belongs to + SumAccum @cid; // which cluster it belongs to + SumAccum @vid; // internal id + SumAccum @delta_Q; // contribution to the modularity SumAccum @@modularity; SumAccum @@modularity2; - MapAccum> @@weight_to_cluster_map; # calculate edges between communities - MapAccum> @@move_comm; # map of communities that changed its community id + MapAccum> @@weight_to_cluster_map; // calculate edges between communities + MapAccum> @@move_comm; // map of communities that changed its community id MapAccum> @@represent_map; SetAccum @@represent_set; MapAccum @@vertex_map; @@ -45,7 +45,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, INT iteration2; INT partitions; INT loop; - INT debug = 0; # debug: 0, no modularity info; 1, show debug log; 2, modularity for each iteration + INT debug = 0; // debug: 0, no modularity info; 1, show debug log; 2, modularity for each iteration partitions = split; CASE WHEN split < 1 THEN @@ -53,7 +53,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, END; -# Initialize: count edges and set a unique cluster ID for each vertex +// Initialize: count edges and set a unique cluster ID 
for each vertex start = {vertex_type}; S = SELECT s FROM start:s -((edge_type|rev_edge_type):e)- :t @@ -61,16 +61,16 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, s.@weight += 1 POST-ACCUM s.@vid = getvid(s), s.@uid = s.@vid, - s.@cid = s.@vid # Label each vertex with its own internal ID + s.@cid = s.@vid // Label each vertex with its own internal ID ; -# Special first iteration of Phase 1 +// Special first iteration of Phase 1 iteration = 1; S = SELECT s FROM start:s -((edge_type|rev_edge_type):e)- :t WHERE s.@cid > t.@cid ACCUM s.@largest_delta_Q += V_Delta_Q(t, t.@cid, 1 - 2 * s.@weight * s.@weight/ @@total_weight) - # weight_to_cluster is just 1 + // weight_to_cluster is just 1 POST-ACCUM INT best_cluster = s.@largest_delta_Q.top().cid, IF s.@largest_delta_Q.size() > 0 and s.@largest_delta_Q.top().delta_Q > 0 and s.@cid != best_cluster THEN s.@cid = best_cluster @@ -87,10 +87,10 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, log(debug > 0, "[redrain]#move", iteration, @@modularity); -# Phase 1 -- Move -# For each vertex, calculate the change in modularity FROM adding it to each of the nearby clusters -# Add vertex to cluster with highest positive change in modularity -# Repeat the above until no vertices change cluster anymore +// Phase 1 -- Move +// For each vertex, calculate the change in modularity FROM adding it to each of the nearby clusters +// Add vertex to cluster with highest positive change in modularity +// Repeat the above until no vertices change cluster anymore S = SELECT s FROM start:s ACCUM @@tot_incident_cluster += (s.@cid -> s.@weight) @@ -104,13 +104,13 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, WHILE (loop < partitions) DO S = SELECT s FROM start:s -((edge_type|rev_edge_type):e)- :t - WHERE s.@uid % partitions == loop # for different split - # At least one cluster not singlet(a cluster on its own). 
If both clusters are singlets, consider only when the label of target is smaller to avoid swap - AND (( abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon # s is not a singlet - OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon ) # or t is not a singlet - OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon # s is a singlet - AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon # t is also a singlet - AND s.@cid > t.@cid) ) # consider only when target label is smaller + WHERE s.@uid % partitions == loop // for different split + // At least one cluster not singlet(a cluster on its own). If both clusters are singlets, consider only when the label of target is smaller to avoid swap + AND (( abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon // s is not a singlet + OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon ) // or t is not a singlet + OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon // s is a singlet + AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon // t is also a singlet + AND s.@cid > t.@cid) ) // consider only when target label is smaller ACCUM s.@weight_to_cluster += (t.@cid -> 1) POST-ACCUM INT best_cluster = s.@cid, FLOAT max_delta_Q = 0.0, @@ -118,7 +118,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, FOREACH (cluster, weightToC) IN s.@weight_to_cluster DO #would be better if this can be distributed FLOAT incident = @@tot_incident_cluster.get(cluster), deltaQ_new = weightToC - 2 * incident * s.@weight/ @@total_weight, - IF deltaQ_new > max_delta_Q OR (abs(deltaQ_new - max_delta_Q) < epsilon AND cluster < best_cluster) THEN # when deltaQ_new is equal to max_delta_Q, and the cluster label is smaller, also change + IF deltaQ_new > max_delta_Q OR (abs(deltaQ_new - max_delta_Q) < epsilon AND cluster < best_cluster) THEN // when deltaQ_new is equal to max_delta_Q, and the cluster label is smaller, also change 
max_delta_Q = deltaQ_new, best_cluster = cluster END @@ -144,7 +144,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, log(debug > 0, "[redrain]#move", iteration, @@modularity); END; // outer WHILE -# Phase 2 -- Merge +// Phase 2 -- Merge iteration2 = 0; WHILE (iteration2 < 2 OR @@modularity2 - last_modularity2 > epsilon) LIMIT iter2 DO iteration2 = iteration2 + 1; @@ -152,7 +152,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, FROM start:s ACCUM s.@uid = s.@cid ; - # Select the vertices with minimal internal id to represent the coarsened graph + // Select the vertices with minimal internal id to represent the coarsened graph start = SELECT s FROM start:s ACCUM @@represent_map += (s.@cid -> s) @@ -165,7 +165,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, @@represent_set.clear(); log(debug > 0, "[redrain]#2_merge", represent.size()); #@@cluster_sizes.size()); - # Get @cweight from totalIncident + // Get @cweight from totalIncident represent = SELECT s FROM represent:s ACCUM s.@cweight = @@tot_incident_cluster.get(s.@uid), @@ -177,12 +177,12 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, @@modularity = 0; WHILE (iteration < 2 OR @@modularity - last_modularity > epsilon) limit iter1 DO iteration = iteration + 1; - # Calculate # incident from vertex to cluster in coarsened graph; change every interation + // Calculate // incident from vertex to cluster in coarsened graph; change every interation S = SELECT s FROM start:s -((edge_type|rev_edge_type):e)-:t - # @tot_incident_cluster keeps changing, can be 0 + // @tot_incident_cluster keeps changing, can be 0 WHERE s.@cid != t.@cid AND @@tot_incident_cluster.get(s.@uid) > 0 AND @@tot_incident_cluster.get(t.@cid) > 0 - # from s, incident to some clusters. Not consider the same cluster + // from s, incident to some clusters. 
Not consider the same cluster ACCUM @@weight_to_cluster_map += (s.@uid -> (t.@cid -> 1)) ; represent = SELECT s @@ -196,7 +196,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, CONTINUE END, deltaQ_new = weightToC - 2 * incident * s.@cweight/ @@total_weight, #total weight should be the same - IF deltaQ_new > max_delta_Q OR abs(deltaQ_new - max_delta_Q) < epsilon AND cluster < best_cluster THEN # new cluster is smaller then the current best cluster + IF deltaQ_new > max_delta_Q OR abs(deltaQ_new - max_delta_Q) < epsilon AND cluster < best_cluster THEN // new cluster is smaller then the current best cluster max_delta_Q = deltaQ_new, best_cluster = cluster END @@ -213,7 +213,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, log(debug > 1, "[redrain]#2_merge", @@weight_to_cluster_map.size()); @@weight_to_cluster_map.clear(); log(debug > 1, "[redrain]#2_move:", @@move_comm.size()); - # move nodes + // move nodes S = SELECT s FROM start:s WHERE @@move_comm.containsKey(s.@uid) @@ -246,14 +246,14 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, END; // outer WHILE -# Phase 3 -- Refinement +// Phase 3 -- Refinement iteration = 0; @@modularity = 0; WHILE (iteration < 2 OR @@modularity - last_modularity > epsilon) LIMIT iter3 DO iteration = iteration + 1; S = SELECT s FROM start:s -((edge_type|rev_edge_type):e)- :t - WHERE abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon AND s.@cid > t.@cid) # at least one cluster not only itself, or use smaller label + WHERE abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon AND abs(t.@weight - 
@@tot_incident_cluster.get(t.@cid)) < epsilon AND s.@cid > t.@cid) // at least one cluster not only itself, or use smaller label ACCUM s.@weight_to_cluster += (t.@cid -> 1) POST-ACCUM INT best_cluster = s.@cid, @@ -262,7 +262,7 @@ CREATE QUERY algo_louvain_enhanced(STRING vertex_type, STRING edge_type, FOREACH (cluster, weightToC) IN s.@weight_to_cluster DO #would be better if this can be distributed FLOAT incident = @@tot_incident_cluster.get(cluster), deltaQ_new = weightToC - 2 * incident * s.@weight/ @@total_weight, - IF deltaQ_new > max_delta_Q OR (abs(deltaQ_new - max_delta_Q) < epsilon AND cluster < best_cluster) THEN # when deltaQ_new is equal to max_delta_Q, and the cluster label is smaller, also change + IF deltaQ_new > max_delta_Q OR (abs(deltaQ_new - max_delta_Q) < epsilon AND cluster < best_cluster) THEN // when deltaQ_new is equal to max_delta_Q, and the cluster label is smaller, also change max_delta_Q = deltaQ_new, best_cluster = cluster END diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_page_rank.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_page_rank.gsql index 3726ebd..4607f9f 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_page_rank.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/algo_page_rank.gsql @@ -1,21 +1,21 @@ CREATE QUERY algo_page_rank(FLOAT max_change = 0.001, INT max_iter = 25, FLOAT damping = 0.85, INT output_limit) FOR GRAPH MyGraph SYNTAX V2 { - # Compute the pageRank score for each vertex in the GRAPH -# In each iteration, compute a score for each vertex: -# score = (1-damping) + damping*sum(received scores FROM its neighbors). -# The pageRank algorithm stops when either of the following is true: -# a) it reaches max_iter iterations; -# b) the max score change for any vertex compared to the last iteration <= max_change. 
+ // Compute the pageRank score for each vertex in the GRAPH +// In each iteration, compute a score for each vertex: +// score = (1-damping) + damping*sum(received scores FROM its neighbors). +// The pageRank algorithm stops when either of the following is true: +// a) it reaches max_iter iterations; +// b) the max score change for any vertex compared to the last iteration <= max_change. TYPEDEF TUPLE Vertex_Score; HeapAccum(output_limit, score DESC) @@top_scores; - MaxAccum @@max_diff = 9999; # max score change in an iteration - SumAccum @received_score = 0; # sum of scores each vertex receives FROM neighbors - SumAccum @score = 1; # Initial score for every vertex is 1. - SetAccum @@edge_set; # list of all edges, if display is needed + MaxAccum @@max_diff = 9999; // max score change in an iteration + SumAccum @received_score = 0; // sum of scores each vertex receives FROM neighbors + SumAccum @score = 1; // Initial score for every vertex is 1. + SetAccum @@edge_set; // list of all edges, if display is needed - start = {Prescriber.*}; # start with all vertices of specified type(s) + start = {Prescriber.*}; // start with all vertices of specified type(s) V (ANY) = {}; WHILE @@max_diff > max_change LIMIT max_iter DO @@max_diff = 0; @@ -25,7 +25,7 @@ CREATE QUERY algo_page_rank(FLOAT max_change = 0.001, INT max_iter = 25, POST-ACCUM s.@score = (1.0-damping) + damping * s.@received_score, s.@received_score = 0, @@max_diff += abs(s.@score - s.@score'); - END; # END WHILE loop + END; // END WHILE loop IF output_limit > 0 THEN diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp.gsql index aeb7a58..6b20d01 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp.gsql @@ -1,6 +1,6 @@ CREATE QUERY conn_comp (STRING vertex_type, STRING 
edge_type, STRING rev_edge_type) FOR GRAPH MyGraph SYNTAX V2 { -# This query identifies the Connected Components (undirected edges) +// This query identifies the Connected Components (undirected edges) MinAccum @cc_id = 0; //each vertex's tentative component id SumAccum @old_id = 0; @@ -9,14 +9,14 @@ CREATE QUERY conn_comp (STRING vertex_type, STRING edge_type, STRING rev_edge_ty start = {vertex_type}; -# Initialize: Label each vertex with its own internal ID +// Initialize: Label each vertex with its own internal ID S = SELECT x FROM start:x POST-ACCUM x.@cc_id = getvid(x), x.@old_id = getvid(x) ; -# Propagate smaller internal IDs until no more ID changes can be DOne +// Propagate smaller internal IDs until no more ID changes can be DOne WHILE (start.size()>0) DO start = SELECT t FROM start:s -((edge_type|rev_edge_type):e)- :t @@ -24,11 +24,11 @@ CREATE QUERY conn_comp (STRING vertex_type, STRING edge_type, STRING rev_edge_ty POST-ACCUM CASE WHEN t.@old_id != t.@cc_id THEN // If t's id has changed t.@old_id = t.@cc_id, - t.@active = true + t.@active = TRUE ELSE - t.@active = false + t.@active = FALSE END - HAVING t.@active == true + HAVING t.@active == TRUE ; END; diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp_enhanced.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp_enhanced.gsql index 4db66fc..cffce54 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp_enhanced.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/conn_comp_enhanced.gsql @@ -1,6 +1,6 @@ CREATE QUERY conn_comp_enhanced (SET vertex_types, STRING vt2, STRING edge_type, STRING rev_edge_type, INT output_level) FOR GRAPH MyGraph SYNTAX V2 { -# This query identifies the Connected Components (undirected edges) +// This query identifies the Connected Components (undirected edges) MinAccum @cc_id = 0; //each vertex's tentative component id SumAccum @old_id = 0; @@ -9,14 
+9,14 @@ CREATE QUERY conn_comp_enhanced (SET vertex_types, STRING vt2, start = {vertex_types}; -# Initialize: Label each vertex with its own internal ID +// Initialize: Label each vertex with its own internal ID S = SELECT x FROM start:x POST-ACCUM x.@cc_id = getvid(x), x.@old_id = getvid(x) ; -# Propagate smaller internal IDs until no more ID changes can be DOne +// Propagate smaller internal IDs until no more ID changes can be DOne WHILE (start.size()>0) DO start = SELECT t FROM start:s -((edge_type|rev_edge_type):e)- :t @@ -24,11 +24,11 @@ CREATE QUERY conn_comp_enhanced (SET vertex_types, STRING vt2, POST-ACCUM CASE WHEN t.@old_id != t.@cc_id THEN // If t's id has changed t.@old_id = t.@cc_id, - t.@active = true + t.@active = TRUE ELSE - t.@active = false + t.@active = FALSE END - HAVING t.@active == true + HAVING t.@active == TRUE ; END; diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/insert_referrals.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/insert_referrals.gsql index 4b4b93b..2bb0ab2 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/insert_referrals.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/insert_referrals.gsql @@ -8,19 +8,19 @@ CREATE QUERY insert_referrals(VERTEX input_prescriber) start_set = {input_prescriber}; claims = SELECT t FROM start_set:s-(reverse_submitted_by>:e)-:t - POST-ACCUM t.@visited = true; + POST-ACCUM t.@visited = TRUE; patients = SELECT t FROM claims:s-(associated>:e)-:t ACCUM t.@date_list += s.rx_fill_date; claims = SELECT t FROM patients:s-(reverse_associated>:e)-:t - WHERE t.@visited == false + WHERE t.@visited == FALSE ACCUM FOREACH dt in s.@date_list do CASE WHEN datetime_diff(dt, t.rx_fill_date) BETWEEN 0 AND 2592000 THEN - t.@is_referred_claim = true + t.@is_referred_claim = TRUE END END - HAVING t.@is_referred_claim == true; + HAVING t.@is_referred_claim == TRUE; prescribers = SELECT t FROM 
claims-(submitted_by>:e)-:t POST-ACCUM INSERT INTO referral VALUES(input_prescriber, t, 1), diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc.gsql index bad4d8e..5e5a463 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc.gsql @@ -77,11 +77,11 @@ CREATE QUERY scc (INT iter = 500, INT iter_wcc = 5, INT top_k_dist = 10) FOR GRA POST-ACCUM CASE WHEN t.@wcc_id_prev != t.@wcc_id_curr THEN // If t's id has changed t.@wcc_id_prev = t.@wcc_id_curr, - t.@wcc_active = true + t.@wcc_active = TRUE ELSE - t.@wcc_active = false + t.@wcc_active = FALSE END - HAVING t.@wcc_active == true; + HAVING t.@wcc_active == TRUE; END; active = SELECT s FROM active:s diff --git a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc_enhanced.gsql b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc_enhanced.gsql index b169ba1..04639a5 100644 --- a/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc_enhanced.gsql +++ b/Graph-Analytics-Community-Detection-Algorithms/db_scripts/queries/scc_enhanced.gsql @@ -79,11 +79,11 @@ CREATE QUERY scc_enhanced (INT iter = 500, INT iter_wcc = 5, INT top_k_dist = 10 POST-ACCUM CASE WHEN t.@wcc_id_prev != t.@wcc_id_curr THEN // If t's id has changed t.@wcc_id_prev = t.@wcc_id_curr, - t.@wcc_active = true + t.@wcc_active = TRUE ELSE - t.@wcc_active = false + t.@wcc_active = FALSE END - HAVING t.@wcc_active == true; + HAVING t.@wcc_active == TRUE; END; active = SELECT s FROM active:s diff --git a/Graph-Analytics-Community-Detection-Algorithms/export_094919630.tar.gz b/Graph-Analytics-Community-Detection-Algorithms/export_094919630.tar.gz deleted file mode 100644 index 5b00441..0000000 Binary files a/Graph-Analytics-Community-Detection-Algorithms/export_094919630.tar.gz and /dev/null differ 
diff --git a/Graph-Analytics-Shortest-Path-Algorithms/.DS_Store b/Graph-Analytics-Shortest-Path-Algorithms/.DS_Store index 74eaf93..552e0e5 100644 Binary files a/Graph-Analytics-Shortest-Path-Algorithms/.DS_Store and b/Graph-Analytics-Shortest-Path-Algorithms/.DS_Store differ diff --git a/Graph-Analytics-Shortest-Path-Algorithms/Shortest Path Query.zip b/Graph-Analytics-Shortest-Path-Algorithms/Shortest Path Query.zip deleted file mode 100644 index 3473d60..0000000 Binary files a/Graph-Analytics-Shortest-Path-Algorithms/Shortest Path Query.zip and /dev/null differ diff --git a/Graph-Analytics-Shortest-Path-Algorithms/Shortest-Path-Query.zip b/Graph-Analytics-Shortest-Path-Algorithms/Shortest-Path-Query.zip new file mode 100644 index 0000000..87e6fc8 Binary files /dev/null and b/Graph-Analytics-Shortest-Path-Algorithms/Shortest-Path-Query.zip differ diff --git a/Graph-Analytics-Shortest-Path-Algorithms/Shortest-Path-Solutions.tar.gz b/Graph-Analytics-Shortest-Path-Algorithms/Shortest-Path-Solutions.tar.gz new file mode 100644 index 0000000..93be137 Binary files /dev/null and b/Graph-Analytics-Shortest-Path-Algorithms/Shortest-Path-Solutions.tar.gz differ diff --git a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/add_weights.gsql b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/add_weights.gsql index 7e7e705..4557f39 100644 --- a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/add_weights.gsql +++ b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/add_weights.gsql @@ -18,7 +18,7 @@ The calculated distances are measured in miles and are added as edge weights. 
Heavy = SELECT t FROM Start:s-(flight_route:e)-Airport:t ACCUM - IF overwrite == False AND e.miles != 0 THEN + IF overwrite == FALSE AND e.miles != 0 THEN @@dont_Change_List += e ELSE double lat1 = s.latitude * pi / 180, // lat1 to radians diff --git a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql index c020565..1197320 100644 --- a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql +++ b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_no_wt.gsql @@ -9,27 +9,27 @@ The attribute version only store the distance into attribute, not the path. ListAccum @path; SetAccum @@edge_Set; - ##### Initialization ##### + // Initialization Source = {source}; Source = SELECT s FROM Source:s - ACCUM s.@visited += true, + ACCUM s.@visited += TRUE, s.@dis = 0, s.@path = s; ResultSet = {source}; - ##### Calculate distances and paths ##### + // Calculate distances and paths WHILE(Source.size()>0) DO Source = SELECT t FROM Source:s -(:e)- :t - WHERE t.@visited == false + WHERE t.@visited == FALSE ACCUM t.@dis += s.@dis + 1, t.@path = s.@path + [t], - t.@visited += true; + t.@visited += TRUE; ResultSet = ResultSet UNION Source; END; - ##### Print the results ##### + // Print the results PRINT ResultSet[ResultSet.@dis, ResultSet.@path]; IF display THEN diff --git a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql index 3478223..1007502 100644 --- a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql +++ b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt.gsql @@ -9,17 +9,17 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph */ TYPEDEF TUPLE pathTuple; HeapAccum(1, dist ASC) @minPath; - ListAccum @path; # 
shortest path FROM source - SetAccum @@edge_Set; # list of all edges, if display is needed + ListAccum @path; // shortest path FROM source + SetAccum @@edge_Set; // list of all edges, if display is needed OrAccum @visited; STRING sourceName; INT iter; BOOL negativeCycle; - total = {source}; # the connected vertices + total = {source}; // the connected vertices start = {source}; - ##### Get the connected vertices + // Get the connected vertices start = SELECT s FROM start:s ACCUM s.@minPath += pathTuple(0, s), @@ -33,8 +33,8 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph total = total UNION start; END; - ##### Do V-1 iterations: Consider whether each edge lowers the best-known distance. - iter = total.size() - 1; # the max iteration is V-1 + // Do V-1 iterations: Consider whether each edge lowers the best-known distance. + iter = total.size() - 1; // the max iteration is V-1 WHILE TRUE LIMIT iter DO tmp = SELECT s FROM total:s -(flight_route:e)- :t @@ -44,13 +44,13 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph END; END; - ##### Calculate the paths ##### + // Calculate the paths start = {source}; tmp = SELECT s FROM total:s WHERE s != source ACCUM s.@visited = FALSE; - WHILE start.size() > 0 LIMIT iter DO # Limit the number of hops + WHILE start.size() > 0 LIMIT iter DO // Limit the number of hops start = SELECT t FROM start:s -(flight_route:e)- :t WHERE NOT t.@visited @@ -61,7 +61,7 @@ CREATE QUERY shortest_ss_pos_wt (VERTEX source, BOOL display) FOR GRAPH MyGraph END; END; - ##### Print the results ##### + // Print the results PRINT total[total.@minPath.top().dist, total.@path]; IF display THEN tmp = SELECT s diff --git a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql index 7f6dc21..ab1c846 100644 --- 
a/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql +++ b/Graph-Analytics-Shortest-Path-Algorithms/db_scripts/queries/shortest_ss_pos_wt_limits.gsql @@ -6,17 +6,17 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT maxHops */ TYPEDEF TUPLE pathTuple; HeapAccum(1, dist ASC) @min_Path; - ListAccum @path; # shortest path FROM source - SetAccum @@edge_Set; # list of all edges, if display is needed + ListAccum @path; // shortest path FROM source + SetAccum @@edge_Set; // list of all edges, if display is needed OrAccum @visited; STRING sourceName; INT iter; BOOL negativeCycle; - total = {source}; # the connected vertices + total = {source}; // the connected vertices start = {source}; - ##### Get the connected vertices + // Get the connected vertices start = SELECT s FROM start:s ACCUM s.@min_Path += pathTuple(0, s), @@ -30,7 +30,7 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT maxHops total = total UNION start; END; - ##### Do V-1 iterations: Consider whether each edge lowers the best-known distance. + // Do V-1 iterations: Consider whether each edge lowers the best-known distance. 
iter = 1; # the max iteration is V-1 WHILE iter < total.size() LIMIT maxHops DO tmp = SELECT s @@ -42,13 +42,13 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT maxHops iter = iter + 1; END; - ##### Calculate the paths ##### + // Calculate the paths start = {source}; tmp = SELECT s FROM total:s WHERE s != source ACCUM s.@visited = FALSE; - WHILE start.size() > 0 LIMIT iter DO # Limit the number of hops + WHILE start.size() > 0 LIMIT iter DO // Limit the number of hops start = SELECT t FROM start:s -(flight_route:e)- :t WHERE NOT t.@visited @@ -59,11 +59,11 @@ CREATE QUERY shortest_ss_pos_wt_limits (VERTEX source, BOOL display, INT maxHops END; END; - ##### Print the results ##### - total = SELECT s # Sort VSET + // Print the results + total = SELECT s // Sort VSET FROM total:s ORDER BY s.@min_Path.top().dist ASC - LIMIT maxDest; #Limit the size of the output set + LIMIT maxDest; // Limit the size of the output set PRINT total[total.@min_Path.top().dist, total.@path]; IF display THEN tmp = SELECT s diff --git a/Graph-Analytics-Shortest-Path-Algorithms/shortest-path.tar.gz b/Graph-Analytics-Shortest-Path-Algorithms/shortest-path.tar.gz deleted file mode 100644 index df0cba1..0000000 Binary files a/Graph-Analytics-Shortest-Path-Algorithms/shortest-path.tar.gz and /dev/null differ diff --git a/Graph-Convolutional-Networks/.DS_Store b/Graph-Convolutional-Networks/.DS_Store index 59f9d71..5f712f5 100644 Binary files a/Graph-Convolutional-Networks/.DS_Store and b/Graph-Convolutional-Networks/.DS_Store differ diff --git a/Graph-Convolutional-Networks/Graph-Convolutional Query.zip b/Graph-Convolutional-Networks/Convolutional-Networks-Queries.zip similarity index 100% rename from Graph-Convolutional-Networks/Graph-Convolutional Query.zip rename to Graph-Convolutional-Networks/Convolutional-Networks-Queries.zip diff --git a/Graph-Convolutional-Networks/GCNonCitationGraph.tar.gz b/Graph-Convolutional-Networks/Convolutional-Networks-Solution.tar.gz 
similarity index 100% rename from Graph-Convolutional-Networks/GCNonCitationGraph.tar.gz rename to Graph-Convolutional-Networks/Convolutional-Networks-Solution.tar.gz diff --git a/Healthcare-Graph-Drug-Interaction-FAERS/.DS_Store b/Healthcare-Graph-Drug-Interaction-FAERS/.DS_Store index 88b225c..f7d10ff 100644 Binary files a/Healthcare-Graph-Drug-Interaction-FAERS/.DS_Store and b/Healthcare-Graph-Drug-Interaction-FAERS/.DS_Store differ diff --git a/Healthcare-Graph-Drug-Interaction-FAERS/Drug Interaction Query.zip b/Healthcare-Graph-Drug-Interaction-FAERS/Healthcare-Graph-Drug-Interaction-FAERS-Queries.zip similarity index 100% rename from Healthcare-Graph-Drug-Interaction-FAERS/Drug Interaction Query.zip rename to Healthcare-Graph-Drug-Interaction-FAERS/Healthcare-Graph-Drug-Interaction-FAERS-Queries.zip diff --git a/Healthcare-Graph-Drug-Interaction-FAERS/Healthcare-Graph-Drug-Interaction-FAERS.tar.gz b/Healthcare-Graph-Drug-Interaction-FAERS/Healthcare-Graph-Drug-Interaction-FAERS-Solution.tar.gz similarity index 100% rename from Healthcare-Graph-Drug-Interaction-FAERS/Healthcare-Graph-Drug-Interaction-FAERS.tar.gz rename to Healthcare-Graph-Drug-Interaction-FAERS/Healthcare-Graph-Drug-Interaction-FAERS-Solution.tar.gz diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/.DS_Store b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/.DS_Store index 8253323..d0b5406 100644 Binary files a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/.DS_Store and b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/.DS_Store differ diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/Referral-Network-Query.zip b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/Healthcare-Referral-Networks-Queries.zip similarity index 54% rename from Healthcare-Referral-networks-Hub-PageRank-Community-Detection/Referral-Network-Query.zip rename to 
Healthcare-Referral-networks-Hub-PageRank-Community-Detection/Healthcare-Referral-Networks-Queries.zip index bb146c1..394917e 100644 Binary files a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/Referral-Network-Query.zip and b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/Healthcare-Referral-Networks-Queries.zip differ diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql index d85c83e..2967189 100644 --- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql +++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql @@ -6,19 +6,19 @@ CREATE OR REPLACE QUERY ex2_create_referral_edge(VERTEX inputPrescri start_set = {inputPrescriber}; claims = SELECT t FROM start_set:s-(:e)-:t ACCUM t.@date_List += s.rx_fill_date; claims = SELECT t FROM patients:s-(:e)-:t POST-ACCUM INSERT INTO referral VALUES(inputPrescriber, t, 1); diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql index 175e2c9..147f3c8 100644 --- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql +++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql @@ -7,9 +7,9 @@ CREATE OR REPLACE QUERY get_k_hop_neighbor(int k, vertex input) FOR GRAPH MyGrap WHILE start.size() > 0 limit k DO start = SELECT t from start-(:e)-:t - WHERE t.@visited == false + WHERE t.@visited == FALSE ACCUM @@edgeList += e - POST-ACCUM t.@visited = true; + POST-ACCUM t.@visited = TRUE; END; print @@edgeList; diff --git 
a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql index 47a8847..5b41194 100644 --- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql +++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql @@ -6,13 +6,13 @@ CREATE OR REPLACE QUERY get_prescribers(vertex inputPrescriber) FOR claims = SELECT t FROM start_set:s-(:e)-:t ACCUM @@list +=e; claims = SELECT t FROM patients:s-(:e)-:t diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/healthcare-analytics-data.tar.gz b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/healthcare-analytics-data.tar.gz deleted file mode 100644 index ea7cfd0..0000000 Binary files a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/healthcare-analytics-data.tar.gz and /dev/null differ diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/healthcare-analytics.tar.gz b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/healthcare-analytics.tar.gz index 41de6d0..35bf921 100644 Binary files a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/healthcare-analytics.tar.gz and b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/healthcare-analytics.tar.gz differ diff --git a/In-Database-Machine-Learning-Recommendation/.DS_Store b/In-Database-Machine-Learning-Recommendation/.DS_Store index 88b225c..61dbd03 100644 Binary files a/In-Database-Machine-Learning-Recommendation/.DS_Store and b/In-Database-Machine-Learning-Recommendation/.DS_Store differ diff --git a/In-Database-Machine-Learning-Recommendation/Recommender_Queries.zip b/In-Database-Machine-Learning-Recommendation/Machine-Learning-Recommendation-Queries.zip similarity index 100% rename from 
In-Database-Machine-Learning-Recommendation/Recommender_Queries.zip rename to In-Database-Machine-Learning-Recommendation/Machine-Learning-Recommendation-Queries.zip diff --git a/In-Database-Machine-Learning-Recommendation/In-Database-Machine-Learning-Recommendation.tar.gz b/In-Database-Machine-Learning-Recommendation/Machine-Learning-Recommendation-Solution.tar.gz similarity index 100% rename from In-Database-Machine-Learning-Recommendation/In-Database-Machine-Learning-Recommendation.tar.gz rename to In-Database-Machine-Learning-Recommendation/Machine-Learning-Recommendation-Solution.tar.gz diff --git a/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/.DS_Store b/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/.DS_Store index a1562ec..feef84e 100644 Binary files a/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/.DS_Store and b/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/.DS_Store differ diff --git a/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/Entity_Resolution_Queries.zip b/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/Machine-Learning-Entity-Resolution-Queries.zip similarity index 100% rename from In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/Entity_Resolution_Queries.zip rename to In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/Machine-Learning-Entity-Resolution-Queries.zip diff --git a/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/export_766552986.tar.gz b/In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/Machine-Learning-Entity-Resolution-Solution.tar.gz similarity index 100% rename from In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/export_766552986.tar.gz rename to In-Database-Machine-Learning-for-Big-Data-Entity-Resolution/Machine-Learning-Entity-Resolution-Solution.tar.gz diff --git a/Low-Rank-Approximation-Machine-Learning/.DS_Store b/Low-Rank-Approximation-Machine-Learning/.DS_Store index ccc2d17..a5f731b 100644 
Binary files a/Low-Rank-Approximation-Machine-Learning/.DS_Store and b/Low-Rank-Approximation-Machine-Learning/.DS_Store differ diff --git a/Low-Rank-Approximation-Machine-Learning/LowRankApproximation_Queries.zip b/Low-Rank-Approximation-Machine-Learning/Low-Rank-Approximation-Machine-Learning-Queries.zip similarity index 100% rename from Low-Rank-Approximation-Machine-Learning/LowRankApproximation_Queries.zip rename to Low-Rank-Approximation-Machine-Learning/Low-Rank-Approximation-Machine-Learning-Queries.zip diff --git a/Low-Rank-Approximation-Machine-Learning/Low-Rank-Approximation-Machine-Learning.tar.gz b/Low-Rank-Approximation-Machine-Learning/Low-Rank-Approximation-Machine-Learning-Solution.tar.gz similarity index 100% rename from Low-Rank-Approximation-Machine-Learning/Low-Rank-Approximation-Machine-Learning.tar.gz rename to Low-Rank-Approximation-Machine-Learning/Low-Rank-Approximation-Machine-Learning-Solution.tar.gz diff --git a/Low-Rank-Approximation-Machine-Learning/export_318145902.tar.gz b/Low-Rank-Approximation-Machine-Learning/export_318145902.tar.gz deleted file mode 100644 index 2419f5e..0000000 Binary files a/Low-Rank-Approximation-Machine-Learning/export_318145902.tar.gz and /dev/null differ diff --git a/Machine-Learning-and-Real-time-Fraud-Detection/.DS_Store b/Machine-Learning-and-Real-time-Fraud-Detection/.DS_Store index e3f5aeb..716dbff 100644 Binary files a/Machine-Learning-and-Real-time-Fraud-Detection/.DS_Store and b/Machine-Learning-and-Real-time-Fraud-Detection/.DS_Store differ diff --git a/Machine-Learning-and-Real-time-Fraud-Detection/ML-Realtime fraud detction Query.zip b/Machine-Learning-and-Real-time-Fraud-Detection/Machine-Learning-and-Real-time-Fraud-Detection-Queries.zip similarity index 100% rename from Machine-Learning-and-Real-time-Fraud-Detection/ML-Realtime fraud detction Query.zip rename to Machine-Learning-and-Real-time-Fraud-Detection/Machine-Learning-and-Real-time-Fraud-Detection-Queries.zip diff --git 
a/Machine-Learning-and-Real-time-Fraud-Detection/Machine-Learning-and-Real-time-Fraud-Detection.tar.gz b/Machine-Learning-and-Real-time-Fraud-Detection/Machine-Learning-and-Real-time-Fraud-Detection-Solution.tar.gz similarity index 100% rename from Machine-Learning-and-Real-time-Fraud-Detection/Machine-Learning-and-Real-time-Fraud-Detection.tar.gz rename to Machine-Learning-and-Real-time-Fraud-Detection/Machine-Learning-and-Real-time-Fraud-Detection-Solution.tar.gz diff --git a/Network-and-IT-Resource-Optimization/.DS_Store b/Network-and-IT-Resource-Optimization/.DS_Store index 2a71d00..1c7f3da 100644 Binary files a/Network-and-IT-Resource-Optimization/.DS_Store and b/Network-and-IT-Resource-Optimization/.DS_Store differ diff --git a/Network-and-IT-Resource-Optimization/Network-IT-resource Query.zip b/Network-and-IT-Resource-Optimization/Network-IT-Resource-Queries.zip similarity index 100% rename from Network-and-IT-Resource-Optimization/Network-IT-resource Query.zip rename to Network-and-IT-Resource-Optimization/Network-IT-Resource-Queries.zip diff --git a/Network-and-IT-Resource-Optimization/network.tar.gz b/Network-and-IT-Resource-Optimization/Network-IT-Resource-Solution.tar.gz similarity index 100% rename from Network-and-IT-Resource-Optimization/network.tar.gz rename to Network-and-IT-Resource-Optimization/Network-IT-Resource-Solution.tar.gz diff --git a/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/.DS_Store b/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/.DS_Store index f50124e..7783493 100644 Binary files a/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/.DS_Store and b/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/.DS_Store differ diff --git a/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/recommendation_Queries.zip b/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/Recommendation-Engine-2-Personallized-Marketing-Queries.zip similarity index 100% rename from 
Recommendation-Engine-2.0-Hyper-Personalized-Marketing/recommendation_Queries.zip rename to Recommendation-Engine-2.0-Hyper-Personalized-Marketing/Recommendation-Engine-2-Personallized-Marketing-Queries.zip diff --git a/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/recommendation.tar.gz b/Recommendation-Engine-2.0-Hyper-Personalized-Marketing/Recommendation-Engine-2-Personallized-Marketing-Solution.tar.gz similarity index 100% rename from Recommendation-Engine-2.0-Hyper-Personalized-Marketing/recommendation.tar.gz rename to Recommendation-Engine-2.0-Hyper-Personalized-Marketing/Recommendation-Engine-2-Personallized-Marketing-Solution.tar.gz diff --git a/Recommendation-Engine-Movie-Recommendation/.DS_Store b/Recommendation-Engine-Movie-Recommendation/.DS_Store index 9e4a190..47cae4e 100644 Binary files a/Recommendation-Engine-Movie-Recommendation/.DS_Store and b/Recommendation-Engine-Movie-Recommendation/.DS_Store differ diff --git a/Recommendation-Engine-Movie-Recommendation/Movie-Recommendation Query.zip b/Recommendation-Engine-Movie-Recommendation/Movie-Recommendation-Queries.zip similarity index 100% rename from Recommendation-Engine-Movie-Recommendation/Movie-Recommendation Query.zip rename to Recommendation-Engine-Movie-Recommendation/Movie-Recommendation-Queries.zip diff --git a/Recommendation-Engine-Movie-Recommendation/Recommendation-Engine-Movie-Recommendation.tar.gz b/Recommendation-Engine-Movie-Recommendation/Movie-Recommendation-Solution.tar.gz similarity index 100% rename from Recommendation-Engine-Movie-Recommendation/Recommendation-Engine-Movie-Recommendation.tar.gz rename to Recommendation-Engine-Movie-Recommendation/Movie-Recommendation-Solution.tar.gz diff --git a/Social-Network-Analysis/.DS_Store b/Social-Network-Analysis/.DS_Store index bc9d2dc..16f5b38 100644 Binary files a/Social-Network-Analysis/.DS_Store and b/Social-Network-Analysis/.DS_Store differ diff --git a/Social-Network-Analysis/Social-Network Query.zip 
b/Social-Network-Analysis/Social-Network-Analysis-Queries.zip similarity index 100% rename from Social-Network-Analysis/Social-Network Query.zip rename to Social-Network-Analysis/Social-Network-Analysis-Queries.zip diff --git a/Social-Network-Analysis/Social-Network-Analysis.tar.gz b/Social-Network-Analysis/Social-Network-Analysis-Solution.tar.gz similarity index 100% rename from Social-Network-Analysis/Social-Network-Analysis.tar.gz rename to Social-Network-Analysis/Social-Network-Analysis-Solution.tar.gz diff --git a/Supply-Chain-Analysis/.DS_Store b/Supply-Chain-Analysis/.DS_Store index 1c6813c..fb5a9ee 100644 Binary files a/Supply-Chain-Analysis/.DS_Store and b/Supply-Chain-Analysis/.DS_Store differ diff --git a/Supply-Chain-Analysis/Supply-Chain-Analysis.tar.gz b/Supply-Chain-Analysis/Supply-Chain-Analysis-Solution.tar.gz similarity index 100% rename from Supply-Chain-Analysis/Supply-Chain-Analysis.tar.gz rename to Supply-Chain-Analysis/Supply-Chain-Analysis-Solution.tar.gz