Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ALGOS-263] feat(algos): Develop test scripts for evaluating the algorithms #154

Closed
wants to merge 10 commits into from
2 changes: 1 addition & 1 deletion algorithms/Centrality/article_rank/tg_article_rank.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_article_rank (STRING v_type, STRING e_type,
CREATE DISTRIBUTED QUERY tg_article_rank (STRING v_type, STRING e_type,
FLOAT max_change = 0.001, INT maximum_iteration = 25, FLOAT damping = 0.85, INT top_k = 100,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_closeness_cent_approx (
CREATE DISTRIBUTED QUERY tg_closeness_cent_approx (
SET<STRING> v_type_set, SET<STRING> e_type_set, STRING reverse_e_type, INT top_k=100, INT k = 100, INT max_hops = 10, DOUBLE epsilon = 0.1, BOOL print_results = true,
STRING file_path = "", INT debug = 0, INT sample_index = 0, INT max_size = 1000, BOOL wf = True ) SYNTAX V1 {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_degree_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set, BOOL in_degree = TRUE, BOOL out_degree = TRUE, INT top_k=100, BOOL print_results = TRUE, STRING result_attribute = "",STRING file_path = "", BOOL normalize = TRUE) SYNTAX V1 {
CREATE DISTRIBUTED QUERY tg_degree_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set, BOOL in_degree = TRUE, BOOL out_degree = TRUE, INT top_k=100, BOOL print_results = TRUE, STRING result_attribute = "",STRING file_path = "", BOOL normalize = TRUE) SYNTAX V1 {
/*
First Author: <First Author Name>
First Commit Date: <First Commit Date>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_weighted_degree_cent(STRING v_type, STRING e_type, STRING reverse_e_type, STRING weight_attribute, BOOL in_degree = TRUE, BOOL out_degree = TRUE, INT top_k=100, BOOL print_results = TRUE, STRING result_attribute = "",STRING file_path = "") SYNTAX V1 {
CREATE DISTRIBUTED QUERY tg_weighted_degree_cent(STRING v_type, STRING e_type, STRING reverse_e_type, STRING weight_attribute, BOOL in_degree = TRUE, BOOL out_degree = TRUE, INT top_k=100, BOOL print_results = TRUE, STRING result_attribute = "",STRING file_path = "") SYNTAX V1 {
/*
First Author: <First Author Name>
First Commit Date: <First Commit Date>
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Centrality/eigenvector/tg_eigenvector_cent.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_eigenvector_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, INT maximum_iteration = 100, FLOAT conv_limit = 0.000001,
CREATE DISTRIBUTED QUERY tg_eigenvector_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, INT maximum_iteration = 100, FLOAT conv_limit = 0.000001,
INT top_k = 100, BOOL print_results = True, STRING result_attribute = "",STRING file_path = ""
) SYNTAX V1 {

Expand Down
2 changes: 1 addition & 1 deletion algorithms/Centrality/harmonic/tg_harmonic_cent.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_harmonic_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set,INT max_hops = 10,
CREATE DISTRIBUTED QUERY tg_harmonic_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set,INT max_hops = 10,
INT top_k = 100, BOOL wf = TRUE, BOOL print_results = True, STRING result_attribute = "",
STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_influence_maximization_CELF(STRING v_type,STRING e_type,STRING weight_attribute,INT top_k,
CREATE DISTRIBUTED QUERY tg_influence_maximization_CELF(STRING v_type,STRING e_type,STRING weight_attribute,INT top_k,
BOOL print_results = True, STRING file_path = "") SYNTAX V1 {

/*
Expand Down Expand Up @@ -128,4 +128,4 @@ CREATE QUERY tg_influence_maximization_CELF(STRING v_type,STRING e_type,STRING w
IF print_results THEN
PRINT @@res_list;
END;
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_influence_maximization_greedy(STRING v_type,STRING e_type,STRING weight_attribute,INT top_k,
CREATE DISTRIBUTED QUERY tg_influence_maximization_greedy(STRING v_type,STRING e_type,STRING weight_attribute,INT top_k,
BOOL print_results = True, STRING file_path = "") SYNTAX V1 {

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_pagerank (STRING v_type, STRING e_type,
CREATE DISTRIBUTED QUERY tg_pagerank (STRING v_type, STRING e_type,
FLOAT max_change=0.001, INT maximum_iteration=25, FLOAT damping=0.85, INT top_k = 100,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "",
BOOL display_edges = FALSE) SYNTAX V1 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_pagerank_wt (STRING v_type, STRING e_type, STRING weight_attribute,
CREATE DISTRIBUTED QUERY tg_pagerank_wt (STRING v_type, STRING e_type, STRING weight_attribute,
FLOAT max_change=0.001, INT maximum_iteration=25, FLOAT damping=0.85, INT top_k = 100,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "",
BOOL display_edges = FALSE) SYNTAX V1 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_pagerank_pers_ap_batch(STRING v_type, STRING e_type,
CREATE DISTRIBUTED QUERY tg_pagerank_pers_ap_batch(STRING v_type, STRING e_type,
FLOAT max_change=0.001, INT maximum_iteration=25, FLOAT damping = 0.85, INT top_k = 100,INT batch_num,BOOL print_results,STRING file_path) SYNTAX V1 {

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_pagerank_pers(SET<VERTEX> source, STRING e_type,
CREATE DISTRIBUTED QUERY tg_pagerank_pers(SET<VERTEX> source, STRING e_type,
FLOAT max_change=0.001, INT maximum_iteration=25, FLOAT damping = 0.85, INT top_k = 100,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_greedy_graph_coloring(SET<STRING> v_type_set,SET<STRING> e_type_set,UINT max_colors = 999999,
CREATE DISTRIBUTED QUERY tg_greedy_graph_coloring(SET<STRING> v_type_set,SET<STRING> e_type_set,UINT max_colors = 999999,
BOOL print_color_count = TRUE, BOOL print_stats = TRUE, STRING file_path = "") SYNTAX V1 {

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_knn_cosine_ss (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set, STRING weight_attribute,
CREATE DISTRIBUTED QUERY tg_knn_cosine_ss (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set, STRING weight_attribute,
STRING label, INT top_k, BOOL print_results = TRUE, STRING file_path = "", STRING result_attribute = "") RETURNS (STRING) SYNTAX V1 {

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_maximal_indep_set(STRING v_type, STRING e_type, INT maximum_iteration = 100, BOOL print_results = TRUE, STRING file_path = "") SYNTAX V1 {
CREATE DISTRIBUTED QUERY tg_maximal_indep_set(STRING v_type, STRING e_type, INT maximum_iteration = 100, BOOL print_results = TRUE, STRING file_path = "") SYNTAX V1 {

/*
First Author: <First Author Name>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_scc (SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set,
CREATE DISTRIBUTED QUERY tg_scc (SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set,
INT top_k_dist, INT print_limit, INT maximum_iteration = 500, INT iter_wcc = 5, BOOL print_results = TRUE, STRING result_attribute= "", STRING file_path="") SYNTAX V1 {
//INT iter_end_trim = 3

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_wcc (SET<STRING> v_type_set, SET<STRING> e_type_set, INT print_limit = 100,
CREATE DISTRIBUTED QUERY tg_wcc (SET<STRING> v_type_set, SET<STRING> e_type_set, INT print_limit = 100,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

/*
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Community/k_core/tg_kcore.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_kcore(STRING v_type, STRING e_type, INT k_min = 0, INT k_max = -1, BOOL print_results = TRUE,
CREATE DISTRIBUTED QUERY tg_kcore(STRING v_type, STRING e_type, INT k_min = 0, INT k_max = -1, BOOL print_results = TRUE,
STRING result_attribute = "", STRING file_path = "", BOOL print_all_k = FALSE, BOOL show_shells=FALSE) SYNTAX V1 {

/*
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Community/label_propagation/tg_label_prop.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_label_prop (SET<STRING> v_type_set, SET<STRING> e_type_set, INT maximum_iteration, INT print_limit,
CREATE DISTRIBUTED QUERY tg_label_prop (SET<STRING> v_type_set, SET<STRING> e_type_set, INT maximum_iteration, INT print_limit,
BOOL print_results = TRUE, STRING file_path = "", STRING result_attribute = "") SYNTAX V1 {


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_lcc (STRING v_type, STRING e_type,INT top_k=100,BOOL print_results = True, STRING result_attribute = "",
CREATE DISTRIBUTED QUERY tg_lcc (STRING v_type, STRING e_type,INT top_k=100,BOOL print_results = True, STRING result_attribute = "",
STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

/*
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Community/louvain/tg_louvain.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_louvain(SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute = "weight", INT maximum_iteration = 10,
CREATE DISTRIBUTED QUERY tg_louvain(SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute = "weight", INT maximum_iteration = 10,
STRING result_attribute = "cid", STRING file_path = "", BOOL print_stats = FALSE) SYNTAX V1 {

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_slpa (SET<STRING> v_type_set, SET<STRING> e_type_set, FLOAT threshold, INT maximum_iteration, INT print_limit,
CREATE DISTRIBUTED QUERY tg_slpa (SET<STRING> v_type_set, SET<STRING> e_type_set, FLOAT threshold, INT maximum_iteration, INT print_limit,
BOOL print_results = TRUE, STRING file_path = "") SYNTAX V1 {

/*
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Path/bfs/tg_bfs.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_bfs(SET<STRING> v_type_set, SET<STRING> e_type_set,INT max_hops=10, VERTEX v_start,
CREATE DISTRIBUTED QUERY tg_bfs(SET<STRING> v_type_set, SET<STRING> e_type_set,INT max_hops=10, VERTEX v_start,
BOOL print_results = True, STRING result_attribute = "",STRING file_path = "", BOOL display_edges = TRUE) SYNTAX V1 {

/*
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Path/cycle_component/tg_cycle_component.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_cycle_component(STRING v_type,STRING e_type,BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX v1{
CREATE DISTRIBUTED QUERY tg_cycle_component(STRING v_type,STRING e_type,BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX v1{


/*
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Path/minimum_spanning_forest/tg_msf.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_msf (SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute, STRING weight_type,
CREATE DISTRIBUTED QUERY tg_msf (SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute, STRING weight_type,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

/*
Expand Down
2 changes: 1 addition & 1 deletion algorithms/Path/minimum_spanning_tree/tg_mst.gsql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_mst(VERTEX opt_source, SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute, STRING weight_type,
CREATE DISTRIBUTED QUERY tg_mst(VERTEX opt_source, SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute, STRING weight_type,
INT maximum_iteration = -1, BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_all_path(VERTEX v_source, VERTEX target_v, INT depth = 10,
CREATE DISTRIBUTED QUERY tg_all_path(VERTEX v_source, VERTEX target_v, INT depth = 10,
BOOL print_results = TRUE, STRING file_path = "")SYNTAX v1 {

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_shortest_ss_no_wt (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
CREATE DISTRIBUTED QUERY tg_shortest_ss_no_wt (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
INT print_limit = -1, BOOL print_results =TRUE, STRING result_attribute ="", STRING file_path ="",
BOOL display_edges =FALSE) SYNTAX V1 {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_shortest_ss_any_wt (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
CREATE DISTRIBUTED QUERY tg_shortest_ss_any_wt (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
STRING weight_attribute, STRING weight_type, INT print_limit = -1, BOOL print_results = TRUE,
STRING result_attribute = "", STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_shortest_ss_pos_wt (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
CREATE DISTRIBUTED QUERY tg_shortest_ss_pos_wt (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
STRING weight_attribute, STRING weight_type, FLOAT epsilon = 0.001,BOOL print_results = TRUE, INT print_limit = -1,
BOOL display_edges = FALSE, STRING result_attribute = "",
STRING file_path = "") SYNTAX V1 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE QUERY tg_shortest_ss_pos_wt_tb (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
CREATE DISTRIBUTED QUERY tg_shortest_ss_pos_wt_tb (VERTEX source, SET<STRING> v_type_set, SET<STRING> e_type_set,
STRING weight_attribute, STRING weight_type, FLOAT epsilon = 0.001,BOOL print_results = TRUE, INT print_limit = -1,
BOOL display_edges = FALSE, STRING result_attribute = "",
STRING file_path = "", UINT write_size = 10000) SYNTAX V1 {
Expand Down
89 changes: 89 additions & 0 deletions algorithms_test/1_dataset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash

# Main function
# Download and extract every dataset described in config/1_dataset.json.
#
# The configuration file is looked up relative to this script's directory and
# is read with jq. Keys used:
#   .general_settings.default_directory   fallback target directory
#   .datasets.<name>.download_link        URL of the archive to fetch
#   .datasets.<name>.directory            optional per-dataset target directory
#   .datasets.<name>.top_level_dir        folder (inside the target directory)
#                                         that receives the extracted files
# A leading "~" in either directory is replaced with $HOME.
#
# A dataset whose folder already exists is treated as finished and skipped.
# Because of that, the folder is removed again whenever extraction fails, and
# archives are downloaded under a temporary name first; otherwise a failed run
# would be mistaken for a completed one on the next invocation.
#
# Exits with status 1 when jq or the configuration file is missing. Problems
# with an individual dataset are reported and the loop moves on to the next.
main() {
    local dir config_file default_directory datasets

    # Check if required commands are available
    if ! command -v jq &> /dev/null; then
        echo "Error: jq is not installed."
        exit 1
    fi

    # Read the JSON configuration file
    dir=$(cd "$(dirname "$0")"; pwd)
    config_file="${dir}/config/1_dataset.json"

    # Check if the configuration file exists
    if [ ! -f "$config_file" ]; then
        echo "Configuration file not found: $config_file"
        exit 1
    fi

    # Extract general settings ("// empty" avoids the literal string "null"
    # when the key is absent).
    default_directory=$(jq -r '.general_settings.default_directory // empty' "$config_file")
    default_directory=${default_directory/#\~/$HOME}

    # One base64-encoded {key, value} entry per line, so that entries can be
    # read line by line regardless of their content.
    datasets=$(jq -r '.datasets | to_entries[] | @base64' "$config_file")

    # Decode each dataset entry and process it
    echo "$datasets" | while IFS= read -r dataset_b64; do
        local dataset dataset_name download_link directory top_level_dir
        local file_name archive partial dataset_folder extracted

        # An empty dataset list still produces one blank line; ignore it.
        [ -n "$dataset_b64" ] || continue

        dataset=$(echo "${dataset_b64}" | base64 --decode)
        dataset_name=$(echo "$dataset" | jq -r '.key')
        download_link=$(echo "$dataset" | jq -r '.value.download_link // empty')
        directory=$(echo "$dataset" | jq -r '.value.directory // empty')
        directory=${directory/#\~/$HOME}
        directory=${directory:-$default_directory}
        top_level_dir=$(echo "$dataset" | jq -r '.value.top_level_dir // empty')

        echo "======================================== ${dataset_name} ========================================"

        # Without these values the paths below would be meaningless.
        if [ -z "$download_link" ] || [ -z "$top_level_dir" ] || [ -z "$directory" ]; then
            echo "Skipping ${dataset_name}: download_link, top_level_dir and a target directory are required"
            continue
        fi

        # Create the directory if it doesn't exist
        mkdir -p "$directory"

        # Extract the file name from the download link
        file_name=$(basename "$download_link")
        archive="$directory/$file_name"
        partial="$archive.part"

        # The dataset folder doubles as the "already extracted" marker.
        dataset_folder="$directory/$top_level_dir"
        if [ -d "$dataset_folder" ]; then
            echo "Directory $dataset_folder already exists, skipping unzipping."
            continue
        fi

        # Download the dataset if it doesn't exist
        if [ ! -f "$archive" ]; then
            echo "Downloading $file_name..."
            # Fetch under a temporary name and rename on success, so that an
            # interrupted transfer is never taken for a complete archive.
            if ! { wget -O "$partial" "$download_link" && mv -- "$partial" "$archive"; }; then
                echo "Failed to download $file_name"
                rm -f -- "$partial"
                continue
            fi
        fi

        # Only now create the folder: it did not exist before this iteration,
        # so it is safe to remove it again if extraction fails.
        mkdir -p "$dataset_folder"
        echo "Created directory: $dataset_folder"

        # Determine the file extension and unzip accordingly
        echo "Unzipping $file_name into $directory..."
        extracted=0
        case "$file_name" in
            *.tar.bz2)
                if tar -xvjf "$archive" -C "$directory" --strip-components=1 --one-top-level="$top_level_dir"; then
                    extracted=1
                fi
                ;;
            *.gz)
                if gunzip -c "$archive" > "$dataset_folder/${file_name%.gz}"; then
                    extracted=1
                fi
                ;;
            *)
                echo "Unsupported file format: $file_name"
                # Nothing was extracted; drop the empty marker folder.
                rm -rf -- "$dataset_folder"
                continue
                ;;
        esac

        if [ "$extracted" -eq 1 ]; then
            echo "Finished unzipping $file_name."
        else
            echo "Failed to unzip $file_name"
            # Remove partial output so the next run retries this dataset.
            rm -rf -- "$dataset_folder"
        fi
    done
}

# Script entry point: invoke main, which reads config/1_dataset.json next to
# this script and downloads/extracts each dataset listed there.
main
Loading
Loading