-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClustering.yaml
100 lines (97 loc) · 2.97 KB
/
Clustering.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Benchmark-level metadata: identity, authorship, result storage and the
# software backend used to run the modules.
id: clustering_benchmark
description: Clustering benchmark on Iris and Penguins Dataset.
# Quoted so the value stays the string "1.0"; a bare 1.0 is parsed as a float.
version: "1.0"
benchmarker: "OmniBenchmark Team at Robinsons Lab"
# NOTE(review): play.min.io looks like a public playground endpoint - confirm
# it is the intended storage location before relying on stored results.
storage: https://play.min.io
# Quoted for the same reason as `version`: a bare 0.01 is parsed as a float.
benchmark_yaml_spec: "0.01"
storage_api: S3
# NOTE(review): S3 bucket naming rules do not permit underscores - confirm the
# storage backend accepts this name.
storage_bucket_name: clustering_benchmark
# Each entry under `software_environments` carries an `envmodule` value;
# presumably that is the one consumed with this backend - TODO confirm.
software_backend: envmodules
# Named software environments. Modules select one of these keys ("R" or
# "python") through their `software_environment` field. Every environment
# lists the same five attributes, one per backend flavour.
software_environments:
  R:
    description: "R 4.3.3 with gfbf-2023 toolchain"
    easyconfig: R-4.3.3-gfbf-2023b.eb
    envmodule: R/4.3.3-gfbf-2023b
    conda: R_4.3.3_try.yaml
    apptainer: http://registry.ch/R_4.3.3-gfbf-2023b.sif
  python:
    description: "Python3.6.3"
    easyconfig: Python-3.6.3-foss-2017b.eb
    envmodule: python/3.6.3-foss-2017b
    conda: python_v363_test.yaml
    # NOTE(review): "vX" and the gfbf-2023b suffix do not match the 3.6.3 /
    # foss-2017b values above - looks like a placeholder image; confirm.
    apptainer: http://registry.ch/python_vX-gfbf-2023b.sif
# Ordered list of benchmark stages. Each stage declares its modules, the
# outputs it produces and (except for this first one) the inputs it consumes.
stages:
# Stage `data`: two dataset modules, both run in the "python" environment.
# It declares no inputs and produces the feature and label files.
- id: data
  modules:
  - id: iris
    name: "Iris Dataset"
    software_environment: "python"
    repository:
      url: https://github.com/omnibenchmark-example/iris.git
      commit: "47c63f0"
  - id: penguins
    name: "Penguins Dataset"
    software_environment: "python"
    repository:
      url: https://github.com/omnibenchmark-example/penguins.git
      # Quoted: this short SHA is all digits and would otherwise be parsed
      # as the integer 9032478 instead of a string.
      commit: "9032478"
  # NOTE(review): the {input}/{stage}/{module}/{params}/{dataset} placeholders
  # are presumably expanded by the benchmark runner - confirm.
  outputs:
  - id: data.features
    path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
  - id: data.labels
    path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"
# Stage `distances`: a single module (D1) declared with four parameter sets,
# one per `--measure` value. Consumes `data.features` and produces the
# `distances` output.
- id: distances
  modules:
  - id: D1
    software_environment: "python"
    # Each `values` entry is one flag/value pair for the module.
    parameters:
    - values: ["--measure", "cosine"]
    - values: ["--measure", "euclidean"]
    - values: ["--measure", "manhattan"]
    - values: ["--measure", "chebyshev"]
    repository:
      url: https://github.com/omnibenchmark-example/distance.git
      # Quoted so the short SHA is always read as a string.
      commit: "dd99d4f"
  inputs:
  - entries:
    - data.features
  outputs:
  - id: distances
    path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"
# Stage `methods`: two clustering modules, kmeans (in the "python"
# environment) and ward (in the "R" environment). Both consume the
# `distances` output and produce `methods.clusters`.
- id: methods
  modules:
  - id: kmeans
    software_environment: "python"
    repository:
      url: https://github.com/omnibenchmark-example/kmeans.git
      # Short SHAs are quoted so they are always read as strings.
      commit: "049c8b1"
  - id: ward
    software_environment: "R"
    repository:
      url: https://github.com/omnibenchmark-example/ward.git
      commit: "976e3f3"
  inputs:
  - entries:
    - distances
  outputs:
  - id: methods.clusters
    path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"
# Stage `metrics`: two evaluation modules (ari, accuracy), both run in the
# "R" environment. A single input entry lists `methods.clusters` together
# with `data.labels`; the stage produces `metrics.mapping`.
- id: metrics
  modules:
  - id: ari
    software_environment: "R"
    repository:
      url: https://github.com/omnibenchmark-example/ari.git
      # Short SHAs are quoted so they are always read as strings.
      commit: "72708f0"
  - id: accuracy
    software_environment: "R"
    repository:
      url: https://github.com/omnibenchmark-example/accuracy.git
      commit: "e26b32f"
  inputs:
  - entries:
    - methods.clusters
    - data.labels
  outputs:
  - id: metrics.mapping
    path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"