Skip to content

Commit

Permalink
add Wayang ML4all example
Browse files Browse the repository at this point in the history
  • Loading branch information
paulk-asert committed May 23, 2024
1 parent 95c67be commit 1a24aed
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 3 deletions.
5 changes: 5 additions & 0 deletions subprojects/WhiskeyWayang/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ apply plugin: 'application'

repositories {
mavenCentral()
mavenLocal()
// maven {
// url 'https://repository.apache.org/content/repositories/orgapachewayang-1017'
// }
Expand Down Expand Up @@ -47,6 +48,10 @@ dependencies {
implementation "org.apache.groovy:groovy:5.0.0-alpha-8"
implementation "org.apache.wayang:wayang-api-scala-java_$scalaMajorVersion:$wayangVersion"
implementation "org.apache.wayang:wayang-java:$wayangVersion"
implementation("org.apache.wayang:wayang-ml4all:$wayangVersion") {
exclude(group: 'org.apache.spark', module: 'spark-graphx_2.12')
exclude(group: 'org.apache.spark', module: 'spark-mllib_2.12')
}
implementation("org.apache.wayang:wayang-spark_$scalaMajorVersion:$wayangVersion") {
transitive = false
}
Expand Down
15 changes: 12 additions & 3 deletions subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayang.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import static java.lang.Math.sqrt

record Point(double[] pts) implements Serializable {
static Point fromLine(String line) {
new Point(line.split(',')[2..-1]*.toDouble() as double[]) }
new Point(line.split(',')[2..-1] as double[]) }
}

record TaggedPointCounter(double[] pts, int cluster, long count) implements Serializable {
Expand All @@ -44,7 +44,7 @@ record TaggedPointCounter(double[] pts, int cluster, long count) implements Seri
}

TaggedPointCounter average() {
new TaggedPointCounter(pts.collect{ double d -> d/count }, cluster, 0)
new TaggedPointCounter(pts.collect{ double d -> d/count }, cluster, count)
}
}

Expand Down Expand Up @@ -119,5 +119,14 @@ var finalCentroids = initialCentroids

println 'Centroids:'
finalCentroids.each { c ->
println "Cluster$c.cluster: ${c.pts.collect{ sprintf('%.3f', it) }.join(', ')}"
var pts = c.pts.collect{ sprintf '%.2f', it }.join(', ')
println "Cluster$c.cluster ($c.count points): $pts"
}
/*
Centroids:
Cluster0 (24 points): 2.79, 2.42, 1.46, 0.04, 0.00, 1.88, 1.67, 1.96, 1.92, 2.08, 2.17, 1.71
Cluster1 (6 points): 3.67, 1.50, 3.67, 3.33, 0.67, 0.17, 1.67, 0.50, 1.17, 1.33, 1.17, 0.17
Cluster2 (15 points): 1.80, 1.93, 1.93, 1.13, 0.20, 1.20, 1.33, 0.80, 1.60, 1.80, 1.00, 1.13
Cluster3 (2 points): 2.00, 1.50, 2.50, 0.50, 0.00, 0.00, 2.50, 0.50, 0.00, 1.00, 2.00, 2.00
Cluster4 (39 points): 1.49, 2.51, 1.05, 0.21, 0.08, 1.10, 1.13, 0.54, 1.26, 1.74, 1.97, 2.13
*/
76 changes: 76 additions & 0 deletions subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayangML.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.wayang.core.api.WayangContext
import org.apache.wayang.java.Java
import org.apache.wayang.ml4all.abstraction.api.LocalStage
import org.apache.wayang.ml4all.abstraction.api.Transform
import org.apache.wayang.ml4all.abstraction.plan.ML4allModel
import org.apache.wayang.ml4all.abstraction.plan.ML4allPlan
import org.apache.wayang.ml4all.algorithms.kmeans.KMeansCompute
import org.apache.wayang.ml4all.algorithms.kmeans.KMeansConvergeOrMaxIterationsLoop
import org.apache.wayang.ml4all.algorithms.kmeans.KMeansUpdate
//import org.apache.wayang.ml4all.algorithms.kmeans.TransformCSV
import org.apache.wayang.spark.Spark

// Number of cluster centres to seed; handed to KMeansStageWithRandoms when the plan is built.
int k = 3
// Iteration cap handed to KMeansConvergeOrMaxIterationsLoop.
int maxIterations = 100
// Convergence threshold handed to the same loop operator.
// NOTE(review): 0 presumably means "iterate until the centres stop moving or the cap is hit" - confirm against ML4all.
double accuracy = 0.0d

/**
 * ML4all transform step that turns one CSV record into its numeric feature vector.
 * The first two comma-separated fields are skipped (in the bundled
 * whiskey_noheader.csv they hold a row number and a name); every remaining
 * field is coerced to a double.
 */
class TransformCSV extends Transform<double[], String> {
    /**
     * @param input one comma-separated record
     * @return the fields from index 2 through the last one, as a double[]
     */
    double[] transform(String input) {
        String[] fields = input.split(',')
        // 2..-1 selects everything from the third field up to and including the last
        def features = fields[2..-1]
        return features as double[]
    }
}

/**
 * ML4all local stage that seeds the model with {@code k} random starting centres,
 * each having {@code dimension} coordinates.
 */
class KMeansStageWithRandoms extends LocalStage {
    /** Number of centres to create. */
    int k
    /** Number of coordinates per centre. */
    int dimension

    // Unseeded generator, so every run starts from different centres.
    private final Random rng = new Random()

    /**
     * Builds the initial centres and stores them in the model under the key 'centers'.
     *
     * @param model the ML4all model that receives the k x dimension array of centres
     */
    void staging(ML4allModel model) {
        double[][] centers = new double[k][]
        k.times { int row ->
            // Standard-normal sample shifted by +2 for each coordinate
            List<Double> coords = (0..<dimension).collect { rng.nextGaussian() + 2 }
            centers[row] = coords as double[]
        }
        model.put('centers', centers)
    }
}

// Locate the bundled dataset on the classpath; only the path part of the resource URL is kept.
var csvPath = WhiskeyWayangML.classLoader.getResource('whiskey_noheader.csv').path
// Each record carries 12 numeric columns after the two leading fields.
var featureCount = 12

// Register both the Spark and the Java plugin with the Wayang context.
var wayang = new WayangContext()
    .withPlugin(Spark.basicPlugin())
    .withPlugin(Java.basicPlugin())

// Wire up the k-means pipeline one operator at a time.
var kmeansPlan = new ML4allPlan()
kmeansPlan.transformOp = new TransformCSV()
kmeansPlan.localStage = new KMeansStageWithRandoms(k: k, dimension: featureCount)
kmeansPlan.computeOp = new KMeansCompute()
kmeansPlan.updateOp = new KMeansUpdate()
kmeansPlan.loopOp = new KMeansConvergeOrMaxIterationsLoop(accuracy, maxIterations)

// Run the plan against the CSV file and fetch the resulting centres from the model.
var trained = kmeansPlan.execute('file:' + csvPath, wayang)
var centers = trained.getByKey('centers')

// Print one line per centre with its coordinates rounded to two decimals.
centers.eachWithIndex { coords, clusterNo ->
    var formatted = coords.collect { sprintf('%.2f', it) }.join(', ')
    println "Cluster$clusterNo: $formatted"
}

/*
Cluster0: 1.57, 2.32, 1.32, 0.45, 0.09, 1.08, 1.19, 0.60, 1.26, 1.74, 1.72, 1.85
Cluster1: 3.43, 1.57, 3.43, 3.14, 0.57, 0.14, 1.71, 0.43, 1.29, 1.43, 1.29, 0.14
Cluster2: 2.73, 2.42, 1.46, 0.04, 0.04, 1.88, 1.69, 1.88, 1.92, 2.04, 2.12, 1.81
*/
86 changes: 86 additions & 0 deletions subprojects/WhiskeyWayang/src/main/resources/whiskey_noheader.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
01,Aberfeldy,2,2,2,0,0,2,1,2,2,2,2,2
02,Aberlour,3,3,1,0,0,4,3,2,2,3,3,2
03,AnCnoc,1,3,2,0,0,2,0,0,2,2,3,2
04,Ardbeg,4,1,4,4,0,0,2,0,1,2,1,0
05,Ardmore,2,2,2,0,0,1,1,1,2,3,1,1
06,ArranIsleOf,2,3,1,1,0,1,1,1,0,1,1,2
07,Auchentoshan,0,2,0,0,0,1,1,0,2,2,3,3
08,Auchroisk,2,3,1,0,0,2,1,2,2,2,2,1
09,Aultmore,2,2,1,0,0,1,0,0,2,2,2,2
10,Balblair,2,3,2,1,0,0,2,0,2,1,2,1
11,Balmenach,4,3,2,0,0,2,1,3,3,0,1,2
12,Belvenie,3,2,1,0,0,3,2,1,0,2,2,2
13,BenNevis,4,2,2,0,0,2,2,0,2,2,2,2
14,Benriach,2,2,1,0,0,2,2,0,0,2,3,2
15,Benrinnes,3,2,2,0,0,3,1,1,2,3,2,2
16,Benromach,2,2,2,0,0,2,2,1,2,2,2,2
17,Bladnoch,1,2,1,0,0,0,1,1,0,2,2,3
18,BlairAthol,2,2,2,0,0,1,2,2,2,2,2,2
19,Bowmore,2,2,3,1,0,2,2,1,1,1,1,2
20,Bruichladdich,1,1,2,2,0,2,2,1,2,2,2,2
21,Bunnahabhain,1,2,1,1,0,1,1,1,1,2,2,3
22,Caol Ila,3,1,4,2,1,0,2,0,2,1,1,1
23,Cardhu,1,3,1,0,0,1,1,0,2,2,2,2
24,Clynelish,3,2,3,3,1,0,2,0,1,1,2,0
25,Craigallechie,2,2,2,0,1,2,2,1,2,2,1,4
26,Craigganmore,2,3,2,1,0,0,1,0,2,2,2,2
27,Dailuaine,4,2,2,0,0,1,2,2,2,2,2,1
28,Dalmore,3,2,2,1,0,1,2,2,1,2,3,1
29,Dalwhinnie,2,2,2,0,0,2,1,0,1,2,2,2
30,Deanston,2,2,1,0,0,2,1,1,1,3,2,1
31,Dufftown,2,3,1,1,0,0,0,0,1,2,2,2
32,Edradour,2,3,1,0,0,2,1,1,4,2,2,2
33,GlenDeveronMacduff,2,3,1,1,1,1,1,2,0,2,0,1
34,GlenElgin,2,3,1,0,0,2,1,1,1,1,2,3
35,GlenGarioch,2,1,3,0,0,0,3,1,0,2,2,2
36,GlenGrant,1,2,0,0,0,1,0,1,2,1,2,1
37,GlenKeith,2,3,1,0,0,1,2,1,2,1,2,1
38,GlenMoray,1,2,1,0,0,1,2,1,2,2,2,4
39,GlenOrd,3,2,1,0,0,1,2,1,1,2,2,2
40,GlenScotia,2,2,2,2,0,1,0,1,2,2,1,1
41,GlenSpey,1,3,1,0,0,0,1,1,1,2,0,2
42,Glenallachie,1,3,1,0,0,1,1,0,1,2,2,2
43,Glendronach,4,2,2,0,0,2,1,4,2,2,2,0
44,Glendullan,3,2,1,0,0,2,1,2,1,2,3,2
45,Glenfarclas,2,4,1,0,0,1,2,3,2,3,2,2
46,Glenfiddich,1,3,1,0,0,0,0,0,0,2,2,2
47,Glengoyne,1,2,0,0,0,1,1,1,2,2,3,2
48,Glenkinchie,1,2,1,0,0,1,2,0,0,2,2,2
49,Glenlivet,2,3,1,0,0,2,2,2,1,2,2,3
50,Glenlossie,1,2,1,0,0,1,2,0,1,2,2,2
51,Glenmorangie,2,2,1,1,0,1,2,0,2,1,2,2
52,Glenrothes,2,3,1,0,0,1,1,2,1,2,2,0
53,Glenturret,2,3,1,0,0,2,2,2,2,2,1,2
54,Highland Park,2,2,3,1,0,2,1,1,1,2,1,1
55,Inchgower,1,3,1,1,0,2,2,0,1,2,1,2
56,Isle of Jura,2,1,2,2,0,1,1,0,2,1,1,1
57,Knochando,2,3,1,0,0,2,2,1,2,1,2,2
58,Lagavulin,4,1,4,4,1,0,1,2,1,1,1,0
59,Laphroig,4,2,4,4,1,0,0,1,1,1,0,0
60,Linkwood,2,3,1,0,0,1,1,2,0,1,3,2
61,Loch Lomond,1,1,1,1,0,1,1,0,1,2,1,2
62,Longmorn,3,2,1,0,0,1,1,1,3,3,2,3
63,Macallan,4,3,1,0,0,2,1,4,2,2,3,1
64,Mannochmore,2,1,1,0,0,1,1,1,2,1,2,2
65,Miltonduff,2,4,1,0,0,1,0,0,2,1,1,2
66,Mortlach,3,2,2,0,0,2,3,3,2,1,2,2
67,Oban,2,2,2,2,0,0,2,0,2,2,2,0
68,OldFettercairn,1,2,2,0,1,2,2,1,2,3,1,1
69,OldPulteney,2,1,2,2,1,0,1,1,2,2,2,2
70,RoyalBrackla,2,3,2,1,1,1,2,1,0,2,3,2
71,RoyalLochnagar,3,2,2,0,0,2,2,2,2,2,3,1
72,Scapa,2,2,1,1,0,2,1,1,2,2,2,2
73,Speyburn,2,4,1,0,0,2,1,0,0,2,1,2
74,Speyside,2,2,1,0,0,1,0,1,2,2,2,2
75,Springbank,2,2,2,2,0,2,2,1,2,1,0,1
76,Strathisla,2,2,1,0,0,2,2,2,3,3,3,2
77,Strathmill,2,3,1,0,0,0,2,0,2,1,3,2
78,Talisker,4,2,3,3,0,1,3,0,1,2,2,0
79,Tamdhu,1,2,1,0,0,2,0,1,1,2,2,2
80,Tamnavulin,1,3,2,0,0,0,2,0,2,1,2,3
81,Teaninich,2,2,2,1,0,0,2,0,0,0,2,2
82,Tobermory,1,1,1,0,0,1,0,0,1,2,2,2
83,Tomatin,2,3,2,0,0,2,2,1,1,2,0,1
84,Tomintoul,0,3,1,0,0,2,2,1,1,2,1,2
85,Tomore,2,2,1,0,0,1,0,1,2,1,0,0
86,Tullibardine,2,3,0,0,1,0,2,1,1,2,2,1

0 comments on commit 1a24aed

Please sign in to comment.