diff --git a/subprojects/WhiskeyWayang/build.gradle b/subprojects/WhiskeyWayang/build.gradle index 06a27ed..7b80e56 100644 --- a/subprojects/WhiskeyWayang/build.gradle +++ b/subprojects/WhiskeyWayang/build.gradle @@ -18,6 +18,7 @@ apply plugin: 'application' repositories { mavenCentral() + mavenLocal() // maven { // url 'https://repository.apache.org/content/repositories/orgapachewayang-1017' // } @@ -47,6 +48,10 @@ dependencies { implementation "org.apache.groovy:groovy:5.0.0-alpha-8" implementation "org.apache.wayang:wayang-api-scala-java_$scalaMajorVersion:$wayangVersion" implementation "org.apache.wayang:wayang-java:$wayangVersion" + implementation("org.apache.wayang:wayang-ml4all:$wayangVersion") { + exclude(group: 'org.apache.spark', module: 'spark-graphx_2.12') + exclude(group: 'org.apache.spark', module: 'spark-mllib_2.12') + } implementation("org.apache.wayang:wayang-spark_$scalaMajorVersion:$wayangVersion") { transitive = false } diff --git a/subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayang.groovy b/subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayang.groovy index 237e10d..8c75ec8 100644 --- a/subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayang.groovy +++ b/subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayang.groovy @@ -30,7 +30,7 @@ import static java.lang.Math.sqrt record Point(double[] pts) implements Serializable { static Point fromLine(String line) { - new Point(line.split(',')[2..-1]*.toDouble() as double[]) } + new Point(line.split(',')[2..-1] as double[]) } } record TaggedPointCounter(double[] pts, int cluster, long count) implements Serializable { @@ -44,7 +44,7 @@ record TaggedPointCounter(double[] pts, int cluster, long count) implements Seri } TaggedPointCounter average() { - new TaggedPointCounter(pts.collect{ double d -> d/count }, cluster, 0) + new TaggedPointCounter(pts.collect{ double d -> d/count }, cluster, count) } } @@ -119,5 +119,14 @@ var finalCentroids = initialCentroids println 'Centroids:' finalCentroids.each { c -> - println "Cluster$c.cluster: ${c.pts.collect{ sprintf('%.3f', it) }.join(', ')}" + var pts = c.pts.collect{ sprintf '%.2f', it }.join(', ') + println "Cluster$c.cluster ($c.count points): $pts" } +/* +Centroids: +Cluster0 (24 points): 2.79, 2.42, 1.46, 0.04, 0.00, 1.88, 1.67, 1.96, 1.92, 2.08, 2.17, 1.71 +Cluster1 (6 points): 3.67, 1.50, 3.67, 3.33, 0.67, 0.17, 1.67, 0.50, 1.17, 1.33, 1.17, 0.17 +Cluster2 (15 points): 1.80, 1.93, 1.93, 1.13, 0.20, 1.20, 1.33, 0.80, 1.60, 1.80, 1.00, 1.13 +Cluster3 (2 points): 2.00, 1.50, 2.50, 0.50, 0.00, 0.00, 2.50, 0.50, 0.00, 1.00, 2.00, 2.00 +Cluster4 (39 points): 1.49, 2.51, 1.05, 0.21, 0.08, 1.10, 1.13, 0.54, 1.26, 1.74, 1.97, 2.13 +*/ diff --git a/subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayangML.groovy b/subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayangML.groovy new file mode 100644 index 0000000..6cc6e4a --- /dev/null +++ b/subprojects/WhiskeyWayang/src/main/groovy/WhiskeyWayangML.groovy @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.wayang.core.api.WayangContext +import org.apache.wayang.java.Java +import org.apache.wayang.ml4all.abstraction.api.LocalStage +import org.apache.wayang.ml4all.abstraction.api.Transform +import org.apache.wayang.ml4all.abstraction.plan.ML4allModel +import org.apache.wayang.ml4all.abstraction.plan.ML4allPlan +import org.apache.wayang.ml4all.algorithms.kmeans.KMeansCompute +import org.apache.wayang.ml4all.algorithms.kmeans.KMeansConvergeOrMaxIterationsLoop +import org.apache.wayang.ml4all.algorithms.kmeans.KMeansUpdate +//import org.apache.wayang.ml4all.algorithms.kmeans.TransformCSV +import org.apache.wayang.spark.Spark + +int k = 3 +int maxIterations = 100 +double accuracy = 0 + +class TransformCSV extends Transform { + double[] transform(String input) { + input.split(',')[2..-1] as double[] + } +} + +class KMeansStageWithRandoms extends LocalStage { + int k, dimension + private r = new Random() + + void staging(ML4allModel model) { + double[][] centers = new double[k][] + for (i in 0.. + var pts = center.collect { sprintf '%.2f', it }.join(', ') + println "Cluster$idx: $pts" +} + +/* +Cluster0: 1.57, 2.32, 1.32, 0.45, 0.09, 1.08, 1.19, 0.60, 1.26, 1.74, 1.72, 1.85 +Cluster1: 3.43, 1.57, 3.43, 3.14, 0.57, 0.14, 1.71, 0.43, 1.29, 1.43, 1.29, 0.14 +Cluster2: 2.73, 2.42, 1.46, 0.04, 0.04, 1.88, 1.69, 1.88, 1.92, 2.04, 2.12, 1.81 +*/ diff --git a/subprojects/WhiskeyWayang/src/main/resources/whiskey_noheader.csv b/subprojects/WhiskeyWayang/src/main/resources/whiskey_noheader.csv new file mode 100644 index 0000000..23bcbfb --- /dev/null +++ b/subprojects/WhiskeyWayang/src/main/resources/whiskey_noheader.csv @@ -0,0 +1,86 @@ +01,Aberfeldy,2,2,2,0,0,2,1,2,2,2,2,2 +02,Aberlour,3,3,1,0,0,4,3,2,2,3,3,2 +03,AnCnoc,1,3,2,0,0,2,0,0,2,2,3,2 +04,Ardbeg,4,1,4,4,0,0,2,0,1,2,1,0 +05,Ardmore,2,2,2,0,0,1,1,1,2,3,1,1 +06,ArranIsleOf,2,3,1,1,0,1,1,1,0,1,1,2 +07,Auchentoshan,0,2,0,0,0,1,1,0,2,2,3,3 +08,Auchroisk,2,3,1,0,0,2,1,2,2,2,2,1 +09,Aultmore,2,2,1,0,0,1,0,0,2,2,2,2 +10,Balblair,2,3,2,1,0,0,2,0,2,1,2,1 +11,Balmenach,4,3,2,0,0,2,1,3,3,0,1,2 +12,Belvenie,3,2,1,0,0,3,2,1,0,2,2,2 +13,BenNevis,4,2,2,0,0,2,2,0,2,2,2,2 +14,Benriach,2,2,1,0,0,2,2,0,0,2,3,2 +15,Benrinnes,3,2,2,0,0,3,1,1,2,3,2,2 +16,Benromach,2,2,2,0,0,2,2,1,2,2,2,2 +17,Bladnoch,1,2,1,0,0,0,1,1,0,2,2,3 +18,BlairAthol,2,2,2,0,0,1,2,2,2,2,2,2 +19,Bowmore,2,2,3,1,0,2,2,1,1,1,1,2 +20,Bruichladdich,1,1,2,2,0,2,2,1,2,2,2,2 +21,Bunnahabhain,1,2,1,1,0,1,1,1,1,2,2,3 +22,Caol Ila,3,1,4,2,1,0,2,0,2,1,1,1 +23,Cardhu,1,3,1,0,0,1,1,0,2,2,2,2 +24,Clynelish,3,2,3,3,1,0,2,0,1,1,2,0 +25,Craigallechie,2,2,2,0,1,2,2,1,2,2,1,4 +26,Craigganmore,2,3,2,1,0,0,1,0,2,2,2,2 +27,Dailuaine,4,2,2,0,0,1,2,2,2,2,2,1 +28,Dalmore,3,2,2,1,0,1,2,2,1,2,3,1 +29,Dalwhinnie,2,2,2,0,0,2,1,0,1,2,2,2 +30,Deanston,2,2,1,0,0,2,1,1,1,3,2,1 +31,Dufftown,2,3,1,1,0,0,0,0,1,2,2,2 +32,Edradour,2,3,1,0,0,2,1,1,4,2,2,2 +33,GlenDeveronMacduff,2,3,1,1,1,1,1,2,0,2,0,1 +34,GlenElgin,2,3,1,0,0,2,1,1,1,1,2,3 +35,GlenGarioch,2,1,3,0,0,0,3,1,0,2,2,2 +36,GlenGrant,1,2,0,0,0,1,0,1,2,1,2,1 +37,GlenKeith,2,3,1,0,0,1,2,1,2,1,2,1 +38,GlenMoray,1,2,1,0,0,1,2,1,2,2,2,4 +39,GlenOrd,3,2,1,0,0,1,2,1,1,2,2,2 +40,GlenScotia,2,2,2,2,0,1,0,1,2,2,1,1 +41,GlenSpey,1,3,1,0,0,0,1,1,1,2,0,2 +42,Glenallachie,1,3,1,0,0,1,1,0,1,2,2,2 +43,Glendronach,4,2,2,0,0,2,1,4,2,2,2,0 +44,Glendullan,3,2,1,0,0,2,1,2,1,2,3,2 +45,Glenfarclas,2,4,1,0,0,1,2,3,2,3,2,2 +46,Glenfiddich,1,3,1,0,0,0,0,0,0,2,2,2 +47,Glengoyne,1,2,0,0,0,1,1,1,2,2,3,2 +48,Glenkinchie,1,2,1,0,0,1,2,0,0,2,2,2 +49,Glenlivet,2,3,1,0,0,2,2,2,1,2,2,3 +50,Glenlossie,1,2,1,0,0,1,2,0,1,2,2,2 +51,Glenmorangie,2,2,1,1,0,1,2,0,2,1,2,2 +52,Glenrothes,2,3,1,0,0,1,1,2,1,2,2,0 +53,Glenturret,2,3,1,0,0,2,2,2,2,2,1,2 +54,Highland Park,2,2,3,1,0,2,1,1,1,2,1,1 +55,Inchgower,1,3,1,1,0,2,2,0,1,2,1,2 +56,Isle of Jura,2,1,2,2,0,1,1,0,2,1,1,1 +57,Knochando,2,3,1,0,0,2,2,1,2,1,2,2 +58,Lagavulin,4,1,4,4,1,0,1,2,1,1,1,0 +59,Laphroig,4,2,4,4,1,0,0,1,1,1,0,0 +60,Linkwood,2,3,1,0,0,1,1,2,0,1,3,2 +61,Loch Lomond,1,1,1,1,0,1,1,0,1,2,1,2 +62,Longmorn,3,2,1,0,0,1,1,1,3,3,2,3 +63,Macallan,4,3,1,0,0,2,1,4,2,2,3,1 +64,Mannochmore,2,1,1,0,0,1,1,1,2,1,2,2 +65,Miltonduff,2,4,1,0,0,1,0,0,2,1,1,2 +66,Mortlach,3,2,2,0,0,2,3,3,2,1,2,2 +67,Oban,2,2,2,2,0,0,2,0,2,2,2,0 +68,OldFettercairn,1,2,2,0,1,2,2,1,2,3,1,1 +69,OldPulteney,2,1,2,2,1,0,1,1,2,2,2,2 +70,RoyalBrackla,2,3,2,1,1,1,2,1,0,2,3,2 +71,RoyalLochnagar,3,2,2,0,0,2,2,2,2,2,3,1 +72,Scapa,2,2,1,1,0,2,1,1,2,2,2,2 +73,Speyburn,2,4,1,0,0,2,1,0,0,2,1,2 +74,Speyside,2,2,1,0,0,1,0,1,2,2,2,2 +75,Springbank,2,2,2,2,0,2,2,1,2,1,0,1 +76,Strathisla,2,2,1,0,0,2,2,2,3,3,3,2 +77,Strathmill,2,3,1,0,0,0,2,0,2,1,3,2 +78,Talisker,4,2,3,3,0,1,3,0,1,2,2,0 +79,Tamdhu,1,2,1,0,0,2,0,1,1,2,2,2 +80,Tamnavulin,1,3,2,0,0,0,2,0,2,1,2,3 +81,Teaninich,2,2,2,1,0,0,2,0,0,0,2,2 +82,Tobermory,1,1,1,0,0,1,0,0,1,2,2,2 +83,Tomatin,2,3,2,0,0,2,2,1,1,2,0,1 +84,Tomintoul,0,3,1,0,0,2,2,1,1,2,1,2 +85,Tomore,2,2,1,0,0,1,0,1,2,1,0,0 +86,Tullibardine,2,3,0,0,1,0,2,1,1,2,2,1