Commit

bump dependency versions
paulk-asert committed May 21, 2024
1 parent e70b3ba · commit 2d707d6
Showing 4 changed files with 13 additions and 14 deletions.
4 changes: 2 additions & 2 deletions subprojects/HousePricesSpark/build.gradle

@@ -38,8 +38,8 @@ dependencies {
     runtimeOnly "org.apache.spark:spark-core_$sparkVariant:$sparkVersion"
 }
 
-task copyToLib(type: Copy) {
-    into "$buildDir/deps"
+tasks.register('copyToLib', Copy) {
+    into layout.buildDirectory.dir('deps')
     from configurations.runtimeClasspath
 }
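Note on the change above: tasks.register is Gradle's configuration-avoidance API, so the Copy task is only configured when something actually requires it, and layout.buildDirectory.dir('deps') is a lazy Provider replacing the deprecated "$buildDir/deps" string. A minimal sketch of wiring another task to the lazily registered one (runWithDeps is a hypothetical name, not part of this commit):

    // Sketch only: 'runWithDeps' is hypothetical and not in this commit.
    def copyToLib = tasks.named('copyToLib')   // TaskProvider; does not force configuration
    tasks.register('runWithDeps') {
        dependsOn copyToLib
        doLast {
            println "Dependencies staged in ${layout.buildDirectory.dir('deps').get().asFile}"
        }
    }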
11 changes: 5 additions & 6 deletions subprojects/HousePricesSpark/src/main/groovy/HousePricesSpark.groovy

@@ -28,8 +28,8 @@ import org.apache.spark.sql.Row
 import static org.apache.spark.sql.SparkSession.builder
 
 static main(args) {
-
 def spark = builder().config('spark.master', 'local[8]').appName('HousePrices').orCreate
+spark.sparkContext().logLevel = 'WARN'
 def file = HousePricesSpark.classLoader.getResource('kc_house_data.csv').file
 int k = 5
 Dataset<Row> ds = spark.read().format('csv')
@@ -39,19 +39,18 @@ static main(args) {
 def (training, test) = ds.randomSplit(splits)
 
 String[] colNames = ds.columns().toList() - ['id', 'date', 'price']
-def assembler = new VectorAssembler(inputCols: colNames,
-        outputCol: 'features')
+def assembler = new VectorAssembler(inputCols: colNames, outputCol: 'features')
 Dataset<Row> dataset = assembler.transform(training)
 def lr = new LinearRegression(labelCol: 'price', maxIter: 10)
 def model = lr.fit(dataset)
-println 'Coefficients:'
+println '\nCoefficients:'
 println model.coefficients().values()[1..-1]
         .collect { sprintf '%.2f', it }.join(', ')
 def testSummary = model.evaluate(assembler.transform(test))
 printf 'RMSE: %.2f%n', testSummary.rootMeanSquaredError
-printf 'r2: %.2f%n', testSummary.r2
+printf 'r2: %.2f%n%n', testSummary.r2
+spark.sparkContext().logLevel = 'INFO'
 spark.stop()
-
 }
 /*
 41979.78, 80853.89, 0.15, 5412.83, 564343.22, 53834.10, 24817.09, 93195.29, -80662.68, -80694.28, -2713.58, 19.02, -628.67, 594468.23, -228397.19, 21.23, -0.42
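Beyond the cosmetic newline padding, the two logLevel lines bracket the run: console output drops to WARN while the job executes and is restored to INFO before spark.stop(). The collapsed VectorAssembler call uses Groovy's named-argument constructor, which is sugar for a no-arg construction plus setter calls, roughly equivalent to this sketch (cols stands in for the colNames array):

    // What the named-argument constructor expands to:
    def assembler = new VectorAssembler()
    assembler.inputCols = cols           // calls setInputCols(String[])
    assembler.outputCol = 'features'     // calls setOutputCol(String)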
2 changes: 1 addition & 1 deletion subprojects/WhiskeySpark/build.gradle

@@ -19,7 +19,7 @@ apply plugin: 'application'
 ext {
     appName = 'WhiskeySpark'
     sparkVariant = '2.13'
-    sparkVersion = '3.5.0'
+    sparkVersion = '3.5.1'
 }
 
 application {
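The 3.5.0 to 3.5.1 bump is a patch-level Spark upgrade. Since the version lives in the ext block, every dependency string that interpolates $sparkVersion picks it up from this one line; the HousePricesSpark build above shows the pattern with spark-core. The remaining coordinates in this subproject fall outside the visible diff, so the block below is an assumed sketch of the shape, not the actual file contents:

    // Assumed dependency shape (not shown in this diff):
    dependencies {
        implementation "org.apache.spark:spark-mllib_$sparkVariant:$sparkVersion"
        runtimeOnly "org.apache.spark:spark-core_$sparkVariant:$sparkVersion"
    }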
10 changes: 5 additions & 5 deletions subprojects/WhiskeySpark/src/main/groovy/WhiskeySpark.groovy

@@ -28,10 +28,9 @@ import org.apache.spark.sql.Row
 import static org.apache.spark.sql.SparkSession.builder
 
 static main(args) {
-
 def spark = builder().config('spark.master', 'local[8]').appName('Whiskey').orCreate
+spark.sparkContext().logLevel = 'WARN'
 def file = WhiskeySpark.classLoader.getResource('whiskey.csv').file
-//def file = '/path/to/whiskey.csv'
 int k = 5
 Dataset<Row> rows = spark.read().format('com.databricks.spark.csv')
         .options('header': 'true', 'inferSchema': 'true').load(file)
@@ -41,10 +40,11 @@ static main(args) {
 Dataset<Row> dataset = assembler.transform(rows)
 def clusterer = new KMeans(k: k, seed: 1L)
 def model = clusterer.fit(dataset)
-println 'Cluster centers:'
+println '\nCluster centers:'
 model.clusterCenters().each { println it.values().collect { sprintf '%.2f', it }.join(', ') }
+println()
+spark.sparkContext().logLevel = 'INFO'
 spark.stop()
-
 }
 /*
 Cluster centers:
@@ -53,4 +53,4 @@ Cluster centers:
 2.86, 2.38, 1.52, 0.05, 0.00, 1.95, 1.76, 2.05, 1.81, 2.05, 2.19, 1.71
 1.53, 2.38, 1.06, 0.16, 0.03, 1.09, 1.00, 0.50, 1.53, 1.75, 2.13, 2.28
 3.67, 1.50, 3.67, 3.33, 0.67, 0.17, 1.67, 0.50, 1.17, 1.33, 1.17, 0.17
-*/
\ No newline at end of file
+*/
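An incidental cleanup here removes the commented-out hard-coded CSV path. The reader still names the legacy Databricks source; Spark has bundled the CSV reader since 2.0 and resolves 'com.databricks.spark.csv' to it for backward compatibility, so the short name already used in HousePricesSpark.groovy would behave the same:

    // Equivalent reader using the built-in short format name:
    Dataset<Row> rows = spark.read().format('csv')
            .options('header': 'true', 'inferSchema': 'true').load(file)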
