Add a better schema mismatch error message

mrpowers-io · Mar 21, 2019 · 5db7e0a · 5db7e0a
1 parent 446c83a
commit 5db7e0a
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -1,10 +1,10 @@
 # spark-fast-tests
 
-A fast, test framework independent Apache Spark testing helper library with beautifully formatted error messages!
+A fast Apache Spark testing helper library with beautifully formatted error messages!  Works with ScalaTest and uTest.
 
 [![Codacy Badge](https://api.codacy.com/project/badge/Grade/ab42211c18984740bee7f87c631a8f42)](https://www.codacy.com/app/MrPowers/spark-fast-tests?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=MrPowers/spark-fast-tests&amp;utm_campaign=Badge_Grade)
 
-For example, the `assertSmallDatasetEquality` method can be used to compare two Datasets (or two DataFrames).
+The `assertSmallDatasetEquality` method can be used to compare two Datasets (or two DataFrames).
 
 ```scala
 val sourceDF = Seq(
@@ -47,8 +47,8 @@ The `DatasetComparer` has `assertSmallDatasetEquality` and `assertLargeDatasetEq
 
 If you only need to compare DataFrames, you can use `DataFrameComparer` with the associated `assertSmallDataFrameEquality` and `assertLargeDataFrameEquality` methods.  Under the hood, `DataFrameComparer` uses the `assertSmallDatasetEquality` and `assertLargeDatasetEquality`.
 
-*Note : comparing Datasets can be tricky since some column names might be given by spark when applying transformations. 
-Would this happen, use the `ignoreColumnNames` boolean to skip name verification.*
+*Note: comparing Datasets can be tricky because Spark may generate some column names automatically when applying transformations.
+Set the `ignoreColumnNames` flag to `true` to skip column name verification.*
 
 ## Setup
 

diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala b/src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala
@@ -16,6 +16,7 @@ object DatasetComparerLike {
   /**
    * Default element comparison: delegates to `o1.equals(o2)`.
    *
    * @param o1 the value whose `equals` method is invoked
    * @param o2 the value passed to `o1.equals`
    * @tparam T the common static type of both values
    * @return whatever `o1.equals(o2)` returns
    */
   def naiveEquality[T](o1: T, o2: T): Boolean =
     o1.equals(o2)
+
 }
 
 trait DatasetComparer {
@@ -29,6 +30,24 @@ ${expectedDS.schema}
 """
   }
 
+  /**
+   * Builds a side-by-side, field-by-field comparison of two Dataset schemas
+   * for use in a schema mismatch error.
+   *
+   * Fields are paired by position. A pair whose fields are equal is rendered
+   * in blue; any other pair is rendered in red. When one schema has fewer
+   * fields than the other, the absent side is shown as `MISSING`.
+   *
+   * @param actualDS   the Dataset whose schema fills the left-hand column
+   * @param expectedDS the Dataset whose schema fills the right-hand column
+   * @return a multi-line string starting with a newline and a header row,
+   *         followed by one coloured line per field position
+   */
+  private def betterSchemaMismatchMessage[T](actualDS: Dataset[T], expectedDS: Dataset[T]): String = {
+    val missing = "MISSING"
+
+    // Pad the shorter schema with None instead of "" so the zipped pairs keep
+    // a precise Option type rather than widening to Any, and so an absent
+    // field cannot be confused with an empty string in the output.
+    val fieldPairs = actualDS.schema
+      .map(Option(_))
+      .zipAll(
+        expectedDS.schema.map(Option(_)),
+        None,
+        None
+      )
+
+    val renderedLines = fieldPairs.map {
+      case (actualField, expectedField) =>
+        val actualText   = actualField.map(_.toString).getOrElse(missing)
+        val expectedText = expectedField.map(_.toString).getOrElse(missing)
+        val line         = s"$actualText | $expectedText"
+        // zipAll never pads both sides at once, so equality here means both
+        // fields are present and identical.
+        if (actualField == expectedField) {
+          ufansi.Color.Blue(line)
+        } else {
+          ufansi.Color.Red(line)
+        }
+    }
+
+    // The header tells the reader which column is which.
+    "\nActual Schema Field | Expected Schema Field\n" + renderedLines.mkString("\n")
+  }
+
   private def countMismatchMessage(actualCount: Long, expectedCount: Long): String = {
     s"""
 Actual DataFrame Row Count: '${actualCount}'
@@ -80,7 +99,7 @@ ${DataFramePrettyPrint.showString(
           ignoreColumnNames
         )) {
       throw DatasetSchemaMismatch(
-        schemaMismatchMessage(
+        betterSchemaMismatchMessage(
           actualDS,
           expectedDS
         )
@@ -123,7 +142,7 @@ ${DataFramePrettyPrint.showString(
   def assertLargeDatasetEquality[T: ClassTag](actualDS: Dataset[T], expectedDS: Dataset[T], equals: (T, T) => Boolean = naiveEquality _): Unit = {
     if (!actualDS.schema.equals(expectedDS.schema)) {
       throw DatasetSchemaMismatch(
-        schemaMismatchMessage(
+        betterSchemaMismatchMessage(
           actualDS,
           expectedDS
         )

diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/ExamplesTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/ExamplesTest.scala
@@ -37,6 +37,37 @@
 //
 //      }
 //
+//      "error when schemas don't match" - {
+//
+//        val sourceDF = spark.createDF(
+//          List(
+//            (1, "a"),
+//            (5, "b")
+//          ),
+//          List(
+//            ("number", IntegerType, true),
+//            ("letter", StringType, true)
+//          )
+//        )
+//
+//        val expectedDF = spark.createDF(
+//          List(
+//            (1, "a"),
+//            (5, "b")
+//          ),
+//          List(
+//            ("num", IntegerType, true),
+//            ("letter", StringType, true)
+//          )
+//        )
+//
+//        assertSmallDatasetEquality(
+//          sourceDF,
+//          expectedDF
+//        )
+//
+//      }
+//
 //    }
 //
 //  }