Skip to content

Commit 5570f81

Browse files
zengruios authored and yaooqinn committed
[SPARK-34760][EXAMPLES] Replace favorite_color with age in JavaSQLDataSourceExample
### What changes were proposed in this pull request?

In JavaSparkSQLExample, when executing 'peopleDF.write().partitionBy("favorite_color").bucketBy(42,"name").saveAsTable("people_partitioned_bucketed");', an exception is thrown: 'Exception in thread "main" org.apache.spark.sql.AnalysisException: partition column favorite_color is not defined in table people_partitioned_bucketed, defined table columns are: age, name;'. Change the column favorite_color to age.

### Why are the changes needed?

To run JavaSparkSQLExample successfully.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Tested in JavaSparkSQLExample.

Closes apache#31851 from zengruios/SPARK-34760.

Authored-by: zengruios <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
1 parent 2e836cd commit 5570f81

File tree

3 files changed

+13
-13
lines changed

3 files changed

+13
-13
lines changed

examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -204,15 +204,15 @@ private static void runBasicDataSourceExample(SparkSession spark) {
204204
.save("namesPartByColor.parquet");
205205
// $example off:write_partitioning$
206206
// $example on:write_partition_and_bucket$
207-
peopleDF
207+
usersDF
208208
.write()
209209
.partitionBy("favorite_color")
210210
.bucketBy(42, "name")
211-
.saveAsTable("people_partitioned_bucketed");
211+
.saveAsTable("users_partitioned_bucketed");
212212
// $example off:write_partition_and_bucket$
213213

214214
spark.sql("DROP TABLE IF EXISTS people_bucketed");
215-
spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed");
215+
spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed");
216216
}
217217

218218
private static void runBasicParquetExample(SparkSession spark) {

examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java

+8-8
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public class JavaSparkSQLExample {
6565
// $example on:create_ds$
6666
public static class Person implements Serializable {
6767
private String name;
68-
private int age;
68+
private long age;
6969

7070
public String getName() {
7171
return name;
@@ -75,11 +75,11 @@ public void setName(String name) {
7575
this.name = name;
7676
}
7777

78-
public int getAge() {
78+
public long getAge() {
7979
return age;
8080
}
8181

82-
public void setAge(int age) {
82+
public void setAge(long age) {
8383
this.age = age;
8484
}
8585
}
@@ -225,11 +225,11 @@ private static void runDatasetCreationExample(SparkSession spark) {
225225
// +---+----+
226226

227227
// Encoders for most common types are provided in class Encoders
228-
Encoder<Integer> integerEncoder = Encoders.INT();
229-
Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder);
230-
Dataset<Integer> transformedDS = primitiveDS.map(
231-
(MapFunction<Integer, Integer>) value -> value + 1,
232-
integerEncoder);
228+
Encoder<Long> longEncoder = Encoders.LONG();
229+
Dataset<Long> primitiveDS = spark.createDataset(Arrays.asList(1L, 2L, 3L), longEncoder);
230+
Dataset<Long> transformedDS = primitiveDS.map(
231+
(MapFunction<Long, Long>) value -> value + 1L,
232+
longEncoder);
233233
transformedDS.collect(); // Returns [2, 3, 4]
234234

235235
// DataFrames can be converted to a Dataset by providing a class. Mapping based on name

examples/src/main/python/sql/datasource.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def basic_datasource_example(spark):
104104
.write
105105
.partitionBy("favorite_color")
106106
.bucketBy(42, "name")
107-
.saveAsTable("people_partitioned_bucketed"))
107+
.saveAsTable("users_partitioned_bucketed"))
108108
# $example off:write_partition_and_bucket$
109109

110110
# $example on:manual_load_options$
@@ -135,7 +135,7 @@ def basic_datasource_example(spark):
135135
# $example off:direct_sql$
136136

137137
spark.sql("DROP TABLE IF EXISTS people_bucketed")
138-
spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed")
138+
spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed")
139139

140140

141141
def parquet_example(spark):

0 commit comments

Comments (0)