Skip to content

Commit 5570f81

Browse files
zengruios authored and yaooqinn committed
[SPARK-34760][EXAMPLES] Replace favorite_color with age in JavaSQLDataSourceExample
### What changes were proposed in this pull request?

In JavaSparkSQLExample, when executing 'peopleDF.write().partitionBy("favorite_color").bucketBy(42,"name").saveAsTable("people_partitioned_bucketed");', an exception is thrown: 'Exception in thread "main" org.apache.spark.sql.AnalysisException: partition column favorite_color is not defined in table people_partitioned_bucketed, defined table columns are: age, name;'. Change the column favorite_color to age.

### Why are the changes needed?

To run JavaSparkSQLExample successfully.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Tested in JavaSparkSQLExample.

Closes apache#31851 from zengruios/SPARK-34760.

Authored-by: zengruios <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
1 parent 2e836cd commit 5570f81

File tree

3 files changed

+13
-13
lines changed

3 files changed

+13
-13
lines changed

examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -204,15 +204,15 @@ private static void runBasicDataSourceExample(SparkSession spark) {
204204
.save("namesPartByColor.parquet");
205205
// $example off:write_partitioning$
206206
// $example on:write_partition_and_bucket$
207-
peopleDF
207+
usersDF
208208
.write()
209209
.partitionBy("favorite_color")
210210
.bucketBy(42, "name")
211-
.saveAsTable("people_partitioned_bucketed");
211+
.saveAsTable("users_partitioned_bucketed");
212212
// $example off:write_partition_and_bucket$
213213

214214
spark.sql("DROP TABLE IF EXISTS people_bucketed");
215-
spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed");
215+
spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed");
216216
}
217217

218218
private static void runBasicParquetExample(SparkSession spark) {

examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java

+8-8
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public class JavaSparkSQLExample {
6565
// $example on:create_ds$
6666
public static class Person implements Serializable {
6767
private String name;
68-
private int age;
68+
private long age;
6969

7070
public String getName() {
7171
return name;
@@ -75,11 +75,11 @@ public void setName(String name) {
7575
this.name = name;
7676
}
7777

78-
public int getAge() {
78+
public long getAge() {
7979
return age;
8080
}
8181

82-
public void setAge(int age) {
82+
public void setAge(long age) {
8383
this.age = age;
8484
}
8585
}
@@ -225,11 +225,11 @@ private static void runDatasetCreationExample(SparkSession spark) {
225225
// +---+----+
226226

227227
// Encoders for most common types are provided in class Encoders
228-
Encoder<Integer> integerEncoder = Encoders.INT();
229-
Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder);
230-
Dataset<Integer> transformedDS = primitiveDS.map(
231-
(MapFunction<Integer, Integer>) value -> value + 1,
232-
integerEncoder);
228+
Encoder<Long> longEncoder = Encoders.LONG();
229+
Dataset<Long> primitiveDS = spark.createDataset(Arrays.asList(1L, 2L, 3L), longEncoder);
230+
Dataset<Long> transformedDS = primitiveDS.map(
231+
(MapFunction<Long, Long>) value -> value + 1L,
232+
longEncoder);
233233
transformedDS.collect(); // Returns [2, 3, 4]
234234

235235
// DataFrames can be converted to a Dataset by providing a class. Mapping based on name

examples/src/main/python/sql/datasource.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def basic_datasource_example(spark):
104104
.write
105105
.partitionBy("favorite_color")
106106
.bucketBy(42, "name")
107-
.saveAsTable("people_partitioned_bucketed"))
107+
.saveAsTable("users_partitioned_bucketed"))
108108
# $example off:write_partition_and_bucket$
109109

110110
# $example on:manual_load_options$
@@ -135,7 +135,7 @@ def basic_datasource_example(spark):
135135
# $example off:direct_sql$
136136

137137
spark.sql("DROP TABLE IF EXISTS people_bucketed")
138-
spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed")
138+
spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed")
139139

140140

141141
def parquet_example(spark):

0 commit comments

Comments (0)