## Schema of Built-in Functions
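Each row pairs the implementing Catalyst class with the function's SQL name or alias, a runnable query example, and the schema of that query's result. As a minimal sketch of how an "Output schema" cell can be reproduced (the local-mode session below is an illustrative assumption, not part of this generated file):

```scala
// Minimal sketch: reproduce one "Output schema" cell for a query example.
// The local-mode SparkSession setup here is an assumption for illustration.
import org.apache.spark.sql.SparkSession

object SchemaCheck extends App {
  val spark = SparkSession.builder().master("local[1]").appName("schema-check").getOrCreate()
  // Run any "Query example" from the table below.
  val df = spark.sql("SELECT abs(-1)")
  // StructType.simpleString prints the same struct<...> notation used in the table,
  // e.g. struct<abs(-1):int> for this query.
  println(df.schema.simpleString)
  spark.stop()
}
```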

| Class name | Function name or alias | Query example | Output schema |
| --- | --- | --- | --- |
| org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct<abs(-1):int> |
| org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct<ACOS(1):double> |
| org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct<ACOSH(1):double> |
| org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> |
| org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct<add_months(2016-08-31, 1):date> |
| org.apache.spark.sql.catalyst.expressions.AesDecrypt | aes_decrypt | SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333') | struct<aes_decrypt(unhex(83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94), 0000111122223333, GCM, DEFAULT, ):binary> |
| org.apache.spark.sql.catalyst.expressions.AesEncrypt | aes_encrypt | SELECT hex(aes_encrypt('Spark', '0000111122223333')) | struct<hex(aes_encrypt(Spark, 0000111122223333, GCM, DEFAULT, , )):string> |
| org.apache.spark.sql.catalyst.expressions.And | and | SELECT true and true | struct<(true AND true):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct<aggregate(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayAggregate | reduce | SELECT reduce(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct<reduce(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayAppend | array_append | SELECT array_append(array('b', 'd', 'c', 'a'), 'd') | struct<array_append(array(b, d, c, a), d):array<string>> |
| org.apache.spark.sql.catalyst.expressions.ArrayCompact | array_compact | SELECT array_compact(array(1, 2, 3, null)) | struct<array_compact(array(1, 2, 3, NULL)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayContains | array_contains | SELECT array_contains(array(1, 2, 3), 2) | struct<array_contains(array(1, 2, 3), 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayDistinct | array_distinct | SELECT array_distinct(array(1, 2, 3, null, 3)) | struct<array_distinct(array(1, 2, 3, NULL, 3)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayExcept | array_except | SELECT array_except(array(1, 2, 3), array(1, 3, 5)) | struct<array_except(array(1, 2, 3), array(1, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayExists | exists | SELECT exists(array(1, 2, 3), x -> x % 2 == 0) | struct<exists(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 0), namedlambdavariable())):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayFilter | filter | SELECT filter(array(1, 2, 3), x -> x % 2 == 1) | struct<filter(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 1), namedlambdavariable())):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayForAll | forall | SELECT forall(array(1, 2, 3), x -> x % 2 == 0) | struct<forall(array(1, 2, 3), lambdafunction(((namedlambdavariable() % 2) = 0), namedlambdavariable())):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayInsert | array_insert | SELECT array_insert(array(1, 2, 3, 4), 5, 5) | struct<array_insert(array(1, 2, 3, 4), 5, 5):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayIntersect | array_intersect | SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) | struct<array_intersect(array(1, 2, 3), array(1, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayJoin | array_join | SELECT array_join(array('hello', 'world'), ' ') | struct<array_join(array(hello, world), ):string> |
| org.apache.spark.sql.catalyst.expressions.ArrayMax | array_max | SELECT array_max(array(1, 20, null, 3)) | struct<array_max(array(1, 20, NULL, 3)):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayMin | array_min | SELECT array_min(array(1, 20, null, 3)) | struct<array_min(array(1, 20, NULL, 3)):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayPosition | array_position | SELECT array_position(array(312, 773, 708, 708), 708) | struct<array_position(array(312, 773, 708, 708), 708):bigint> |
| org.apache.spark.sql.catalyst.expressions.ArrayPrepend | array_prepend | SELECT array_prepend(array('b', 'd', 'c', 'a'), 'd') | struct<array_prepend(array(b, d, c, a), d):array<string>> |
| org.apache.spark.sql.catalyst.expressions.ArrayRemove | array_remove | SELECT array_remove(array(1, 2, 3, null, 3), 3) | struct<array_remove(array(1, 2, 3, NULL, 3), 3):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayRepeat | array_repeat | SELECT array_repeat('123', 2) | struct<array_repeat(123, 2):array<string>> |
| org.apache.spark.sql.catalyst.expressions.ArraySize | array_size | SELECT array_size(array('b', 'd', 'c', 'a')) | struct<array_size(array(b, d, c, a)):int> |
| org.apache.spark.sql.catalyst.expressions.ArraySort | array_sort | SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) | struct<array_sort(array(5, 6, 1), lambdafunction(CASE WHEN (namedlambdavariable() < namedlambdavariable()) THEN -1 WHEN (namedlambdavariable() > namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayTransform | transform | SELECT transform(array(1, 2, 3), x -> x + 1) | struct<transform(array(1, 2, 3), lambdafunction((namedlambdavariable() + 1), namedlambdavariable())):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArrayUnion | array_union | SELECT array_union(array(1, 2, 3), array(1, 3, 5)) | struct<array_union(array(1, 2, 3), array(1, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.ArraysOverlap | arrays_overlap | SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) | struct<arrays_overlap(array(1, 2, 3), array(3, 4, 5)):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArraysZip | arrays_zip | SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) | struct<arrays_zip(array(1, 2, 3), array(2, 3, 4)):array<struct<0:int,1:int>>> |
| org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct<ascii(222):int> |
| org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct<ASIN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct<ASINH(0):double> |
| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):void> |
| org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct<ATAN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct<ATAN2(0, 0):double> |
| org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct<ATANH(0):double> |
| org.apache.spark.sql.catalyst.expressions.BRound | bround | SELECT bround(2.5, 0) | struct<bround(2.5, 0):decimal(2,0)> |
| org.apache.spark.sql.catalyst.expressions.Base64 | base64 | SELECT base64('Spark SQL') | struct<base64(Spark SQL):string> |
| org.apache.spark.sql.catalyst.expressions.Between | between | SELECT 0.5 between 0.1 AND 1.0 | struct<between(0.5, 0.1, 1.0):boolean> |
| org.apache.spark.sql.catalyst.expressions.Bin | bin | SELECT bin(13) | struct<bin(13):string> |
| org.apache.spark.sql.catalyst.expressions.BitLength | bit_length | SELECT bit_length('Spark SQL') | struct<bit_length(Spark SQL):int> |
| org.apache.spark.sql.catalyst.expressions.BitmapBitPosition | bitmap_bit_position | SELECT bitmap_bit_position(1) | struct<bitmap_bit_position(1):bigint> |
| org.apache.spark.sql.catalyst.expressions.BitmapBucketNumber | bitmap_bucket_number | SELECT bitmap_bucket_number(123) | struct<bitmap_bucket_number(123):bigint> |
| org.apache.spark.sql.catalyst.expressions.BitmapConstructAgg | bitmap_construct_agg | SELECT substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6) FROM VALUES (1), (2), (3) AS tab(col) | struct<substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6):string> |
| org.apache.spark.sql.catalyst.expressions.BitmapCount | bitmap_count | SELECT bitmap_count(X '1010') | struct<bitmap_count(X'1010'):bigint> |
| org.apache.spark.sql.catalyst.expressions.BitmapOrAgg | bitmap_or_agg | SELECT substring(hex(bitmap_or_agg(col)), 0, 6) FROM VALUES (X '10'), (X '20'), (X '40') AS tab(col) | struct<substring(hex(bitmap_or_agg(col)), 0, 6):string> |
| org.apache.spark.sql.catalyst.expressions.BitwiseAnd | & | SELECT 3 & 5 | struct<(3 & 5):int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseCount | bit_count | SELECT bit_count(0) | struct<bit_count(0):int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseGet | bit_get | SELECT bit_get(11, 0) | struct<bit_get(11, 0):tinyint> |
| org.apache.spark.sql.catalyst.expressions.BitwiseGet | getbit | SELECT getbit(11, 0) | struct<getbit(11, 0):tinyint> |
| org.apache.spark.sql.catalyst.expressions.BitwiseNot | ~ | SELECT ~ 0 | struct<~0:int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseOr | \| | SELECT 3 \| 5 | struct<(3 \| 5):int> |
| org.apache.spark.sql.catalyst.expressions.BitwiseXor | ^ | SELECT 3 ^ 5 | struct<(3 ^ 5):int> |
| org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection | java_method | SELECT java_method('java.util.UUID', 'randomUUID') | struct<java_method(java.util.UUID, randomUUID):string> |
| org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection | reflect | SELECT reflect('java.util.UUID', 'randomUUID') | struct<reflect(java.util.UUID, randomUUID):string> |
| org.apache.spark.sql.catalyst.expressions.CaseWhen | when | SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END | struct<CASE WHEN (1 > 0) THEN 1 WHEN (2 > 0) THEN 2.0 ELSE 1.2 END:decimal(11,1)> |
| org.apache.spark.sql.catalyst.expressions.Cast | bigint | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | binary | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | boolean | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | cast | SELECT cast('10' as int) | struct<CAST(10 AS INT):int> |
| org.apache.spark.sql.catalyst.expressions.Cast | date | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | decimal | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | double | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | float | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | int | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | smallint | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | string | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | timestamp | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cast | tinyint | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Cbrt | cbrt | SELECT cbrt(27.0) | struct<CBRT(27.0):double> |
| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder | ceil | SELECT ceil(-0.1) | struct<CEIL(-0.1):decimal(1,0)> |
| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder | ceiling | SELECT ceiling(-0.1) | struct<ceiling(-0.1):decimal(1,0)> |
| org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct<char(65):string> |
| org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct<chr(65):string> |
| org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct<coalesce(NULL, 1, NULL):int> |
| org.apache.spark.sql.catalyst.expressions.CollateExpressionBuilder | collate | SELECT COLLATION('Spark SQL' collate UTF8_LCASE) | struct<collation(collate(Spark SQL, UTF8_LCASE)):string> |
| org.apache.spark.sql.catalyst.expressions.Collation | collation | SELECT collation('Spark SQL') | struct<collation(Spark SQL):string> |
| org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct<concat(Spark, SQL):string> |
| org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct<concat_ws( , Spark, SQL):string> |
| org.apache.spark.sql.catalyst.expressions.ContainsExpressionBuilder | contains | SELECT contains('Spark SQL', 'Spark') | struct<contains(Spark SQL, Spark):boolean> |
| org.apache.spark.sql.catalyst.expressions.Conv | conv | SELECT conv('100', 2, 10) | struct<conv(100, 2, 10):string> |
| org.apache.spark.sql.catalyst.expressions.ConvertTimezone | convert_timezone | SELECT convert_timezone('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00') | struct<convert_timezone(Europe/Brussels, America/Los_Angeles, TIMESTAMP_NTZ '2021-12-06 00:00:00'):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.Cos | cos | SELECT cos(0) | struct<COS(0):double> |
| org.apache.spark.sql.catalyst.expressions.Cosh | cosh | SELECT cosh(0) | struct<COSH(0):double> |
| org.apache.spark.sql.catalyst.expressions.Cot | cot | SELECT cot(1) | struct<COT(1):double> |
| org.apache.spark.sql.catalyst.expressions.Crc32 | crc32 | SELECT crc32('Spark') | struct<crc32(Spark):bigint> |
| org.apache.spark.sql.catalyst.expressions.CreateArray | array | SELECT array(1, 2, 3) | struct<array(1, 2, 3):array<int>> |
| org.apache.spark.sql.catalyst.expressions.CreateMap | map | SELECT map(1.0, '2', 3.0, '4') | struct<map(1.0, 2, 3.0, 4):map<decimal(2,1),string>> |
| org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | named_struct | SELECT named_struct("a", 1, "b", 2, "c", 3) | struct<named_struct(a, 1, b, 2, c, 3):struct<a:int,b:int,c:int>> |
| org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | struct | SELECT struct(1, 2, 3) | struct<struct(1, 2, 3):struct<col1:int,col2:int,col3:int>> |
| org.apache.spark.sql.catalyst.expressions.Csc | csc | SELECT csc(1) | struct<CSC(1):double> |
| org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct<from_csv(1, 0.8):struct<a:int,b:double>> |
| org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,cume_dist() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
| org.apache.spark.sql.catalyst.expressions.CurDateExpressionBuilder | curdate | SELECT curdate() | struct<current_date():date> |
| org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT current_catalog() | struct<current_catalog():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct<current_schema():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_schema | SELECT current_schema() | struct<current_schema():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct<current_date():date> |
| org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct<current_timezone():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct<current_timestamp():timestamp> |
| org.apache.spark.sql.catalyst.expressions.CurrentUser | current_user | SELECT current_user() | struct<current_user():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentUser | session_user | SELECT session_user() | struct<session_user():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentUser | user | SELECT user() | struct<user():string> |
| org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct<date_add(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DateAdd | dateadd | SELECT dateadd('2016-07-30', 1) | struct<date_add(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DateDiff | date_diff | SELECT date_diff('2009-07-31', '2009-07-30') | struct<date_diff(2009-07-31, 2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct<datediff(2009-07-31, 2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct<date_format(2016-04-08, y):string> |
| org.apache.spark.sql.catalyst.expressions.DateFromUnixDate | date_from_unix_date | SELECT date_from_unix_date(1) | struct<date_from_unix_date(1):date> |
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | datepart | SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<datepart(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct<date_sub(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DayName | dayname | SELECT dayname(DATE('2008-02-20')) | struct<dayname(2008-02-20):string> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct<day(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct<dayofmonth(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct<dayofweek(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfYear | dayofyear | SELECT dayofyear('2016-04-09') | struct<dayofyear(2016-04-09):int> |
| org.apache.spark.sql.catalyst.expressions.Decode | decode | SELECT decode(encode('abc', 'utf-8'), 'utf-8') | struct<decode(encode(abc, utf-8), utf-8):string> |
| org.apache.spark.sql.catalyst.expressions.DenseRank | dense_rank | SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,DENSE_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.Divide | / | SELECT 3 / 2 | struct<(3 / 2):double> |
| org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct<element_at(array(1, 2, 3), 2):int> |
| org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct<elt(1, scala, java):string> |
| org.apache.spark.sql.catalyst.expressions.Encode | encode | SELECT encode('abc', 'utf-8') | struct<encode(abc, utf-8):binary> |
| org.apache.spark.sql.catalyst.expressions.EndsWithExpressionBuilder | endswith | SELECT endswith('Spark SQL', 'SQL') | struct<endswith(Spark SQL, SQL):boolean> |
| org.apache.spark.sql.catalyst.expressions.EqualNull | equal_null | SELECT equal_null(3, 3) | struct<equal_null(3, 3):boolean> |
| org.apache.spark.sql.catalyst.expressions.EqualNullSafe | <=> | SELECT 2 <=> 2 | struct<(2 <=> 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.EqualTo | = | SELECT 2 = 2 | struct<(2 = 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.EulerNumber | e | SELECT e() | struct<E():double> |
| org.apache.spark.sql.catalyst.expressions.Exp | exp | SELECT exp(0) | struct<EXP(0):double> |
| org.apache.spark.sql.catalyst.expressions.ExplodeExpressionBuilder | explode | SELECT explode(array(10, 20)) | struct<col:int> |
| org.apache.spark.sql.catalyst.expressions.ExplodeExpressionBuilder | explode_outer | SELECT explode_outer(array(10, 20)) | struct<col:int> |
| org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct<EXPM1(0):double> |
| org.apache.spark.sql.catalyst.expressions.Extract | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct<extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct<factorial(5):bigint> |
| org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct<find_in_set(ab, abc,b,ab,c,def):int> |
| org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct<flatten(array(array(1, 2), array(3, 4))):array<int>> |
| org.apache.spark.sql.catalyst.expressions.FloorExpressionBuilder | floor | SELECT floor(-0.1) | struct<FLOOR(-0.1):decimal(1,0)> |
| org.apache.spark.sql.catalyst.expressions.FormatNumber | format_number | SELECT format_number(12332.123456, 4) | struct<format_number(12332.123456, 4):string> |
| org.apache.spark.sql.catalyst.expressions.FormatString | format_string | SELECT format_string("Hello World %d %s", 100, "days") | struct<format_string(Hello World %d %s, 100, days):string> |
| org.apache.spark.sql.catalyst.expressions.FormatString | printf | SELECT printf("Hello World %d %s", 100, "days") | struct<printf(Hello World %d %s, 100, days):string> |
| org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp | from_utc_timestamp | SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct<from_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
| org.apache.spark.sql.catalyst.expressions.FromUnixTime | from_unixtime | SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') | struct<from_unixtime(0, yyyy-MM-dd HH:mm:ss):string> |
| org.apache.spark.sql.catalyst.expressions.Get | get | SELECT get(array(1, 2, 3), 0) | struct<get(array(1, 2, 3), 0):int> |
| org.apache.spark.sql.catalyst.expressions.GetJsonObject | get_json_object | SELECT get_json_object('{"a":"b"}', '$.a') | struct<get_json_object({"a":"b"}, $.a):string> |
| org.apache.spark.sql.catalyst.expressions.GreaterThan | > | SELECT 2 > 1 | struct<(2 > 1):boolean> |
| org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual | >= | SELECT 2 >= 1 | struct<(2 >= 1):boolean> |
| org.apache.spark.sql.catalyst.expressions.Greatest | greatest | SELECT greatest(10, 9, 2, 4, 3) | struct<greatest(10, 9, 2, 4, 3):int> |
| org.apache.spark.sql.catalyst.expressions.Grouping | grouping | SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) | struct<name:string,grouping(name):tinyint,sum(age):bigint> |
| org.apache.spark.sql.catalyst.expressions.GroupingID | grouping_id | SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) | struct<name:string,grouping_id():bigint,sum(age):bigint,avg(height):double> |
| org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct<hex(17):string> |
| org.apache.spark.sql.catalyst.expressions.HllSketchEstimate | hll_sketch_estimate | SELECT hll_sketch_estimate(hll_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct<hll_sketch_estimate(hll_sketch_agg(col, 12)):bigint> |
| org.apache.spark.sql.catalyst.expressions.HllUnion | hll_union | SELECT hll_sketch_estimate(hll_union(hll_sketch_agg(col1), hll_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct<hll_sketch_estimate(hll_union(hll_sketch_agg(col1, 12), hll_sketch_agg(col2, 12), false)):bigint> |
| org.apache.spark.sql.catalyst.expressions.HourExpressionBuilder | hour | SELECT hour('2018-02-14 12:58:59') | struct<hour(2018-02-14 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct<HYPOT(3, 4):double> |
| org.apache.spark.sql.catalyst.expressions.ILike | ilike | SELECT ilike('Spark', '_Park') | struct<ilike(Spark, _Park):boolean> |
| org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> |
| org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> |
| org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct<initcap(sPark sql):string> |
| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> |
| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> |
| org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | SELECT input_file_block_length() | struct<input_file_block_length():bigint> |
| org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | SELECT input_file_block_start() | struct<input_file_block_start():bigint> |
| org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | SELECT input_file_name() | struct<input_file_name():string> |
| org.apache.spark.sql.catalyst.expressions.IntegralDivide | div | SELECT 3 div 2 | struct<(3 div 2):bigint> |
| org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct<isnan(CAST(NaN AS DOUBLE)):boolean> |
| org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> |
| org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> |
| org.apache.spark.sql.catalyst.expressions.IsValidUTF8 | is_valid_utf8 | SELECT is_valid_utf8('Spark') | struct<is_valid_utf8(Spark):boolean> |
| org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | SELECT json_object_keys('{}') | struct<json_object_keys({}):array<string>> |
| org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct<from_json({"a":1, "b":0.8}):struct<a:int,b:double>> |
| org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct<c0:string,c1:string> |
| org.apache.spark.sql.catalyst.expressions.LPadExpressionBuilder | lpad | SELECT lpad('hi', 5, '??') | struct<lpad(hi, 5, ??):string> |
| org.apache.spark.sql.catalyst.expressions.Lag | lag | SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,lag(b, 1, NULL) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN -1 FOLLOWING AND -1 FOLLOWING):int> |
| org.apache.spark.sql.catalyst.expressions.LastDay | last_day | SELECT last_day('2009-01-12') | struct<last_day(2009-01-12):date> |
| org.apache.spark.sql.catalyst.expressions.Lead | lead | SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,lead(b, 1, NULL) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING):int> |
| org.apache.spark.sql.catalyst.expressions.Least | least | SELECT least(10, 9, 2, 4, 3) | struct<least(10, 9, 2, 4, 3):int> |
| org.apache.spark.sql.catalyst.expressions.Left | left | SELECT left('Spark SQL', 3) | struct<left(Spark SQL, 3):string> |
| org.apache.spark.sql.catalyst.expressions.Length | char_length | SELECT char_length('Spark SQL ') | struct<char_length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.Length | character_length | SELECT character_length('Spark SQL ') | struct<character_length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.Length | len | SELECT len('Spark SQL ') | struct<len(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.Length | length | SELECT length('Spark SQL ') | struct<length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray | json_array_length | SELECT json_array_length('[1,2,3,4]') | struct<json_array_length([1,2,3,4]):int> |
| org.apache.spark.sql.catalyst.expressions.LessThan | < | SELECT 1 < 2 | struct<(1 < 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.LessThanOrEqual | <= | SELECT 2 <= 2 | struct<(2 <= 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.Levenshtein | levenshtein | SELECT levenshtein('kitten', 'sitting') | struct<levenshtein(kitten, sitting):int> |
| org.apache.spark.sql.catalyst.expressions.Like | like | SELECT like('Spark', '_park') | struct<Spark LIKE _park:boolean> |
| org.apache.spark.sql.catalyst.expressions.LocalTimestamp | localtimestamp | SELECT localtimestamp() | struct<localtimestamp():timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.Log | ln | SELECT ln(1) | struct<ln(1):double> |
| org.apache.spark.sql.catalyst.expressions.Log10 | log10 | SELECT log10(10) | struct<LOG10(10):double> |
| org.apache.spark.sql.catalyst.expressions.Log1p | log1p | SELECT log1p(0) | struct<LOG1P(0):double> |
| org.apache.spark.sql.catalyst.expressions.Log2 | log2 | SELECT log2(2) | struct<LOG2(2):double> |
| org.apache.spark.sql.catalyst.expressions.Logarithm | log | SELECT log(10, 100) | struct<LOG(10, 100):double> |
| org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct<lcase(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct<lower(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Luhncheck | luhn_check | SELECT luhn_check('8112189876') | struct<luhn_check(8112189876):boolean> |
| org.apache.spark.sql.catalyst.expressions.MakeDTInterval | make_dt_interval | SELECT make_dt_interval(1, 12, 30, 01.001001) | struct<make_dt_interval(1, 12, 30, 1.001001):interval day to second> |
| org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, 1.001001):interval> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, 45.887):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZExpressionBuilder | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZExpressionBuilder | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.MakeValidUTF8 | make_valid_utf8 | SELECT make_valid_utf8('Spark') | struct<make_valid_utf8(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.MakeYMInterval | make_ym_interval | SELECT make_ym_interval(1, 2) | struct<make_ym_interval(1, 2):interval year to month> |
| org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct<map_concat(map(1, a, 2, b), map(3, c)):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MapContainsKey | map_contains_key | SELECT map_contains_key(map(1, 'a', 2, 'b'), 1) | struct<map_contains_key(map(1, a, 2, b), 1):boolean> |
| org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct<map_entries(map(1, a, 2, b)):array<struct<key:int,value:string>>> |
| org.apache.spark.sql.catalyst.expressions.MapFilter | map_filter | SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) | struct<map_filter(map(1, 0, 2, 2, 3, -1), lambdafunction((namedlambdavariable() > namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
| org.apache.spark.sql.catalyst.expressions.MapFromArrays | map_from_arrays | SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) | struct<map_from_arrays(array(1.0, 3.0), array(2, 4)):map<decimal(2,1),string>> |
| org.apache.spark.sql.catalyst.expressions.MapFromEntries | map_from_entries | SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) | struct<map_from_entries(array(struct(1, a), struct(2, b))):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MapKeys | map_keys | SELECT map_keys(map(1, 'a', 2, 'b')) | struct<map_keys(map(1, a, 2, b)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.MapValues | map_values | SELECT map_values(map(1, 'a', 2, 'b')) | struct<map_values(map(1, a, 2, b)):array<string>> |
| org.apache.spark.sql.catalyst.expressions.MapZipWith | map_zip_with | SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) | struct<map_zip_with(map(1, a, 2, b), map(1, x, 2, y), lambdafunction(concat(namedlambdavariable(), namedlambdavariable()), namedlambdavariable(), namedlambdavariable(), namedlambdavariable())):map<int,string>> |
| org.apache.spark.sql.catalyst.expressions.MaskExpressionBuilder | mask | SELECT mask('abcd-EFGH-8765-4321') | struct<mask(abcd-EFGH-8765-4321, X, x, n, NULL):string> |
| org.apache.spark.sql.catalyst.expressions.Md5 | md5 | SELECT md5('Spark') | struct<md5(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp | timestamp_micros | SELECT timestamp_micros(1230219000123123) | struct<timestamp_micros(1230219000123123):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MillisToTimestamp | timestamp_millis | SELECT timestamp_millis(1230219000123) | struct<timestamp_millis(1230219000123):timestamp> |
| org.apache.spark.sql.catalyst.expressions.MinuteExpressionBuilder | minute | SELECT minute('2009-07-30 12:58:59') | struct<minute(2009-07-30 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | SELECT monotonically_increasing_id() | struct<monotonically_increasing_id():bigint> |
| org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct<month(2016-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.MonthName | monthname | SELECT monthname('2008-02-20') | struct<monthname(2008-02-20):string> |
| org.apache.spark.sql.catalyst.expressions.MonthsBetween | months_between | SELECT months_between('1997-02-28 10:30:00', '1996-10-30') | struct<months_between(1997-02-28 10:30:00, 1996-10-30, true):double> |
| org.apache.spark.sql.catalyst.expressions.Multiply | * | SELECT 2 * 3 | struct<(2 * 3):int> |
| org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct<hash(Spark, array(123), 2):int> |
| org.apache.spark.sql.catalyst.expressions.NTile | ntile | SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,ntile(2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct<nanvl(CAST(NaN AS DOUBLE), 123):double> |
| org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct<next_day(2015-01-14, TU):date> |
| org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT ! true | struct<(NOT true):boolean> |
| org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> |
| org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct<now():timestamp> |
| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,nth_value(b, 2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct<nullif(2, 2):int> |
| org.apache.spark.sql.catalyst.expressions.NullIfZero | nullifzero | SELECT nullifzero(0) | struct<nullifzero(0):int> |
| org.apache.spark.sql.catalyst.expressions.Nvl | ifnull | SELECT ifnull(NULL, array('2')) | struct<ifnull(NULL, array(2)):array<string>> |
| org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct<nvl(NULL, array(2)):array<string>> |
| org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct<nvl2(NULL, 2, 1):int> |
| org.apache.spark.sql.catalyst.expressions.OctetLength | octet_length | SELECT octet_length('Spark SQL') | struct<octet_length(Spark SQL):int> |
| org.apache.spark.sql.catalyst.expressions.Or | or | SELECT true or false | struct<(true OR false):boolean> |
| org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct<overlay(Spark SQL, _, 6, -1):string> |
| org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct<to_date(2009-07-30 04:17:52):date> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct<to_timestamp(2016-12-31 00:12:00):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZExpressionBuilder | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct<to_timestamp_ltz(2016-12-31 00:12:00):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZExpressionBuilder | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct<to_timestamp_ntz(2016-12-31 00:12:00):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct<parse_url(http://spark.apache.org/path?query=1, HOST):string> |
| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> |
| org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct<PI():double> |
| org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct<pmod(10, 3):int> |
| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode | SELECT posexplode(array(10,20)) | struct<pos:int,col:int> |
| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct<pos:int,col:int> |
| org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct<pow(2, 3):double> |
| org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct<POWER(2, 3):double> |
| org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct<quarter(2016-08-31):int> |
| org.apache.spark.sql.catalyst.expressions.Quote | quote | SELECT quote('Don\'t') | struct<quote(Don\'t):string> |
| org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RPadExpressionBuilder | rpad | SELECT rpad('hi', 5, '??') | struct<rpad(hi, 5, ??):string> |
| org.apache.spark.sql.catalyst.expressions.RaiseErrorExpressionBuilder | raise_error | SELECT raise_error('custom error message') | struct<raise_error(USER_RAISED_EXCEPTION, map(errorMessage, custom error message)):void> |
| org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.RandStr | randstr | SELECT randstr(3, 0) AS result | struct<result:string> |
| org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> |
| org.apache.spark.sql.catalyst.expressions.Rank | rank | SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.RegExpCount | regexp_count | SELECT regexp_count('Steven Jones and Stephen Smith are the best players', 'Ste(v\|ph)en') | struct<regexp_count(Steven Jones and Stephen Smith are the best players, Ste(v\|ph)en):int> |
| org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct<regexp_extract(100-200, (\d+)-(\d+), 1):string> |
| org.apache.spark.sql.catalyst.expressions.RegExpExtractAll | regexp_extract_all | SELECT regexp_extract_all('100-200, 300-400', '(\\d+)-(\\d+)', 1) | struct<regexp_extract_all(100-200, 300-400, (\d+)-(\d+), 1):array<string>> |
| org.apache.spark.sql.catalyst.expressions.RegExpInStr | regexp_instr | SELECT regexp_instr(r"\abc", r"^\abc$") | struct<regexp_instr(\abc, ^\abc$, 0):int> |
| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct<regexp_replace(100-200, (\d+), num, 1):string> |
| org.apache.spark.sql.catalyst.expressions.RegExpSubStr | regexp_substr | SELECT regexp_substr('Steven Jones and Stephen Smith are the best players', 'Ste(v\|ph)en') | struct<regexp_substr(Steven Jones and Stephen Smith are the best players, Ste(v\|ph)en):string> |
| org.apache.spark.sql.catalyst.expressions.Remainder | % | SELECT 2 % 1.8 | struct<(2 % 1.8):decimal(2,1)> |
| org.apache.spark.sql.catalyst.expressions.Remainder | mod | SELECT 2 % 1.8 | struct<(2 % 1.8):decimal(2,1)> |
| org.apache.spark.sql.catalyst.expressions.Reverse | reverse | SELECT reverse('Spark SQL') | struct<reverse(Spark SQL):string> |
| org.apache.spark.sql.catalyst.expressions.Right | right | SELECT right('Spark SQL', 3) | struct<right(Spark SQL, 3):string> |
| org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct<rint(12.3456):double> |
| org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct<round(2.5, 0):decimal(2,0)> |
| org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,row_number() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct<schema_of_csv(1,abc):string> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct<schema_of_json([{"col":0}]):string> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfXml | schema_of_xml | SELECT schema_of_xml('<p><a>1</a></p>') | struct<schema_of_xml(<p><a>1</a></p>):string> |
| org.apache.spark.sql.catalyst.expressions.Sec | sec | SELECT sec(0) | struct<SEC(0):double> |
| org.apache.spark.sql.catalyst.expressions.SecondExpressionBuilder | second | SELECT second('2018-02-14 12:58:59') | struct<second(2018-02-14 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp | timestamp_seconds | SELECT timestamp_seconds(1230219000) | struct<timestamp_seconds(1230219000):timestamp> |
| org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! Good morning.') | struct<sentences(Hi there! Good morning., , ):array<array<string>>> |
| org.apache.spark.sql.catalyst.expressions.Sequence | sequence | SELECT sequence(1, 5) | struct<sequence(1, 5):array<int>> |
| org.apache.spark.sql.catalyst.expressions.SessionWindow | session_window | SELECT a, session_window.start, session_window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:10:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, session_window(b, '5 minutes') ORDER BY a, start | struct<a:string,start:timestamp,end:timestamp,cnt:bigint> |
| org.apache.spark.sql.catalyst.expressions.Sha1 | sha | SELECT sha('Spark') | struct<sha(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.Sha1 | sha1 | SELECT sha1('Spark') | struct<sha1(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.Sha2 | sha2 | SELECT sha2('Spark', 256) | struct<sha2(Spark, 256):string> |
| org.apache.spark.sql.catalyst.expressions.ShiftLeft | << | SELECT shiftleft(2, 1) | struct<shiftleft(2, 1):int> |
| org.apache.spark.sql.catalyst.expressions.ShiftLeft | shiftleft | SELECT shiftleft(2, 1) | struct<shiftleft(2, 1):int> |
| org.apache.spark.sql.catalyst.expressions.ShiftRight | >> | SELECT shiftright(4, 1) | struct<shiftright(4, 1):int> |
| org.apache.spark.sql.catalyst.expressions.ShiftRight | shiftright | SELECT shiftright(4, 1) | struct<shiftright(4, 1):int> |
| org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | >>> | SELECT shiftrightunsigned(4, 1) | struct<shiftrightunsigned(4, 1):int> |
| org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | shiftrightunsigned | SELECT shiftrightunsigned(4, 1) | struct<shiftrightunsigned(4, 1):int> |
| org.apache.spark.sql.catalyst.expressions.Shuffle | shuffle | SELECT shuffle(array(1, 20, 3, 5)) | struct<shuffle(array(1, 20, 3, 5)):array<int>> |
| org.apache.spark.sql.catalyst.expressions.Signum | sign | SELECT sign(40) | struct<sign(40):double> |
| org.apache.spark.sql.catalyst.expressions.Signum | signum | SELECT signum(40) | struct<SIGNUM(40):double> |
| org.apache.spark.sql.catalyst.expressions.Sin | sin | SELECT sin(0) | struct<SIN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Sinh | sinh | SELECT sinh(0) | struct<SINH(0):double> |
| org.apache.spark.sql.catalyst.expressions.Size | cardinality | SELECT cardinality(array('b', 'd', 'c', 'a')) | struct<cardinality(array(b, d, c, a)):int> |
| org.apache.spark.sql.catalyst.expressions.Size | size | SELECT size(array('b', 'd', 'c', 'a')) | struct<size(array(b, d, c, a)):int> |
| org.apache.spark.sql.catalyst.expressions.Slice | slice | SELECT slice(array(1, 2, 3, 4), 2, 2) | struct<slice(array(1, 2, 3, 4), 2, 2):array<int>> |
| org.apache.spark.sql.catalyst.expressions.SortArray | sort_array | SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) | struct<sort_array(array(b, d, NULL, c, a), true):array<string>> |
| org.apache.spark.sql.catalyst.expressions.SoundEx | soundex | SELECT soundex('Miller') | struct<soundex(Miller):string> |
| org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | SELECT spark_partition_id() | struct<SPARK_PARTITION_ID():int> |
| org.apache.spark.sql.catalyst.expressions.SparkVersion | version | SELECT version() | struct<version():string> |
| org.apache.spark.sql.catalyst.expressions.SplitPart | split_part | SELECT split_part('11.12.13', '.', 3) | struct<split_part(11.12.13, ., 3):string> |
| org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct<SQRT(4):double> |
| org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct<col0:int,col1:int> |
| org.apache.spark.sql.catalyst.expressions.StartsWithExpressionBuilder | startswith | SELECT startswith('Spark SQL', 'Spark') | struct<startswith(Spark SQL, Spark):boolean> |
| org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct<instr(SparkSQL, SQL):int> |
| org.apache.spark.sql.catalyst.expressions.StringLocate | locate | SELECT locate('bar', 'foobarbar') | struct<locate(bar, foobarbar, 1):int> |
| org.apache.spark.sql.catalyst.expressions.StringLocate | position | SELECT position('bar', 'foobarbar') | struct<position(bar, foobarbar, 1):int> |
| org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct<repeat(123, 2):string> |
| org.apache.spark.sql.catalyst.expressions.StringReplace | replace | SELECT replace('ABCabc', 'abc', 'DEF') | struct<replace(ABCabc, abc, DEF):string> |
| org.apache.spark.sql.catalyst.expressions.StringSpace | space | SELECT concat(space(2), '1') | struct<concat(space(2), 1):string> |
| org.apache.spark.sql.catalyst.expressions.StringSplit | split | SELECT split('oneAtwoBthreeC', '[ABC]') | struct<split(oneAtwoBthreeC, [ABC], -1):array<string>> |
| org.apache.spark.sql.catalyst.expressions.StringToMap | str_to_map | SELECT str_to_map('a:1,b:2,c:3', ',', ':') | struct<str_to_map(a:1,b:2,c:3, ,, :):map<string,string>> |
| org.apache.spark.sql.catalyst.expressions.StringTranslate | translate | SELECT translate('AaBbCc', 'abc', '123') | struct<translate(AaBbCc, abc, 123):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrim | trim | SELECT trim(' SparkSQL ') | struct<trim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrimBoth | btrim | SELECT btrim(' SparkSQL ') | struct<btrim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrimLeft | ltrim | SELECT ltrim(' SparkSQL ') | struct<ltrim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StringTrimRight | rtrim | SELECT rtrim(' SparkSQL ') | struct<rtrim( SparkSQL ):string> |
| org.apache.spark.sql.catalyst.expressions.StructsToCsv | to_csv | SELECT to_csv(named_struct('a', 1, 'b', 2)) | struct<to_csv(named_struct(a, 1, b, 2)):string> |
| org.apache.spark.sql.catalyst.expressions.StructsToJson | to_json | SELECT to_json(named_struct('a', 1, 'b', 2)) | struct<to_json(named_struct(a, 1, b, 2)):string> |
| org.apache.spark.sql.catalyst.expressions.StructsToXml | to_xml | SELECT to_xml(named_struct('a', 1, 'b', 2)) | struct<to_xml(named_struct(a, 1, b, 2)):string> |
| org.apache.spark.sql.catalyst.expressions.Substring | substr | SELECT substr('Spark SQL', 5) | struct<substr(Spark SQL, 5, 2147483647):string> |
| org.apache.spark.sql.catalyst.expressions.Substring | substring | SELECT substring('Spark SQL', 5) | struct<substring(Spark SQL, 5, 2147483647):string> |
| org.apache.spark.sql.catalyst.expressions.SubstringIndex | substring_index | SELECT substring_index('www.apache.org', '.', 2) | struct<substring_index(www.apache.org, ., 2):string> |
| org.apache.spark.sql.catalyst.expressions.Subtract | - | SELECT 2 - 1 | struct<(2 - 1):int> |
| org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct<TAN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct<TANH(0):double> |
| org.apache.spark.sql.catalyst.expressions.TimeExpressionBuilder | time | SELECT time('12:25:13.45') | struct<time(12:25:13.45):time(6)> |
| org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct<a:string,start:timestamp,end:timestamp,cnt:bigint> |
| org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct<to_binary(abc, utf-8):binary> |
| org.apache.spark.sql.catalyst.expressions.ToCharacterBuilder | to_char | SELECT to_char(454, '999') | struct<to_char(454, 999):string> |
| org.apache.spark.sql.catalyst.expressions.ToCharacterBuilder | to_varchar | SELECT to_varchar(454, '999') | struct<to_char(454, 999):string> |
| org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct<DEGREES(3.141592653589793):double> |
| org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct<to_number(454, 999):decimal(3,0)> |
| org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct<RADIANS(180):double> |
| org.apache.spark.sql.catalyst.expressions.ToTime | to_time | SELECT to_time('00:12:00') | struct<to_time(00:12:00):time(6)> |
| org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct<to_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
| org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct<to_unix_timestamp(2016-04-08, yyyy-MM-dd):bigint> |
| org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) | struct<transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
| org.apache.spark.sql.catalyst.expressions.TransformValues | transform_values | SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) | struct<transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
| org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct<trunc(2019-08-04, week):date> |
| org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, 2015-03-05T09:32:05.359):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TryAdd | try_add | SELECT try_add(1, 2) | struct<try_add(1, 2):int> |
| org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct<try_aes_decrypt(unhex(6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210), 0000111122223333, GCM, DEFAULT, ):binary> |
| org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct<try_divide(3, 2):double> |
| org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct<try_element_at(array(1, 2, 3), 2):int> |
| org.apache.spark.sql.catalyst.expressions.TryMakeInterval | try_make_interval | SELECT try_make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<try_make_interval(100, 11, 1, 1, 12, 30, 1.001001):interval> |
| org.apache.spark.sql.catalyst.expressions.TryMakeTimestamp | try_make_timestamp | SELECT try_make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<try_make_timestamp(2014, 12, 28, 6, 30, 45.887):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TryMakeTimestampLTZExpressionBuilder | try_make_timestamp_ltz | SELECT try_make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct<try_make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TryMakeTimestampNTZExpressionBuilder | try_make_timestamp_ntz | SELECT try_make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct<try_make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.TryMod | try_mod | SELECT try_mod(3, 2) | struct<try_mod(3, 2):int> |
| org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct<try_multiply(2, 3):int> |
| org.apache.spark.sql.catalyst.expressions.TryParseUrl | try_parse_url | SELECT try_parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct<try_parse_url(http://spark.apache.org/path?query=1, HOST):string> |
| org.apache.spark.sql.catalyst.expressions.TryReflect | try_reflect | SELECT try_reflect('java.util.UUID', 'randomUUID') | struct<try_reflect(java.util.UUID, randomUUID):string> |
| org.apache.spark.sql.catalyst.expressions.TrySubtract | try_subtract | SELECT try_subtract(2, 1) | struct<try_subtract(2, 1):int> |
| org.apache.spark.sql.catalyst.expressions.TryTimeExpressionBuilder | try_time | SELECT try_time('12:25:13.45') | struct<time(12:25:13.45):time(6)> |
| org.apache.spark.sql.catalyst.expressions.TryToBinary | try_to_binary | SELECT try_to_binary('abc', 'utf-8') | struct<try_to_binary(abc, utf-8):binary> |
| org.apache.spark.sql.catalyst.expressions.TryToNumber | try_to_number | SELECT try_to_number('454', '999') | struct<try_to_number(454, 999):decimal(3,0)> |
| org.apache.spark.sql.catalyst.expressions.TryToTimeExpressionBuilder | try_to_time | SELECT try_to_time('00:12:00.001') | struct<try_to_time(to_time(00:12:00.001)):time(6)> |
| org.apache.spark.sql.catalyst.expressions.TryToTimestampExpressionBuilder | try_to_timestamp | SELECT try_to_timestamp('2016-12-31 00:12:00') | struct<try_to_timestamp(2016-12-31 00:12:00):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TryUrlDecode | try_url_decode | SELECT try_url_decode('https%3A%2F%2Fspark.apache.org') | struct<try_url_decode(https%3A%2F%2Fspark.apache.org):string> |
| org.apache.spark.sql.catalyst.expressions.TryValidateUTF8 | try_validate_utf8 | SELECT try_validate_utf8('Spark') | struct<try_validate_utf8(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct<typeof(1):string> |
| org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct<unbase64(U3BhcmsgU1FM):binary> |
| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<negative(1):int> |
| org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> |
| org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct<decode(unhex(537061726B2053514C), UTF-8):string> |
| org.apache.spark.sql.catalyst.expressions.Uniform | uniform | SELECT uniform(10, 20, 0) > 0 AS result | struct<result:boolean> |
| org.apache.spark.sql.catalyst.expressions.UnixDate | unix_date | SELECT unix_date(DATE("1970-01-02")) | struct<unix_date(1970-01-02):int> |
| org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_micros(1970-01-01 00:00:01Z):bigint> |
| org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_millis(1970-01-01 00:00:01Z):bigint> |
| org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct<unix_seconds(1970-01-01 00:00:01Z):bigint> |
| org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct<unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss):bigint> |
| org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct<ucase(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct<upper(SparkSql):string> |
| org.apache.spark.sql.catalyst.expressions.UrlDecode | url_decode | SELECT url_decode('https%3A%2F%2Fspark.apache.org') | struct<url_decode(https%3A%2F%2Fspark.apache.org):string> |
| org.apache.spark.sql.catalyst.expressions.UrlEncode | url_encode | SELECT url_encode('https://spark.apache.org') | struct<url_encode(https://spark.apache.org):string> |
| org.apache.spark.sql.catalyst.expressions.Uuid | uuid | SELECT uuid() | struct<uuid():string> |
| org.apache.spark.sql.catalyst.expressions.ValidateUTF8 | validate_utf8 | SELECT validate_utf8('Spark') | struct<validate_utf8(Spark):string> |
| org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct<weekday(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct<weekofyear(2008-02-20):int> |
| org.apache.spark.sql.catalyst.expressions.WidthBucket | width_bucket | SELECT width_bucket(5.3, 0.2, 10.6, 5) | struct<width_bucket(5.3, 0.2, 10.6, 5):bigint> |
| org.apache.spark.sql.catalyst.expressions.WindowTime | window_time | SELECT a, window.start as start, window.end as end, window_time(window), cnt FROM (SELECT a, window, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, window.start) | struct<a:string,start:timestamp,end:timestamp,window_time(window):timestamp,cnt:bigint> |
org.apache.spark.sql.catalyst.expressions.XmlToStructs from_xml SELECT from_xml('<p><a>1</a><b>0.8</b></p>', 'a INT, b DOUBLE') struct<from_xml(<p><a>1</a><b>0.8</b></p>):struct<a:int,b:double>>
org.apache.spark.sql.catalyst.expressions.XxHash64 xxhash64 SELECT xxhash64('Spark', array(123), 2) struct<xxhash64(Spark, array(123), 2):bigint>
org.apache.spark.sql.catalyst.expressions.Year year SELECT year('2016-07-30') struct<year(2016-07-30):int>
org.apache.spark.sql.catalyst.expressions.ZeroIfNull zeroifnull SELECT zeroifnull(NULL) struct<zeroifnull(NULL):int>
org.apache.spark.sql.catalyst.expressions.ZipWith zip_with SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) struct<zip_with(array(1, 2, 3), array(a, b, c), lambdafunction(named_struct(y, namedlambdavariable(), x, namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array<struct<y:string,x:int>>>
org.apache.spark.sql.catalyst.expressions.aggregate.AnyValue any_value SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col) struct<any_value(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile approx_percentile SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) struct<approx_percentile(col, array(0.5, 0.4, 0.1), 100):array<int>>
org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile percentile_approx SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) struct<percentile_approx(col, array(0.5, 0.4, 0.1), 100):array<int>>
org.apache.spark.sql.catalyst.expressions.aggregate.Average avg SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) struct<avg(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.Average mean SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) struct<mean(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg bit_and SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) struct<bit_and(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg bit_or SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) struct<bit_or(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg bit_xor SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) struct<bit_xor(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd bool_and SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) struct<bool_and(col):boolean>
org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd every SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) struct<every(col):boolean>
org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr any SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) struct<any(col):boolean>
org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr bool_or SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) struct<bool_or(col):boolean>
org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr some SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) struct<some(col):boolean>
org.apache.spark.sql.catalyst.expressions.aggregate.CollectList array_agg SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col) struct<collect_list(col):array<int>>
org.apache.spark.sql.catalyst.expressions.aggregate.CollectList collect_list SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) struct<collect_list(col):array<int>>
org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet collect_set SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) struct<collect_set(col):array<int>>
org.apache.spark.sql.catalyst.expressions.aggregate.Corr corr SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) struct<corr(c1, c2):double>
org.apache.spark.sql.catalyst.expressions.aggregate.Count count SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) struct<count(1):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.CountIf count_if SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) struct<count_if(((col % 2) = 0)):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAggExpressionBuilder count_min_sketch SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col) struct<hex(count_min_sketch(col, 0.5, 0.5, 1)):string>
org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation covar_pop SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) struct<covar_pop(c1, c2):double>
org.apache.spark.sql.catalyst.expressions.aggregate.CovSample covar_samp SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) struct<covar_samp(c1, c2):double>
org.apache.spark.sql.catalyst.expressions.aggregate.First first SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) struct<first(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.First first_value SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) struct<first_value(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.HistogramNumeric histogram_numeric SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col) struct<histogram_numeric(col, 5):array<struct<x:int,y:double>>>
org.apache.spark.sql.catalyst.expressions.aggregate.HllSketchAgg hll_sketch_agg SELECT hll_sketch_estimate(hll_sketch_agg(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col) struct<hll_sketch_estimate(hll_sketch_agg(col, 12)):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.HllUnionAgg hll_union_agg SELECT hll_sketch_estimate(hll_union_agg(sketch, true)) FROM (SELECT hll_sketch_agg(col) as sketch FROM VALUES (1) tab(col) UNION ALL SELECT hll_sketch_agg(col, 20) as sketch FROM VALUES (1) tab(col)) struct<hll_sketch_estimate(hll_union_agg(sketch, true)):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus approx_count_distinct SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) struct<approx_count_distinct(col1):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis kurtosis SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) struct<kurtosis(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.Last last SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) struct<last(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.Last last_value SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) struct<last_value(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.ListAgg listagg SELECT listagg(col) FROM VALUES ('a'), ('b'), ('c') AS tab(col) struct<listagg(col, NULL):string>
org.apache.spark.sql.catalyst.expressions.aggregate.ListAgg string_agg SELECT string_agg(col) FROM VALUES ('a'), ('b'), ('c') AS tab(col) struct<string_agg(col, NULL):string>
org.apache.spark.sql.catalyst.expressions.aggregate.Max max SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) struct<max(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy max_by SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y) struct<max_by(x, y):string>
org.apache.spark.sql.catalyst.expressions.aggregate.Median median SELECT median(col) FROM VALUES (0), (10) AS tab(col) struct<median(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.Min min SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) struct<min(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.MinBy min_by SELECT min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y) struct<min_by(x, y):string>
org.apache.spark.sql.catalyst.expressions.aggregate.ModeBuilder mode SELECT mode(col) FROM VALUES (0), (10), (10) AS tab(col) struct<mode(col):int>
org.apache.spark.sql.catalyst.expressions.aggregate.Percentile percentile SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) struct<percentile(col, 0.3, 1):double>
org.apache.spark.sql.catalyst.expressions.aggregate.PercentileContBuilder percentile_cont SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY col) FROM VALUES (0), (10) AS tab(col) struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.PercentileDiscBuilder percentile_disc SELECT percentile_disc(0.25) WITHIN GROUP (ORDER BY col) FROM VALUES (0), (10) AS tab(col) struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrAvgX regr_avgx SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) struct<regr_avgx(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrAvgY regr_avgy SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) struct<regr_avgy(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrCount regr_count SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) struct<regr_count(y, x):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrIntercept regr_intercept SELECT regr_intercept(y, x) FROM VALUES (1, 1), (2, 2), (3, 3), (4, 4) AS tab(y, x) struct<regr_intercept(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrR2 regr_r2 SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) struct<regr_r2(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrSXX regr_sxx SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) struct<regr_sxx(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrSXY regr_sxy SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) struct<regr_sxy(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrSYY regr_syy SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) struct<regr_syy(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.RegrSlope regr_slope SELECT regr_slope(y, x) FROM VALUES (1, 1), (2, 2), (3, 3), (4, 4) AS tab(y, x) struct<regr_slope(y, x):double>
org.apache.spark.sql.catalyst.expressions.aggregate.Skewness skewness SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) struct<skewness(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop stddev_pop SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) struct<stddev_pop(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp std SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) struct<std(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp stddev SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) struct<stddev(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp stddev_samp SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) struct<stddev_samp(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.Sum sum SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) struct<sum(col):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.TryAverageExpressionBuilder try_avg SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col) struct<try_avg(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.TrySumExpressionBuilder try_sum SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col) struct<try_sum(col):bigint>
org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop var_pop SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) struct<var_pop(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp var_samp SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) struct<var_samp(col):double>
org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp variance SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) struct<variance(col):double>
org.apache.spark.sql.catalyst.expressions.variant.IsVariantNull is_variant_null SELECT is_variant_null(parse_json('null')) struct<is_variant_null(parse_json(null)):boolean>
org.apache.spark.sql.catalyst.expressions.variant.ParseJsonExpressionBuilder parse_json SELECT parse_json('{"a":1,"b":0.8}') struct<parse_json({"a":1,"b":0.8}):variant>
org.apache.spark.sql.catalyst.expressions.variant.SchemaOfVariant schema_of_variant SELECT schema_of_variant(parse_json('null')) struct<schema_of_variant(parse_json(null)):string>
org.apache.spark.sql.catalyst.expressions.variant.SchemaOfVariantAgg schema_of_variant_agg SELECT schema_of_variant_agg(parse_json(j)) FROM VALUES ('1'), ('2'), ('3') AS tab(j) struct<schema_of_variant_agg(parse_json(j)):string>
org.apache.spark.sql.catalyst.expressions.variant.ToVariantObject to_variant_object SELECT to_variant_object(named_struct('a', 1, 'b', 2)) struct<to_variant_object(named_struct(a, 1, b, 2)):variant>
org.apache.spark.sql.catalyst.expressions.variant.TryParseJsonExpressionBuilder try_parse_json SELECT try_parse_json('{"a":1,"b":0.8}') struct<try_parse_json({"a":1,"b":0.8}):variant>
org.apache.spark.sql.catalyst.expressions.variant.TryVariantGetExpressionBuilder try_variant_get SELECT try_variant_get(parse_json('{"a": 1}'), '$.a', 'int') struct<try_variant_get(parse_json({"a": 1}), $.a):int>
org.apache.spark.sql.catalyst.expressions.variant.VariantGetExpressionBuilder variant_get SELECT variant_get(parse_json('{"a": 1}'), '$.a', 'int') struct<variant_get(parse_json({"a": 1}), $.a):int>
org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean xpath_boolean SELECT xpath_boolean('<a><b>1</b></a>','a/b') struct<xpath_boolean(<a><b>1</b></a>, a/b):boolean>
org.apache.spark.sql.catalyst.expressions.xml.XPathDouble xpath_double SELECT xpath_double('<a><b>1</b><b>2</b></a>', 'sum(a/b)') struct<xpath_double(<a><b>1</b><b>2</b></a>, sum(a/b)):double>
org.apache.spark.sql.catalyst.expressions.xml.XPathDouble xpath_number SELECT xpath_number('<a><b>1</b><b>2</b></a>', 'sum(a/b)') struct<xpath_number(<a><b>1</b><b>2</b></a>, sum(a/b)):double>
org.apache.spark.sql.catalyst.expressions.xml.XPathFloat xpath_float SELECT xpath_float('<a><b>1</b><b>2</b></a>', 'sum(a/b)') struct<xpath_float(<a><b>1</b><b>2</b></a>, sum(a/b)):float>
org.apache.spark.sql.catalyst.expressions.xml.XPathInt xpath_int SELECT xpath_int('<a><b>1</b><b>2</b></a>', 'sum(a/b)') struct<xpath_int(<a><b>1</b><b>2</b></a>, sum(a/b)):int>
org.apache.spark.sql.catalyst.expressions.xml.XPathList xpath SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>>
org.apache.spark.sql.catalyst.expressions.xml.XPathLong xpath_long SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint>
org.apache.spark.sql.catalyst.expressions.xml.XPathShort xpath_short SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint>
org.apache.spark.sql.catalyst.expressions.xml.XPathString xpath_string SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string>
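Any row's output schema can be checked locally by running its example query under `DESCRIBE QUERY` in `spark-sql`, which prints the same column name and type that make up the `struct<...>` cell. A minimal sketch, assuming a Spark build matching the version that generated this table (older releases may render the schema text differently):

```sql
-- Print the output schema of the xpath_string example in the last row above.
-- Expected output: one column named xpath_string(<a><b>b</b><c>cc</c></a>, a/c)
-- of type string, matching the struct<...> cell.
DESCRIBE QUERY SELECT xpath_string('<a><b>b</b><c>cc</c></a>', 'a/c');
```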