From b0f043b4ce40df16eabd6895e656ff14b09b2534 Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Wed, 18 Sep 2019 16:32:21 -0700 Subject: [PATCH 1/7] Made separate queries to leverage arrow based and hardware acceleration based performance --- benchmark/csharp/Tpch/Tpch.csproj | 11 +--- .../csharp/Tpch/TpchFunctionalQueries.cs | 60 ++++++++++++++++++- .../csharp/Tpch/VectorFunctions.intrinsics.cs | 2 +- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj index 499fd7916..75d266228 100644 --- a/benchmark/csharp/Tpch/Tpch.csproj +++ b/benchmark/csharp/Tpch/Tpch.csproj @@ -8,7 +8,7 @@ Uncomment the following line to build for netcoreapp3.0. Don't build for netcoreapp3.0, by default, because it requires VS 16.3. --> - + $(TargetFrameworks);netcoreapp3.0 Tpch Tpch @@ -26,16 +26,7 @@ true - - - - - - - - - diff --git a/benchmark/csharp/Tpch/TpchFunctionalQueries.cs b/benchmark/csharp/Tpch/TpchFunctionalQueries.cs index cb53fdabd..3c433b501 100644 --- a/benchmark/csharp/Tpch/TpchFunctionalQueries.cs +++ b/benchmark/csharp/Tpch/TpchFunctionalQueries.cs @@ -60,7 +60,7 @@ internal void Q1() .Show(); } - internal void Q1v() + internal void Q1a() { Func discPrice = VectorUdf( (price, discount) => VectorFunctions.ComputeDiscountPrice(price, discount)); @@ -82,6 +82,28 @@ internal void Q1v() .Show(); } + internal void Q1ha() + { + Func discPrice = VectorUdf( + (price, discount) => VectorFunctionsIntrinsics.ComputeDiscountPrice(price, discount)); + + Func total = VectorUdf( + (price, discount, tax) => VectorFunctionsIntrinsics.ComputeTotal(price, discount, tax)); + + _lineitem.Filter(Col("l_shipdate") <= "1998-09-02") + .GroupBy(Col("l_returnflag"), Col("l_linestatus")) + .Agg(Sum(Col("l_quantity")).As("sum_qty"), Sum(Col("l_extendedprice")).As("sum_base_price"), + Sum(discPrice(Col("l_extendedprice"), Col("l_discount"))).As("sum_disc_price"), + Sum(total(Col("l_extendedprice"), Col("l_discount"), 
Col("l_tax"))).As("sum_charge"), + Avg(Col("l_quantity")).As("avg_qty"), + Avg(Col("l_extendedprice")).As("avg_price"), + Avg(Col("l_discount")).As("avg_disc"), + Count(Col("l_quantity")).As("count_order") + ) + .Sort(Col("l_returnflag"), Col("l_linestatus")) + .Show(); + } + internal void Q2() { DataFrame europe = _region.Filter(Col("r_name") == "EUROPE") @@ -227,7 +249,7 @@ internal void Q8() .Show(); } - internal void Q8v() + internal void Q8a() { Func getYear = Udf(x => x.Substring(0, 4)); Func discPrice = VectorUdf( @@ -261,6 +283,40 @@ internal void Q8v() .Show(); } + internal void Q8ha() + { + Func getYear = Udf(x => x.Substring(0, 4)); + Func discPrice = VectorUdf( + (price, discount) => VectorFunctionsIntrinsics.ComputeDiscountPrice(price, discount)); + + Func isBrazil = Udf((x, y) => x == "BRAZIL" ? y : 0); + + DataFrame fregion = _region.Filter(Col("r_name") == "AMERICA"); + DataFrame forder = _orders.Filter(Col("o_orderdate") <= "1996-12-31" & Col("o_orderdate") >= "1995-01-01"); + DataFrame fpart = _part.Filter(Col("p_type") == "ECONOMY ANODIZED STEEL"); + + DataFrame nat = _nation.Join(_supplier, Col("n_nationkey") == _supplier["s_nationkey"]); + + DataFrame line = _lineitem.Select(Col("l_partkey"), Col("l_suppkey"), Col("l_orderkey"), + discPrice(Col("l_extendedprice"), Col("l_discount")).As("volume")) + .Join(fpart, Col("l_partkey") == fpart["p_partkey"]) + .Join(nat, Col("l_suppkey") == nat["s_suppkey"]); + + _nation.Join(fregion, Col("n_regionkey") == fregion["r_regionkey"]) + .Select(Col("n_nationkey")) + .Join(_customer, Col("n_nationkey") == _customer["c_nationkey"]) + .Select(Col("c_custkey")) + .Join(forder, Col("c_custkey") == forder["o_custkey"]) + .Select(Col("o_orderkey"), Col("o_orderdate")) + .Join(line, Col("o_orderkey") == line["l_orderkey"]) + .Select(getYear(Col("o_orderdate")).As("o_year"), Col("volume"), + isBrazil(Col("n_name"), Col("volume")).As("case_volume")) + .GroupBy(Col("o_year")) + .Agg((Sum(Col("case_volume")) / 
Sum("volume")).As("mkt_share")) + .Sort(Col("o_year")) + .Show(); + } + internal void Q9() { Func getYear = Udf(x => x.Substring(0, 4)); diff --git a/benchmark/csharp/Tpch/VectorFunctions.intrinsics.cs b/benchmark/csharp/Tpch/VectorFunctions.intrinsics.cs index e63849a3b..9a709c2ed 100644 --- a/benchmark/csharp/Tpch/VectorFunctions.intrinsics.cs +++ b/benchmark/csharp/Tpch/VectorFunctions.intrinsics.cs @@ -9,7 +9,7 @@ namespace Tpch { - internal static class VectorFunctions + internal static class VectorFunctionsIntrinsics { internal static unsafe DoubleArray ComputeTotal(DoubleArray price, DoubleArray discount, DoubleArray tax) { From e2db3a56e79649436742af4acbb644735aee9079 Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Thu, 26 Sep 2019 11:55:21 -0700 Subject: [PATCH 2/7] Refactored code for combining common part of queries 1 and 8 --- .../csharp/Tpch/TpchFunctionalQueries.cs | 79 ++++++------------- 1 file changed, 25 insertions(+), 54 deletions(-) diff --git a/benchmark/csharp/Tpch/TpchFunctionalQueries.cs b/benchmark/csharp/Tpch/TpchFunctionalQueries.cs index 3c433b501..6a965476b 100644 --- a/benchmark/csharp/Tpch/TpchFunctionalQueries.cs +++ b/benchmark/csharp/Tpch/TpchFunctionalQueries.cs @@ -60,14 +60,8 @@ internal void Q1() .Show(); } - internal void Q1a() - { - Func discPrice = VectorUdf( - (price, discount) => VectorFunctions.ComputeDiscountPrice(price, discount)); - - Func total = VectorUdf( - (price, discount, tax) => VectorFunctions.ComputeTotal(price, discount, tax)); - + internal void Q1aCommon(Func discPrice, Func total) + { _lineitem.Filter(Col("l_shipdate") <= "1998-09-02") .GroupBy(Col("l_returnflag"), Col("l_linestatus")) .Agg(Sum(Col("l_quantity")).As("sum_qty"), Sum(Col("l_extendedprice")).As("sum_base_price"), @@ -82,6 +76,17 @@ internal void Q1a() .Show(); } + internal void Q1a() + { + Func discPrice = VectorUdf( + (price, discount) => VectorFunctions.ComputeDiscountPrice(price, discount)); + + Func total = VectorUdf( + (price, 
discount, tax) => VectorFunctions.ComputeTotal(price, discount, tax)); + + Q1aCommon(discPrice, total); + } + internal void Q1ha() { Func discPrice = VectorUdf( @@ -90,18 +95,7 @@ internal void Q1ha() Func total = VectorUdf( (price, discount, tax) => VectorFunctionsIntrinsics.ComputeTotal(price, discount, tax)); - _lineitem.Filter(Col("l_shipdate") <= "1998-09-02") - .GroupBy(Col("l_returnflag"), Col("l_linestatus")) - .Agg(Sum(Col("l_quantity")).As("sum_qty"), Sum(Col("l_extendedprice")).As("sum_base_price"), - Sum(discPrice(Col("l_extendedprice"), Col("l_discount"))).As("sum_disc_price"), - Sum(total(Col("l_extendedprice"), Col("l_discount"), Col("l_tax"))).As("sum_charge"), - Avg(Col("l_quantity")).As("avg_qty"), - Avg(Col("l_extendedprice")).As("avg_price"), - Avg(Col("l_discount")).As("avg_disc"), - Count(Col("l_quantity")).As("count_order") - ) - .Sort(Col("l_returnflag"), Col("l_linestatus")) - .Show(); + Q1aCommon(discPrice, total); } internal void Q2() @@ -249,12 +243,9 @@ internal void Q8() .Show(); } - internal void Q8a() + internal void Q8aCommon(Func discPrice) { - Func getYear = Udf(x => x.Substring(0, 4)); - Func discPrice = VectorUdf( - (price, discount) => VectorFunctions.ComputeDiscountPrice(price, discount)); - + Func getYear = Udf(x => x.Substring(0, 4)); Func isBrazil = Udf((x, y) => x == "BRAZIL" ? y : 0); DataFrame fregion = _region.Filter(Col("r_name") == "AMERICA"); @@ -283,38 +274,18 @@ internal void Q8a() .Show(); } + internal void Q8a() + { + Func discPrice = VectorUdf( + (price, discount) => VectorFunctions.ComputeDiscountPrice(price, discount)); + Q8aCommon(discPrice); + } + internal void Q8ha() - { - Func getYear = Udf(x => x.Substring(0, 4)); + { Func discPrice = VectorUdf( (price, discount) => VectorFunctionsIntrinsics.ComputeDiscountPrice(price, discount)); - - Func isBrazil = Udf((x, y) => x == "BRAZIL" ? 
y : 0); - - DataFrame fregion = _region.Filter(Col("r_name") == "AMERICA"); - DataFrame forder = _orders.Filter(Col("o_orderdate") <= "1996-12-31" & Col("o_orderdate") >= "1995-01-01"); - DataFrame fpart = _part.Filter(Col("p_type") == "ECONOMY ANODIZED STEEL"); - - DataFrame nat = _nation.Join(_supplier, Col("n_nationkey") == _supplier["s_nationkey"]); - - DataFrame line = _lineitem.Select(Col("l_partkey"), Col("l_suppkey"), Col("l_orderkey"), - discPrice(Col("l_extendedprice"), Col("l_discount")).As("volume")) - .Join(fpart, Col("l_partkey") == fpart["p_partkey"]) - .Join(nat, Col("l_suppkey") == nat["s_suppkey"]); - - _nation.Join(fregion, Col("n_regionkey") == fregion["r_regionkey"]) - .Select(Col("n_nationkey")) - .Join(_customer, Col("n_nationkey") == _customer["c_nationkey"]) - .Select(Col("c_custkey")) - .Join(forder, Col("c_custkey") == forder["o_custkey"]) - .Select(Col("o_orderkey"), Col("o_orderdate")) - .Join(line, Col("o_orderkey") == line["l_orderkey"]) - .Select(getYear(Col("o_orderdate")).As("o_year"), Col("volume"), - isBrazil(Col("n_name"), Col("volume")).As("case_volume")) - .GroupBy(Col("o_year")) - .Agg((Sum(Col("case_volume")) / Sum("volume")).As("mkt_share")) - .Sort(Col("o_year")) - .Show(); + Q8aCommon(discPrice); } internal void Q9() From efd5cf9d07f9865323eb0cf98783902b1408a585 Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Fri, 27 Sep 2019 10:51:10 -0700 Subject: [PATCH 3/7] Updated to comment .NET core 3.0 framework as it is not supported --- benchmark/csharp/Tpch/Tpch.csproj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj index 75d266228..e7f1e320d 100644 --- a/benchmark/csharp/Tpch/Tpch.csproj +++ b/benchmark/csharp/Tpch/Tpch.csproj @@ -7,8 +7,9 @@ + $(TargetFrameworks);netcoreapp3.0 + --> Tpch Tpch From 183c3a6162a1f19d22f25b8f7b7a36457524fb2b Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Fri, 10 Jan 2020 17:25:04 -0800 Subject: 
[PATCH 4/7] Latest changes from master for targeting netcoreapp3.1 --- benchmark/csharp/Tpch/Tpch.csproj | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj index c28843126..ea2db62e9 100644 --- a/benchmark/csharp/Tpch/Tpch.csproj +++ b/benchmark/csharp/Tpch/Tpch.csproj @@ -2,14 +2,8 @@ Exe - net461;netcoreapp2.1 - netcoreapp2.1 - + net461;netcoreapp3.1 + netcoreapp3.1 Tpch Tpch @@ -17,7 +11,7 @@ - + @@ -27,7 +21,7 @@ true - + @@ -38,5 +32,5 @@ - + From d3c512ac03c15f4d990ab16e76e5edb44242be88 Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Mon, 13 Jan 2020 15:19:47 -0800 Subject: [PATCH 5/7] Removing net461 --- benchmark/csharp/Tpch/Tpch.csproj | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj index ea2db62e9..e9b321673 100644 --- a/benchmark/csharp/Tpch/Tpch.csproj +++ b/benchmark/csharp/Tpch/Tpch.csproj @@ -2,7 +2,7 @@ Exe - net461;netcoreapp3.1 + netcoreapp3.1 netcoreapp3.1 Tpch Tpch @@ -11,7 +11,6 @@ - From 3b78d2eede2923f947769ef9c7164eda37298531 Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Mon, 13 Jan 2020 16:38:31 -0800 Subject: [PATCH 6/7] Testing removing compile groups --- benchmark/csharp/Tpch/Tpch.csproj | 9 --------- 1 file changed, 9 deletions(-) diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj index e9b321673..55877c728 100644 --- a/benchmark/csharp/Tpch/Tpch.csproj +++ b/benchmark/csharp/Tpch/Tpch.csproj @@ -21,15 +21,6 @@ true - - - - - - - - - From 445b38a0e8c47957055b041d81210dc3c5bc42c4 Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Mon, 13 Jan 2020 16:53:18 -0800 Subject: [PATCH 7/7] Adding closing when tag --- benchmark/csharp/Tpch/Tpch.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj index 55877c728..815dca6f3 
100644 --- a/benchmark/csharp/Tpch/Tpch.csproj +++ b/benchmark/csharp/Tpch/Tpch.csproj @@ -20,7 +20,7 @@ true - +