From b9d8f2e08067e9e939b472b4d8cbd4acc0492a0d Mon Sep 17 00:00:00 2001 From: Amogh-Bharadwaj Date: Wed, 9 Apr 2025 23:10:10 +0530 Subject: [PATCH 01/10] add mysql datatype doc --- .../clickpipes/mysql/datatypes.md | 31 +++++++++++++++++++ sidebars.js | 1 + 2 files changed, 32 insertions(+) create mode 100644 docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md new file mode 100644 index 00000000000..02fb3a951a3 --- /dev/null +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -0,0 +1,31 @@ +--- +title: 'ClickPipes for MySQL: Supported data types' +slug: /integrations/clickpipes/mysql/datatypes +description: 'Page describing MySQL ClickPipe datatype mapping from MySQL to ClickHouse' +--- + +Here is the supported data-type mapping for the MySQL ClickPipe: + +| MySQL Type | ClickHouse type | Notes | +| -------------------------------------------------------------------------- | ------------------------------------------ | -------------------------------------------------------------------------------------- | +| Enum | LowCardinality(String) | MySQL doesn't have user-defined types for enums; instead, columns have type `enum('a','b','c')`. | +| Set | String | `set` is like `enum`, except from `set('a','b','c')` value can be `'a,b'`. `set` can only have 64 items, as it's internally a 64-bit bitset. | +| Decimal | Decimal | `numeric` in MySQL is the same as `decimal`, with a maximum of 65 digits. Could use more boundary checking. | +| TinyInt | Int8 | Supports unsigned. | +| SmallInt | Int16 | Supports unsigned. | +| MediumInt, Int | Int32 | Supports unsigned. | +| BigInt | Int64 | Supports unsigned. | +| Year | Int16 | This type is limited; it only supports `0000` and `1900` to `2155`. 
| +| TinyText, Text, MediumText, LongText | String | | +| TinyBlob, Blob, MediumBlob, LongBlob | String | | +| Char, Varchar | String | | +| Binary, VarBinary | String | | +| TinyInt(1) | Bool | This is a display hint; MySQL has `boolean` aliased to `tinyint(1)`. | +| JSON | String | MySQL only; MariaDB `json` is just an alias for `text` with a constraint. | +| Geometry & Geometry Types | String | WKT (Well-Known Text). WKT may suffer from potential precision loss; subtypes need testing. | +| Vector | Array(Float32) | MySQL only; MariaDB is adding support soon. | +| Float | Float32 | May lose a bit of precision during initial load due to text protocols. | +| Double | Float64 | May lose a bit of precision during initial load due to text protocols. | +| Date | Date32 | | +| Time | DateTime64(6) | The date portion is Unix epoch. | +| Datetime, Timestamp | DateTime64(6) | | diff --git a/sidebars.js b/sidebars.js index cc0c7b0ae0d..cfd7df97f5a 100644 --- a/sidebars.js +++ b/sidebars.js @@ -718,6 +718,7 @@ const sidebars = { "integrations/data-ingestion/clickpipes/mysql/source/aurora", ], }, + "integrations/data-ingestion/clickpipes/mysql/datatypes" ], }, ], From eb958344d4156278cf11ef261a441783db73da87 Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj <65964360+Amogh-Bharadwaj@users.noreply.github.com> Date: Fri, 11 Apr 2025 19:29:47 +0530 Subject: [PATCH 02/10] Update docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philip Dubé --- docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index 02fb3a951a3..572a7b68469 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ 
b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -22,7 +22,7 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | Binary, VarBinary | String | | | TinyInt(1) | Bool | This is a display hint; MySQL has `boolean` aliased to `tinyint(1)`. | | JSON | String | MySQL only; MariaDB `json` is just an alias for `text` with a constraint. | -| Geometry & Geometry Types | String | WKT (Well-Known Text). WKT may suffer from potential precision loss; subtypes need testing. | +| Geometry & Geometry Types | String | WKT (Well-Known Text). WKT may suffer from small precision loss. | | Vector | Array(Float32) | MySQL only; MariaDB is adding support soon. | | Float | Float32 | May lose a bit of precision during initial load due to text protocols. | | Double | Float64 | May lose a bit of precision during initial load due to text protocols. | From 161e912003bea106b5768a8feb841494f04608af Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj <65964360+Amogh-Bharadwaj@users.noreply.github.com> Date: Fri, 11 Apr 2025 19:30:00 +0530 Subject: [PATCH 03/10] Update docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philip Dubé --- docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index 572a7b68469..ea05dd57781 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -15,7 +15,7 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | SmallInt | Int16 | Supports unsigned. | | MediumInt, Int | Int32 | Supports unsigned. | | BigInt | Int64 | Supports unsigned. | -| Year | Int16 | This type is limited; it only supports `0000` and `1900` to `2155`. 
| +| Year | Int16 | | | TinyText, Text, MediumText, LongText | String | | | TinyBlob, Blob, MediumBlob, LongBlob | String | | | Char, Varchar | String | | From 466e84e03d7e0c06020c3b231a34115f0813317f Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj <65964360+Amogh-Bharadwaj@users.noreply.github.com> Date: Fri, 11 Apr 2025 19:30:17 +0530 Subject: [PATCH 04/10] Update docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philip Dubé --- docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index ea05dd57781..5b61a1caa70 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -10,7 +10,7 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | -------------------------------------------------------------------------- | ------------------------------------------ | -------------------------------------------------------------------------------------- | | Enum | LowCardinality(String) | MySQL doesn't have user-defined types for enums; instead, columns have type `enum('a','b','c')`. | | Set | String | `set` is like `enum`, except from `set('a','b','c')` value can be `'a,b'`. `set` can only have 64 items, as it's internally a 64-bit bitset. | -| Decimal | Decimal | `numeric` in MySQL is the same as `decimal`, with a maximum of 65 digits. Could use more boundary checking. | +| Decimal | Decimal | | | TinyInt | Int8 | Supports unsigned. | | SmallInt | Int16 | Supports unsigned. | | MediumInt, Int | Int32 | Supports unsigned. 
| From af1a463783d9e9ef097ffccf857f40f0cfd434f9 Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj <65964360+Amogh-Bharadwaj@users.noreply.github.com> Date: Fri, 11 Apr 2025 19:30:25 +0530 Subject: [PATCH 05/10] Update docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philip Dubé --- .../integrations/data-ingestion/clickpipes/mysql/datatypes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index 5b61a1caa70..e035a2a0b2b 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -8,8 +8,8 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | MySQL Type | ClickHouse type | Notes | | -------------------------------------------------------------------------- | ------------------------------------------ | -------------------------------------------------------------------------------------- | -| Enum | LowCardinality(String) | MySQL doesn't have user-defined types for enums; instead, columns have type `enum('a','b','c')`. | -| Set | String | `set` is like `enum`, except from `set('a','b','c')` value can be `'a,b'`. `set` can only have 64 items, as it's internally a 64-bit bitset. | +| Enum | LowCardinality(String) | | +| Set | String | | | Decimal | Decimal | | | TinyInt | Int8 | Supports unsigned. | | SmallInt | Int16 | Supports unsigned. 
| From 1017ceb6edb612e01f13402e7510f349e627162a Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj <65964360+Amogh-Bharadwaj@users.noreply.github.com> Date: Fri, 11 Apr 2025 19:30:33 +0530 Subject: [PATCH 06/10] Update docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philip Dubé --- docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index e035a2a0b2b..fb2dbca7741 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -20,7 +20,7 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | TinyBlob, Blob, MediumBlob, LongBlob | String | | | Char, Varchar | String | | | Binary, VarBinary | String | | -| TinyInt(1) | Bool | This is a display hint; MySQL has `boolean` aliased to `tinyint(1)`. | +| TinyInt(1) | Bool | | | JSON | String | MySQL only; MariaDB `json` is just an alias for `text` with a constraint. | | Geometry & Geometry Types | String | WKT (Well-Known Text). WKT may suffer from small precision loss. | | Vector | Array(Float32) | MySQL only; MariaDB is adding support soon. 
| From f05a862d7a48d1ee8d2a0728003460613278e736 Mon Sep 17 00:00:00 2001 From: Amogh-Bharadwaj Date: Fri, 11 Apr 2025 19:32:28 +0530 Subject: [PATCH 07/10] change float, double note --- .../integrations/data-ingestion/clickpipes/mysql/datatypes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index fb2dbca7741..73a3db9ed86 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -24,8 +24,8 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | JSON | String | MySQL only; MariaDB `json` is just an alias for `text` with a constraint. | | Geometry & Geometry Types | String | WKT (Well-Known Text). WKT may suffer from small precision loss. | | Vector | Array(Float32) | MySQL only; MariaDB is adding support soon. | -| Float | Float32 | May lose a bit of precision during initial load due to text protocols. | -| Double | Float64 | May lose a bit of precision during initial load due to text protocols. | +| Float | Float32 | Precision on ClickHouse may vary from MySQL from during initial load due to text protocols. | +| Double | Float64 | Precision on ClickHouse may vary from MySQL from during initial load due to text protocols. | | Date | Date32 | | | Time | DateTime64(6) | The date portion is Unix epoch. 
| | Datetime, Timestamp | DateTime64(6) | | From 923e5fe462d4ddf796830d71fa0ee0fd7186c503 Mon Sep 17 00:00:00 2001 From: Amogh-Bharadwaj Date: Fri, 11 Apr 2025 19:33:27 +0530 Subject: [PATCH 08/10] change to differ --- .../integrations/data-ingestion/clickpipes/mysql/datatypes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index 73a3db9ed86..440374d7ac6 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -24,8 +24,8 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | JSON | String | MySQL only; MariaDB `json` is just an alias for `text` with a constraint. | | Geometry & Geometry Types | String | WKT (Well-Known Text). WKT may suffer from small precision loss. | | Vector | Array(Float32) | MySQL only; MariaDB is adding support soon. | -| Float | Float32 | Precision on ClickHouse may vary from MySQL from during initial load due to text protocols. | -| Double | Float64 | Precision on ClickHouse may vary from MySQL from during initial load due to text protocols. | +| Float | Float32 | Precision on ClickHouse may differ from MySQL from during initial load due to text protocols. | +| Double | Float64 | Precision on ClickHouse may differ from MySQL from during initial load due to text protocols. | | Date | Date32 | | | Time | DateTime64(6) | The date portion is Unix epoch. 
| | Datetime, Timestamp | DateTime64(6) | | From 171ef30dc391582a4db0162dab1e08e3d82748c8 Mon Sep 17 00:00:00 2001 From: Amogh-Bharadwaj Date: Fri, 11 Apr 2025 19:37:00 +0530 Subject: [PATCH 09/10] add to dictionary --- scripts/aspell-dict-file.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/scripts/aspell-dict-file.txt b/scripts/aspell-dict-file.txt index 53f887ae2e4..a0ddec11a4e 100644 --- a/scripts/aspell-dict-file.txt +++ b/scripts/aspell-dict-file.txt @@ -263,6 +263,19 @@ autovacuum VACUUM resync Resync +--docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md-- +BigInt +LongBlob +LongText +MediumBlob +MediumInt +MediumText +SmallInt +TinyBlob +TinyInt +TinyText +VarBinary +Varchar --docs/cloud/security/cmek.md-- Poller --docs/integrations/data-ingestion/dbms/postgresql/postgres-vs-clickhouse.md-- From fd4514c698073be4fd5b2cfe557dd2fa5a48a9f5 Mon Sep 17 00:00:00 2001 From: Amogh Bharadwaj <65964360+Amogh-Bharadwaj@users.noreply.github.com> Date: Fri, 11 Apr 2025 19:38:36 +0530 Subject: [PATCH 10/10] Update docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philip Dubé --- .../integrations/data-ingestion/clickpipes/mysql/datatypes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md index 440374d7ac6..9f1f62222b6 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/datatypes.md @@ -24,8 +24,8 @@ Here is the supported data-type mapping for the MySQL ClickPipe: | JSON | String | MySQL only; MariaDB `json` is just an alias for `text` with a constraint. | | Geometry & Geometry Types | String | WKT (Well-Known Text). WKT may suffer from small precision loss. 
| | Vector | Array(Float32) | MySQL only; MariaDB is adding support soon. | -| Float | Float32 | Precision on ClickHouse may differ from MySQL from during initial load due to text protocols. | -| Double | Float64 | Precision on ClickHouse may differ from MySQL from during initial load due to text protocols. | +| Float | Float32 | Precision in ClickHouse may differ from MySQL during the initial load due to text protocols. | +| Double | Float64 | Precision in ClickHouse may differ from MySQL during the initial load due to text protocols. | | Date | Date32 | | | Time | DateTime64(6) | The date portion is Unix epoch. | | Datetime, Timestamp | DateTime64(6) | |