From 866951801c75f56a3df9bbf9b9e7f7b23568c322 Mon Sep 17 00:00:00 2001 From: jaydxn1 Date: Sun, 13 Apr 2025 19:46:31 +0100 Subject: [PATCH 1/4] refactor extensionarrays to use from_sequence, use df from_mgr to resolve warnings --- connectorx-python/connectorx/__init__.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py index 55e95d3ef..1b7f164bc 100644 --- a/connectorx-python/connectorx/__init__.py +++ b/connectorx-python/connectorx/__init__.py @@ -459,23 +459,27 @@ def reconstruct_pandas(df_infos: _DataframeInfos) -> pd.DataFrame: pd.core.internals.make_block(block_data, placement=binfo.cids) ) elif binfo.dt == 1: # IntegerArray + integer_array = pd.core.arrays.IntegerArray._from_sequence(block_data[0]) + integer_array._mask = block_data[1] blocks.append( pd.core.internals.make_block( - pd.core.arrays.IntegerArray(block_data[0], block_data[1]), + integer_array, placement=binfo.cids[0], ) ) elif binfo.dt == 2: # BooleanArray + bool_array = pd.core.arrays.BooleanArray._from_sequence(block_data[0]) + bool_array._mask = block_data[1] blocks.append( pd.core.internals.make_block( - pd.core.arrays.BooleanArray(block_data[0], block_data[1]), + bool_array, placement=binfo.cids[0], ) ) elif binfo.dt == 3: # DatetimeArray blocks.append( pd.core.internals.make_block( - pd.core.arrays.DatetimeArray(block_data), placement=binfo.cids + pd.core.arrays.DatetimeArray._from_sequence(block_data), placement=binfo.cids ) ) else: @@ -484,7 +488,7 @@ def reconstruct_pandas(df_infos: _DataframeInfos) -> pd.DataFrame: block_manager = pd.core.internals.BlockManager( blocks, [pd.Index(headers), pd.RangeIndex(start=0, stop=nrows, step=1)] ) - df = pd.DataFrame(block_manager) + df = pd.DataFrame._from_mgr(block_manager, axes=[headers, range(nrows)]) return df From ba5dbace82d94adac26d0726e4775ed251b61b5b Mon Sep 17 00:00:00 2001 From: jaydxn1 Date: Sun, 13 Apr 2025 19:57:42 +0100 Subject: [PATCH 2/4] updated install readme for linux arm64 release --- docs/install.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/install.md b/docs/install.md index 0a77c3e29..e585c2f68 100644 --- a/docs/install.md +++ b/docs/install.md @@ -10,6 +10,10 @@ The easiest way to install ConnectorX is using pip, with the following command: pip install connectorx ``` +```{note} +For AArch64 or ARM64 Linux users, connectorx==0.4.3 and above is only available for distributions using `glibc 2.35` and above. Specifically, the re-release for this architecture was tested on Ubuntu 22.04. For older distributions, the latest version available is connectorx==0.2.3 due to dependency limitations. +``` + ### Build from source code * Step 0: Install tools. From a7f708b6cf58d52790a06065fe1ec5b87d4338df Mon Sep 17 00:00:00 2001 From: jaydxn1 Date: Sun, 13 Apr 2025 20:02:40 +0100 Subject: [PATCH 3/4] update readme for linux arm64 release --- README.md | 2 ++ docs/install.md | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e21c329c3..de04b077e 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,8 @@ Check out more detailed usage and examples [here](https://sfu-db.github.io/conne pip install connectorx ``` +_For AArch64 or ARM64 Linux users, `connectorx==0.4.3 & above` is only available for distributions using `glibc 2.35` and above. Specifically, the re-release for this architecture was tested on Ubuntu 22.04. For older distributions, the latest version available is `connectorx==0.2.3` due to dependency limitations._ + Check out [here](https://sfu-db.github.io/connector-x/install.html#build-from-source-code) to see how to build python wheel from source. # Performance diff --git a/docs/install.md b/docs/install.md index e585c2f68..155e7d90b 100644 --- a/docs/install.md +++ b/docs/install.md @@ -11,7 +11,7 @@ pip install connectorx ``` ```{note} -For AArch64 or ARM64 Linux users, connectorx==0.4.3 and above is only available for distributions using `glibc 2.35` and above. Specifically, the re-release for this architecture was tested on Ubuntu 22.04. For older distributions, the latest version available is connectorx==0.2.3 due to dependency limitations. +For AArch64 or ARM64 Linux users, `connectorx==0.4.3 & above` is only available for distributions using `glibc 2.35` and above. Specifically, the re-release for this architecture was tested on Ubuntu 22.04. For older distributions, the latest version available is `connectorx==0.2.3` due to dependency limitations. ``` ### Build from source code From e37a96a2a58b604388eec2fab539590284b5e22c Mon Sep 17 00:00:00 2001 From: jaydxn1 Date: Mon, 28 Apr 2025 07:42:18 +0800 Subject: [PATCH 4/4] revert changes for integerarray & booleanarray --- connectorx-python/connectorx/__init__.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py index 1b7f164bc..643a80467 100644 --- a/connectorx-python/connectorx/__init__.py +++ b/connectorx-python/connectorx/__init__.py @@ -459,20 +459,16 @@ def reconstruct_pandas(df_infos: _DataframeInfos) -> pd.DataFrame: pd.core.internals.make_block(block_data, placement=binfo.cids) ) elif binfo.dt == 1: # IntegerArray - integer_array = pd.core.arrays.IntegerArray._from_sequence(block_data[0]) - integer_array._mask = block_data[1] blocks.append( pd.core.internals.make_block( - integer_array, + pd.core.arrays.IntegerArray(block_data[0], block_data[1]), placement=binfo.cids[0], ) ) elif binfo.dt == 2: # BooleanArray - bool_array = pd.core.arrays.BooleanArray._from_sequence(block_data[0]) - bool_array._mask = block_data[1] blocks.append( pd.core.internals.make_block( - bool_array, + pd.core.arrays.BooleanArray(block_data[0], block_data[1]), placement=binfo.cids[0], ) )