
Commit 51ce25a

Authored Jun 27, 2023
feat: Add CleanRooms read module (#2366)
1 parent e8291b4 commit 51ce25a

File tree

16 files changed: +654 −60 lines

‎.gitignore

+3
@@ -153,9 +153,12 @@ building/lambda/arrow
 *.swp
 
 # CDK
+node_modules
+*package.json
 *package-lock.json
 *.cdk.staging
 *cdk.out
+*cdk.context.json
 
 # ruff
 .ruff_cache/

‎awswrangler/__init__.py

+2
@@ -11,6 +11,7 @@
     athena,
     catalog,
     chime,
+    cleanrooms,
     cloudwatch,
     data_api,
     data_quality,
@@ -43,6 +44,7 @@
     "athena",
     "catalog",
     "chime",
+    "cleanrooms",
     "cloudwatch",
     "emr",
     "emr_serverless",

‎awswrangler/_utils.py

+12
@@ -45,6 +45,7 @@
     from boto3.resources.base import ServiceResource
     from botocore.client import BaseClient
     from mypy_boto3_athena import AthenaClient
+    from mypy_boto3_cleanrooms import CleanRoomsServiceClient
     from mypy_boto3_dynamodb import DynamoDBClient, DynamoDBServiceResource
     from mypy_boto3_ec2 import EC2Client
     from mypy_boto3_emr.client import EMRClient
@@ -68,6 +69,7 @@
 
 ServiceName = Literal[
     "athena",
+    "cleanrooms",
     "dynamodb",
     "ec2",
     "emr",
@@ -286,6 +288,16 @@ def client(
     ...
 
 
+@overload
+def client(
+    service_name: 'Literal["cleanrooms"]',
+    session: Optional[boto3.Session] = None,
+    botocore_config: Optional[Config] = None,
+    verify: Optional[Union[str, bool]] = None,
+) -> "CleanRoomsServiceClient":
+    ...
+
+
 @overload
 def client(
     service_name: 'Literal["lakeformation"]',
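
For context, a short sketch of what the new overload buys callers. This is illustrative only: _utils.client is an internal helper rather than public API, and the example assumes default AWS credentials are configured.

from awswrangler import _utils

# With the overload above, mypy narrows the return type to
# CleanRoomsServiceClient when the literal "cleanrooms" is passed,
# so calls like client.start_protected_query(...) are type-checked.
client = _utils.client(service_name="cleanrooms")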

‎awswrangler/cleanrooms/__init__.py

+9
@@ -0,0 +1,9 @@
+"""Amazon Clean Rooms Module."""
+
+from awswrangler.cleanrooms._read import read_sql_query
+from awswrangler.cleanrooms._utils import wait_query
+
+__all__ = [
+    "read_sql_query",
+    "wait_query",
+]

‎awswrangler/cleanrooms/_read.py

+128
@@ -0,0 +1,128 @@
+"""Amazon Clean Rooms Module hosting read_* functions."""
+
+import logging
+from typing import Any, Dict, Iterator, Optional, Union
+
+import boto3
+
+import awswrangler.pandas as pd
+from awswrangler import _utils, s3
+from awswrangler._sql_formatter import _process_sql_params
+from awswrangler.cleanrooms._utils import wait_query
+
+_logger: logging.Logger = logging.getLogger(__name__)
+
+
+def _delete_after_iterate(
+    dfs: Iterator[pd.DataFrame], keep_files: bool, kwargs: Dict[str, Any]
+) -> Iterator[pd.DataFrame]:
+    for df in dfs:
+        yield df
+    if keep_files is False:
+        s3.delete_objects(**kwargs)
+
+
+def read_sql_query(
+    sql: str,
+    membership_id: str,
+    output_bucket: str,
+    output_prefix: str,
+    keep_files: bool = True,
+    params: Optional[Dict[str, Any]] = None,
+    chunksize: Optional[Union[int, bool]] = None,
+    use_threads: Union[bool, int] = True,
+    boto3_session: Optional[boto3.Session] = None,
+    pyarrow_additional_kwargs: Optional[Dict[str, Any]] = None,
+) -> Union[Iterator[pd.DataFrame], pd.DataFrame]:
+    """Execute Clean Rooms Protected SQL query and return the results as a Pandas DataFrame.
+
+    Parameters
+    ----------
+    sql : str
+        SQL query
+    membership_id : str
+        Membership ID
+    output_bucket : str
+        S3 output bucket name
+    output_prefix : str
+        S3 output prefix
+    keep_files : bool, optional
+        Whether files in S3 output bucket/prefix are retained. 'True' by default
+    params : Dict[str, any], optional
+        Dict of parameters used for constructing the SQL query. Only named parameters are supported.
+        The dict must be in the form {'name': 'value'} and the SQL query must contain
+        `:name`. Note that for varchar columns and similar, you must surround the value in single quotes
+    chunksize : Union[int, bool], optional
+        If passed, the data is split into an iterable of DataFrames (Memory friendly).
+        If `True` an iterable of DataFrames is returned without guarantee of chunksize.
+        If an `INTEGER` is passed, an iterable of DataFrames is returned with maximum rows
+        equal to the received INTEGER
+    use_threads : Union[bool, int], optional
+        True to enable concurrent requests, False to disable multiple threads.
+        If enabled os.cpu_count() is used as the maximum number of threads.
+        If integer is provided, specified number is used
+    boto3_session : boto3.Session, optional
+        Boto3 Session. If None, the default boto3 session is used
+    pyarrow_additional_kwargs : Optional[Dict[str, Any]]
+        Forwarded to `to_pandas` method converting from PyArrow tables to Pandas DataFrame.
+        Valid values include "split_blocks", "self_destruct", "ignore_metadata".
+        e.g. pyarrow_additional_kwargs={'split_blocks': True}
+
+    Returns
+    -------
+    Union[Iterator[pd.DataFrame], pd.DataFrame]
+        Pandas DataFrame or Generator of Pandas DataFrames if chunksize is provided.
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> df = wr.cleanrooms.read_sql_query(
+    >>>     sql='SELECT DISTINCT...',
+    >>>     membership_id='membership-id',
+    >>>     output_bucket='output-bucket',
+    >>>     output_prefix='output-prefix',
+    >>> )
+    """
+    client_cleanrooms = _utils.client(service_name="cleanrooms", session=boto3_session)
+
+    query_id: str = client_cleanrooms.start_protected_query(
+        type="SQL",
+        membershipIdentifier=membership_id,
+        sqlParameters={"queryString": _process_sql_params(sql, params, engine_type="partiql")},
+        resultConfiguration={
+            "outputConfiguration": {
+                "s3": {
+                    "bucket": output_bucket,
+                    "keyPrefix": output_prefix,
+                    "resultFormat": "PARQUET",
+                }
+            }
+        },
+    )["protectedQuery"]["id"]
+
+    _logger.debug("query_id: %s", query_id)
+    path: str = wait_query(membership_id=membership_id, query_id=query_id)["protectedQuery"]["result"]["output"]["s3"][
+        "location"
+    ]
+
+    _logger.debug("path: %s", path)
+    chunked: Union[bool, int] = False if chunksize is None else chunksize
+    ret = s3.read_parquet(
+        path=path,
+        use_threads=use_threads,
+        chunked=chunked,
+        boto3_session=boto3_session,
+        pyarrow_additional_kwargs=pyarrow_additional_kwargs,
+    )
+
+    _logger.debug("type(ret): %s", type(ret))
+    kwargs: Dict[str, Any] = {
+        "path": path,
+        "use_threads": use_threads,
+        "boto3_session": boto3_session,
+    }
+    if chunked is False:
+        if keep_files is False:
+            s3.delete_objects(**kwargs)
+        return ret
+    return _delete_after_iterate(ret, keep_files, kwargs)
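
To make the chunked/cleanup path above concrete, a hedged usage sketch; the membership ID, bucket and prefix are placeholders, not values shipped with the module:

import awswrangler as wr

# Chunked read: returns a generator, and with keep_files=False the Parquet
# results under s3://my-bucket/results are deleted only after the generator
# is exhausted (via _delete_after_iterate above).
for df in wr.cleanrooms.read_sql_query(
    sql="SELECT city, COUNT(user_id) FROM users GROUP BY city",
    membership_id="membership-id",
    output_bucket="my-bucket",
    output_prefix="results",
    chunksize=1_000,
    keep_files=False,
):
    print(df.shape)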

‎awswrangler/cleanrooms/_utils.py

+60
@@ -0,0 +1,60 @@
+"""Utilities Module for Amazon Clean Rooms."""
+import logging
+import time
+from typing import TYPE_CHECKING, List, Optional
+
+import boto3
+
+from awswrangler import _utils, exceptions
+
+if TYPE_CHECKING:
+    from mypy_boto3_cleanrooms.type_defs import GetProtectedQueryOutputTypeDef
+
+_QUERY_FINAL_STATES: List[str] = ["CANCELLED", "FAILED", "SUCCESS", "TIMED_OUT"]
+_QUERY_WAIT_POLLING_DELAY: float = 2  # SECONDS
+
+_logger: logging.Logger = logging.getLogger(__name__)
+
+
+def wait_query(
+    membership_id: str, query_id: str, boto3_session: Optional[boto3.Session] = None
+) -> "GetProtectedQueryOutputTypeDef":
+    """Wait for the Clean Rooms protected query to end.
+
+    Parameters
+    ----------
+    membership_id : str
+        Membership ID
+    query_id : str
+        Protected query execution ID
+    boto3_session : boto3.Session, optional
+        Boto3 Session. If None, the default boto3 session is used
+    Returns
+    -------
+    Dict[str, Any]
+        Dictionary with the get_protected_query response.
+
+    Raises
+    ------
+    exceptions.QueryFailed
+        Raises exception with error message if protected query is cancelled, times out or fails.
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> res = wr.cleanrooms.wait_query(membership_id='membership-id', query_id='query-id')
+    """
+    client_cleanrooms = _utils.client(service_name="cleanrooms", session=boto3_session)
+    state = "SUBMITTED"
+
+    while state not in _QUERY_FINAL_STATES:
+        time.sleep(_QUERY_WAIT_POLLING_DELAY)
+        response = client_cleanrooms.get_protected_query(
+            membershipIdentifier=membership_id, protectedQueryIdentifier=query_id
+        )
+        state = response["protectedQuery"].get("status")  # type: ignore[assignment]
+
+    _logger.debug("state: %s", state)
+    if state != "SUCCESS":
+        raise exceptions.QueryFailed(response["protectedQuery"].get("Error"))
+    return response
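
read_sql_query drives wait_query internally, but it is exported for callers who start protected queries themselves. A hedged sketch of standalone use, with placeholder IDs:

import awswrangler as wr
from awswrangler import exceptions

try:
    # Polls get_protected_query roughly every 2 seconds until a final state.
    response = wr.cleanrooms.wait_query(membership_id="membership-id", query_id="query-id")
    path = response["protectedQuery"]["result"]["output"]["s3"]["location"]
except exceptions.QueryFailed as e:
    # Raised when the query ends in CANCELLED, FAILED or TIMED_OUT.
    print(f"Protected query did not succeed: {e}")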

‎docs/source/api.rst

+12
@@ -17,6 +17,7 @@ API Reference
 * `Amazon Neptune`_
 * `DynamoDB`_
 * `Amazon Timestream`_
+* `AWS Clean Rooms`_
 * `Amazon EMR`_
 * `Amazon EMR Serverless`_
 * `Amazon CloudWatch Logs`_
@@ -351,6 +352,17 @@ Amazon Timestream
     unload_to_files
     unload
 
+AWS Clean Rooms
+-----------------
+
+.. currentmodule:: awswrangler.cleanrooms
+
+.. autosummary::
+    :toctree: stubs
+
+    read_sql_query
+    wait_query
+
 Amazon EMR
 ----------
‎poetry.lock

+19-2
Some generated files are not rendered by default.

‎pyproject.toml

+1-1
@@ -85,7 +85,7 @@ wheel = "^0.38.1"
 
 # Lint
 black = "^23.1.0"
-boto3-stubs = {version = "1.26.151", extras = ["athena", "chime", "cloudwatch", "dynamodb", "ec2", "emr", "emr-serverless", "glue", "kms", "lakeformation", "logs", "neptune", "opensearch", "opensearchserverless", "quicksight", "rds", "rds-data", "redshift", "redshift-data", "s3", "secretsmanager", "ssm", "sts", "timestream-query", "timestream-write"]}
+boto3-stubs = {version = "^1.26.151", extras = ["athena", "cleanrooms", "chime", "cloudwatch", "dynamodb", "ec2", "emr", "emr-serverless", "glue", "kms", "lakeformation", "logs", "neptune", "opensearch", "opensearchserverless", "quicksight", "rds", "rds-data", "redshift", "redshift-data", "s3", "secretsmanager", "ssm", "sts", "timestream-query", "timestream-write"]}
 doc8 = "^1.0"
 mypy = "^1.0"
 pylint = "^2.17"
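
The new "cleanrooms" extra pulls in mypy-boto3-cleanrooms, which is what lets the typed overload in awswrangler/_utils.py resolve. A small sketch of the effect under mypy, assuming the stubs are installed in the checking environment:

import boto3
from mypy_boto3_cleanrooms import CleanRoomsServiceClient

# boto3-stubs narrows boto3.client("cleanrooms") to CleanRoomsServiceClient,
# so this annotation type-checks without a cast.
client: CleanRoomsServiceClient = boto3.client("cleanrooms")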

‎test_infra/app.py

+7
@@ -3,6 +3,7 @@
 
 from aws_cdk import App, Environment
 from stacks.base_stack import BaseStack
+from stacks.cleanrooms_stack import CleanRoomsStack
 from stacks.databases_stack import DatabasesStack
 from stacks.glueray_stack import GlueRayStack
 from stacks.opensearch_stack import OpenSearchStack
@@ -42,4 +43,10 @@
     **env,
 )
 
+CleanRoomsStack(
+    app,
+    "aws-sdk-pandas-cleanrooms",
+    **env,
+)
+
 app.synth()

‎test_infra/poetry.lock

+61-52
Some generated files are not rendered by default.

‎test_infra/pyproject.toml

+4-4
@@ -7,8 +7,8 @@ license = "Apache License 2.0"
 
 [tool.poetry.dependencies]
 python = ">=3.7.1, <4.0"
-"aws-cdk-lib" = "^2.64.0"
+"aws-cdk-lib" = "^2.85.0"
 "constructs" = ">=10.0.0,<11.0.0"
-"aws-cdk.aws-glue-alpha" = "^2.64.0a0"
-"aws-cdk.aws-redshift-alpha" = "^2.64.0a0"
-"aws-cdk.aws-neptune-alpha" = "^2.64.0a0"
+"aws-cdk.aws-glue-alpha" = "^2.85.0a0"
+"aws-cdk.aws-redshift-alpha" = "^2.85.0a0"
+"aws-cdk.aws-neptune-alpha" = "^2.85.0a0"

‎test_infra/stacks/cleanrooms_stack.py

+241
@@ -0,0 +1,241 @@
+from aws_cdk import CfnOutput, Duration, Stack
+from aws_cdk import aws_cleanrooms as cleanrooms
+from aws_cdk import aws_glue_alpha as glue
+from aws_cdk import aws_iam as iam
+from aws_cdk import aws_s3 as s3
+from aws_cdk import aws_ssm as ssm
+from constructs import Construct
+
+
+class CleanRoomsStack(Stack):  # type: ignore
+    def __init__(
+        self,
+        scope: Construct,
+        construct_id: str,
+        **kwargs: str,
+    ) -> None:
+        super().__init__(scope, construct_id, **kwargs)
+
+        self.collaboration = cleanrooms.CfnCollaboration(
+            self,
+            "Collaboration",
+            name="AWS SDK for pandas - Testing",
+            creator_display_name="Collaborator Creator",
+            creator_member_abilities=["CAN_QUERY", "CAN_RECEIVE_RESULTS"],
+            description="Collaboration Room for AWS SDK for pandas test infrastructure",
+            members=[],
+            query_log_status="ENABLED",
+        )
+
+        self.membership = cleanrooms.CfnMembership(
+            self,
+            "Membership",
+            collaboration_identifier=self.collaboration.attr_collaboration_identifier,
+            query_log_status="ENABLED",
+        )
+
+        self.cleanrooms_service_role = iam.Role(
+            self,
+            "Service Role",
+            assumed_by=iam.CompositePrincipal(
+                iam.ServicePrincipal("cleanrooms.amazonaws.com").with_conditions(
+                    {
+                        "StringLike": {
+                            "sts:ExternalId": f"arn:aws:*:{self.region}:*:dbuser:*/{self.membership.attr_membership_identifier}*"
+                        }
+                    }
+                ),
+                iam.ServicePrincipal("cleanrooms.amazonaws.com").with_conditions(
+                    {
+                        "ForAnyValue:ArnEquals": {
+                            "aws:SourceArn": f"arn:aws:cleanrooms:{self.region}:{self.account}:membership/{self.membership.attr_membership_identifier}"
+                        }
+                    }
+                ),
+            ),
+            managed_policies=[
+                iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSGlueServiceRole"),
+                iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3ReadOnlyAccess"),
+            ],
+        )
+
+        self.bucket = s3.Bucket(
+            self,
+            "Bucket",
+            block_public_access=s3.BlockPublicAccess(
+                block_public_acls=True,
+                block_public_policy=True,
+                ignore_public_acls=True,
+                restrict_public_buckets=True,
+            ),
+            lifecycle_rules=[
+                s3.LifecycleRule(
+                    id="CleaningUp",
+                    enabled=True,
+                    expiration=Duration.days(1),
+                    abort_incomplete_multipart_upload_after=Duration.days(1),
+                ),
+            ],
+            versioned=True,
+        )
+
+        self.database = glue.Database(
+            self,
+            id="Glue Database",
+            database_name="aws_sdk_pandas_cleanrooms",
+            location_uri=f"s3://{self.bucket.bucket_name}",
+        )
+
+        self.users_table = glue.Table(
+            self,
+            "Users Table",
+            database=self.database,
+            table_name="users",
+            columns=[
+                glue.Column(name="user_id", type=glue.Type(input_string="int", is_primitive=True)),
+                glue.Column(name="city", type=glue.Type(input_string="string", is_primitive=True)),
+            ],
+            bucket=self.bucket,
+            s3_prefix="users",
+            data_format=glue.DataFormat(
+                input_format=glue.InputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
+                output_format=glue.OutputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
+                serialization_library=glue.SerializationLibrary(
+                    "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
+                ),
+            ),
+        )
+
+        self.purchases_table = glue.Table(
+            self,
+            "Purchases Table",
+            database=self.database,
+            table_name="purchases",
+            columns=[
+                glue.Column(name="purchase_id", type=glue.Type(input_string="int", is_primitive=True)),
+                glue.Column(name="user_id", type=glue.Type(input_string="int", is_primitive=True)),
+                glue.Column(name="sale_value", type=glue.Type(input_string="float", is_primitive=True)),
+            ],
+            bucket=self.bucket,
+            s3_prefix="purchases",
+            data_format=glue.DataFormat(
+                input_format=glue.InputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
+                output_format=glue.OutputFormat("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
+                serialization_library=glue.SerializationLibrary(
+                    "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
+                ),
+            ),
+        )
+
+        self.users_configured_table = cleanrooms.CfnConfiguredTable(
+            self,
+            "Users Configured Table",
+            allowed_columns=["user_id", "city"],
+            analysis_method="DIRECT_QUERY",
+            name="users",
+            table_reference=cleanrooms.CfnConfiguredTable.TableReferenceProperty(
+                glue=cleanrooms.CfnConfiguredTable.GlueTableReferenceProperty(
+                    database_name=self.database.database_name,
+                    table_name=self.users_table.table_name,
+                )
+            ),
+            analysis_rules=[
+                cleanrooms.CfnConfiguredTable.AnalysisRuleProperty(
+                    policy=cleanrooms.CfnConfiguredTable.ConfiguredTableAnalysisRulePolicyProperty(
+                        v1=cleanrooms.CfnConfiguredTable.ConfiguredTableAnalysisRulePolicyV1Property(
+                            aggregation=cleanrooms.CfnConfiguredTable.AnalysisRuleAggregationProperty(
+                                aggregate_columns=[
+                                    cleanrooms.CfnConfiguredTable.AggregateColumnProperty(
+                                        column_names=["user_id"], function="COUNT"
+                                    )
+                                ],
+                                dimension_columns=["city"],
+                                join_columns=["user_id"],
+                                output_constraints=[
+                                    cleanrooms.CfnConfiguredTable.AggregationConstraintProperty(
+                                        column_name="user_id", minimum=2, type="COUNT_DISTINCT"
+                                    )
+                                ],
+                                scalar_functions=["LOWER"],
+                                join_required="QUERY_RUNNER",
+                            ),
+                        )
+                    ),
+                    type="AGGREGATION",
+                )
+            ],
+        )
+
+        self.purchases_configured_table = cleanrooms.CfnConfiguredTable(
+            self,
+            "Purchases Configured Table",
+            allowed_columns=["purchase_id", "user_id", "sale_value"],
+            analysis_method="DIRECT_QUERY",
+            name="purchases",
+            table_reference=cleanrooms.CfnConfiguredTable.TableReferenceProperty(
+                glue=cleanrooms.CfnConfiguredTable.GlueTableReferenceProperty(
+                    database_name=self.database.database_name,
+                    table_name=self.purchases_table.table_name,
+                )
+            ),
+            analysis_rules=[
+                cleanrooms.CfnConfiguredTable.AnalysisRuleProperty(
+                    policy=cleanrooms.CfnConfiguredTable.ConfiguredTableAnalysisRulePolicyProperty(
+                        v1=cleanrooms.CfnConfiguredTable.ConfiguredTableAnalysisRulePolicyV1Property(
+                            aggregation=cleanrooms.CfnConfiguredTable.AnalysisRuleAggregationProperty(
+                                aggregate_columns=[
+                                    cleanrooms.CfnConfiguredTable.AggregateColumnProperty(
+                                        column_names=["purchase_id"], function="COUNT"
+                                    ),
+                                    cleanrooms.CfnConfiguredTable.AggregateColumnProperty(
+                                        column_names=["sale_value"], function="AVG"
+                                    ),
+                                    cleanrooms.CfnConfiguredTable.AggregateColumnProperty(
+                                        column_names=["sale_value"], function="SUM"
+                                    ),
+                                ],
+                                dimension_columns=[],
+                                join_columns=["user_id"],
+                                output_constraints=[
+                                    cleanrooms.CfnConfiguredTable.AggregationConstraintProperty(
+                                        column_name="user_id", minimum=2, type="COUNT_DISTINCT"
+                                    )
+                                ],
+                                scalar_functions=[],
+                                join_required="QUERY_RUNNER",
+                            ),
+                        )
+                    ),
+                    type="AGGREGATION",
+                )
+            ],
+        )
+
+        self.users_configured_table_association = cleanrooms.CfnConfiguredTableAssociation(
+            self,
+            "Users Configured Table Association",
+            configured_table_identifier=self.users_configured_table.attr_configured_table_identifier,
+            membership_identifier=self.membership.attr_membership_identifier,
+            name="users",
+            role_arn=self.cleanrooms_service_role.role_arn,
+        )
+
+        self.purchases_configured_table_association = cleanrooms.CfnConfiguredTableAssociation(
+            self,
+            "Purchases Configured Table Association",
+            configured_table_identifier=self.purchases_configured_table.attr_configured_table_identifier,
+            membership_identifier=self.membership.attr_membership_identifier,
+            name="purchases",
+            role_arn=self.cleanrooms_service_role.role_arn,
+        )
+
+        CfnOutput(self, "CleanRoomsMembershipId", value=self.membership.attr_membership_identifier)
+        CfnOutput(self, "CleanRoomsGlueDatabaseName", value=self.database.database_name)
+        CfnOutput(self, "CleanRoomsS3BucketName", value=self.bucket.bucket_name)
+
+        ssm.StringParameter(
+            self,
+            "SSM BucketName",
+            parameter_name="/sdk-pandas/cleanrooms/BucketName",
+            string_value=self.bucket.bucket_name,
+        )
‎tests/_utils.py

+7-1
@@ -489,7 +489,13 @@ def path_generator(bucket: str) -> Iterator[str]:
 def extract_cloudformation_outputs():
     outputs = {}
     client = boto3.client("cloudformation")
-    stacks = ["aws-sdk-pandas-base", "aws-sdk-pandas-databases", "aws-sdk-pandas-opensearch", "aws-sdk-pandas-glueray"]
+    stacks = [
+        "aws-sdk-pandas-base",
+        "aws-sdk-pandas-databases",
+        "aws-sdk-pandas-opensearch",
+        "aws-sdk-pandas-glueray",
+        "aws-sdk-pandas-cleanrooms",
+    ]
     response = try_it(client.describe_stacks, botocore.exceptions.ClientError, max_num_tries=5)
     for stack in response.get("Stacks"):
         if (stack["StackName"] in stacks) and (stack["StackStatus"] in CFN_VALID_STATUS):

‎tests/conftest.py

+15
@@ -449,6 +449,21 @@ def glue_data_quality_role(cloudformation_outputs):
     return cloudformation_outputs["GlueDataQualityRole"]
 
 
+@pytest.fixture(scope="session")
+def cleanrooms_membership_id(cloudformation_outputs):
+    return cloudformation_outputs["CleanRoomsMembershipId"]
+
+
+@pytest.fixture(scope="session")
+def cleanrooms_glue_database_name(cloudformation_outputs):
+    return cloudformation_outputs["CleanRoomsGlueDatabaseName"]
+
+
+@pytest.fixture(scope="session")
+def cleanrooms_s3_bucket_name(cloudformation_outputs):
+    return cloudformation_outputs["CleanRoomsS3BucketName"]
+
+
 @pytest.fixture(scope="function")
 def local_filename() -> Iterator[str]:
     filename = os.path.join(".", f"{get_time_str_with_random_suffix()}.data")

‎tests/unit/test_cleanrooms.py

+73
@@ -0,0 +1,73 @@
+import pytest
+
+import awswrangler as wr
+import awswrangler.pandas as pd
+
+pytestmark = pytest.mark.distributed
+
+
+@pytest.fixture()
+def data(cleanrooms_s3_bucket_name: str, cleanrooms_glue_database_name: str) -> None:
+    df_purchases = pd.DataFrame(
+        {
+            "purchase_id": list(range(100, 109)),
+            "user_id": [1, 2, 3, 1, 2, 3, 4, 5, 6],
+            "sale_value": [2.2, 1.1, 6.2, 2.3, 7.8, 9.9, 7.3, 9.7, 0.7],
+        }
+    )
+    wr.s3.to_parquet(
+        df_purchases,
+        f"s3://{cleanrooms_s3_bucket_name}/purchases/",
+        dataset=True,
+        database=cleanrooms_glue_database_name,
+        table="purchases",
+        mode="overwrite",
+    )
+
+    df_users = pd.DataFrame(
+        {
+            "user_id": list(range(1, 9)),
+            "city": ["LA", "NYC", "Chicago", "NYC", "NYC", "LA", "Seattle", "Seattle"],
+        }
+    )
+    wr.s3.to_parquet(
+        df_users,
+        f"s3://{cleanrooms_s3_bucket_name}/users/",
+        dataset=True,
+        database=cleanrooms_glue_database_name,
+        table="users",
+        mode="overwrite",
+    )
+
+
+def test_read_sql_query(data: None, cleanrooms_membership_id: str, cleanrooms_s3_bucket_name: str):
+    sql = """SELECT city, AVG(p.sale_value)
+    FROM users u
+    INNER JOIN purchases p ON u.user_id = p.user_id
+    GROUP BY city
+    """
+    chunksize = 2
+    df_chunked = wr.cleanrooms.read_sql_query(
+        sql=sql,
+        membership_id=cleanrooms_membership_id,
+        output_bucket=cleanrooms_s3_bucket_name,
+        output_prefix="results",
+        chunksize=chunksize,
+        keep_files=False,
+    )
+    for df in df_chunked:
+        assert df.shape == (chunksize, 2)
+
+    sql = """SELECT COUNT(p.purchase_id), SUM(p.sale_value), city
+    FROM users u
+    INNER JOIN purchases p ON u.user_id = p.user_id
+    GROUP BY city
+    """
+    df = wr.cleanrooms.read_sql_query(
+        sql=sql,
+        membership_id=cleanrooms_membership_id,
+        output_bucket=cleanrooms_s3_bucket_name,
+        output_prefix="results",
+        keep_files=False,
+    )
+    assert df.shape == (2, 3)