From 24e6e5c22efac130583ec4143d4c3fa10bbb700a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 15 May 2025 15:26:06 +0200 Subject: [PATCH 01/67] perf(alembic): adds integration providers --- ...9d146e05e1ee_adds_integration_providers.py | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 alembic/versions/9d146e05e1ee_adds_integration_providers.py diff --git a/alembic/versions/9d146e05e1ee_adds_integration_providers.py b/alembic/versions/9d146e05e1ee_adds_integration_providers.py new file mode 100644 index 00000000..5acb2e71 --- /dev/null +++ b/alembic/versions/9d146e05e1ee_adds_integration_providers.py @@ -0,0 +1,109 @@ +"""adds integration providers + +Revision ID: 9d146e05e1ee +Revises: eb96f9b82cc1 +Create Date: 2025-05-15 13:25:34.353936 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "9d146e05e1ee" +down_revision = "eb96f9b82cc1" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "integration_organization_access", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("integration_type", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_organization_access_created_by"), + "integration_organization_access", + ["created_by"], + unique=False, + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_organization_access_organization_id"), + "integration_organization_access", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_table( + "third_party_integration", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("name", sa.String(), nullable=True), + sa.Column("description", sa.String(), nullable=True), + sa.Column("state", sa.String(), nullable=True), + sa.Column("type", sa.String(), nullable=True), + sa.Column("config", sa.JSON(), nullable=True), + sa.Column("llm_config", sa.JSON(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_third_party_integration_created_by"), + "third_party_integration", + ["created_by"], + unique=False, + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_third_party_integration_project_id"), + "third_party_integration", + ["project_id"], + unique=False, + schema="cognition", + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index( + op.f("ix_cognition_third_party_integration_project_id"), + table_name="third_party_integration", + schema="cognition", + ) + op.drop_index( + op.f("ix_cognition_third_party_integration_created_by"), + table_name="third_party_integration", + schema="cognition", + ) + op.drop_table("third_party_integration", schema="cognition") + op.drop_index( + op.f("ix_cognition_integration_organization_access_organization_id"), + table_name="integration_organization_access", + schema="cognition", + ) + op.drop_index( + op.f("ix_cognition_integration_organization_access_created_by"), + table_name="integration_organization_access", + schema="cognition", + ) + op.drop_table("integration_organization_access", schema="cognition") + # ### end Alembic commands ### From f7efa5d834b5573537eff3eb64bf9819b8d27e55 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 15 May 2025 15:26:11 +0200 Subject: [PATCH 02/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 9d0ecff3..4d9970a0 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 9d0ecff36599cf1bc79da80c6db788ba36208171 +Subproject commit 4d9970a043be39672eacf72dc54b93e0b0e5fe0e From e425c8433e73b35a6cb5c57265c5b24a29e96b84 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 08:57:13 +0200 Subject: [PATCH 03/67] perf: rename integration providers --- ...c00d99b37aa_adds_integration_providers.py} | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) rename alembic/versions/{9d146e05e1ee_adds_integration_providers.py => 3c00d99b37aa_adds_integration_providers.py} (69%) diff --git a/alembic/versions/9d146e05e1ee_adds_integration_providers.py b/alembic/versions/3c00d99b37aa_adds_integration_providers.py similarity index 69% rename from alembic/versions/9d146e05e1ee_adds_integration_providers.py rename to alembic/versions/3c00d99b37aa_adds_integration_providers.py index 5acb2e71..ed1552a4 100644 --- a/alembic/versions/9d146e05e1ee_adds_integration_providers.py +++ b/alembic/versions/3c00d99b37aa_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 9d146e05e1ee +Revision ID: 3c00d99b37aa Revises: eb96f9b82cc1 -Create Date: 2025-05-15 13:25:34.353936 +Create Date: 2025-05-16 06:56:43.813256 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "9d146e05e1ee" +revision = "3c00d99b37aa" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -20,7 +20,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table( - "integration_organization_access", + "integration_access", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_at", sa.DateTime(), nullable=True), @@ -34,21 +34,21 @@ def upgrade(): schema="cognition", ) op.create_index( - op.f("ix_cognition_integration_organization_access_created_by"), - "integration_organization_access", + op.f("ix_cognition_integration_access_created_by"), + "integration_access", ["created_by"], unique=False, schema="cognition", ) op.create_index( - op.f("ix_cognition_integration_organization_access_organization_id"), - "integration_organization_access", + op.f("ix_cognition_integration_access_organization_id"), + "integration_access", ["organization_id"], unique=False, schema="cognition", ) op.create_table( - "third_party_integration", + "integration", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), @@ -66,15 +66,15 @@ def upgrade(): schema="cognition", ) op.create_index( - op.f("ix_cognition_third_party_integration_created_by"), - "third_party_integration", + op.f("ix_cognition_integration_created_by"), + "integration", ["created_by"], unique=False, schema="cognition", ) op.create_index( - op.f("ix_cognition_third_party_integration_project_id"), - "third_party_integration", + op.f("ix_cognition_integration_project_id"), + "integration", ["project_id"], unique=False, schema="cognition", @@ -85,25 +85,25 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### op.drop_index( - op.f("ix_cognition_third_party_integration_project_id"), - table_name="third_party_integration", + op.f("ix_cognition_integration_project_id"), + table_name="integration", schema="cognition", ) op.drop_index( - op.f("ix_cognition_third_party_integration_created_by"), - table_name="third_party_integration", + op.f("ix_cognition_integration_created_by"), + table_name="integration", schema="cognition", ) - op.drop_table("third_party_integration", schema="cognition") + op.drop_table("integration", schema="cognition") op.drop_index( - op.f("ix_cognition_integration_organization_access_organization_id"), - table_name="integration_organization_access", + op.f("ix_cognition_integration_access_organization_id"), + table_name="integration_access", schema="cognition", ) op.drop_index( - op.f("ix_cognition_integration_organization_access_created_by"), - table_name="integration_organization_access", + op.f("ix_cognition_integration_access_created_by"), + table_name="integration_access", schema="cognition", ) - op.drop_table("integration_organization_access", schema="cognition") + op.drop_table("integration_access", schema="cognition") # ### end Alembic commands ### From 2d54ed6e5185b05dd47d15c3cb6d0b38822551de Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 08:57:20 +0200 Subject: [PATCH 04/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 4d9970a0..80712fce 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 4d9970a043be39672eacf72dc54b93e0b0e5fe0e +Subproject commit 80712fce5257513a3139584a1dec555ff0bb15ba From b246d1de43bade416439f47aaf7d6f83dcccda14 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 11:50:43 +0200 Subject: [PATCH 05/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 80712fce..fbd5a856 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 80712fce5257513a3139584a1dec555ff0bb15ba +Subproject commit fbd5a856dd9f13746f9f4c4a72894a40cffb8983 From 3d36c652957725af6b194902c40213fbee04b2b5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 11:50:56 +0200 Subject: [PATCH 06/67] perf: update integration last_extraction --- ...iders.py => 37daf20132b9_adds_integration_providers.py} | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) rename alembic/versions/{3c00d99b37aa_adds_integration_providers.py => 37daf20132b9_adds_integration_providers.py} (95%) diff --git a/alembic/versions/3c00d99b37aa_adds_integration_providers.py b/alembic/versions/37daf20132b9_adds_integration_providers.py similarity index 95% rename from alembic/versions/3c00d99b37aa_adds_integration_providers.py rename to alembic/versions/37daf20132b9_adds_integration_providers.py index ed1552a4..017ad428 100644 --- a/alembic/versions/3c00d99b37aa_adds_integration_providers.py +++ b/alembic/versions/37daf20132b9_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 3c00d99b37aa +Revision ID: 37daf20132b9 Revises: eb96f9b82cc1 -Create Date: 2025-05-16 06:56:43.813256 +Create Date: 2025-05-16 09:50:22.035523 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "3c00d99b37aa" +revision = "37daf20132b9" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -60,6 +60,7 @@ def upgrade(): sa.Column("config", sa.JSON(), nullable=True), sa.Column("llm_config", sa.JSON(), nullable=True), sa.Column("error_message", sa.String(), nullable=True), + sa.Column("last_extraction", sa.JSON(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), sa.PrimaryKeyConstraint("id"), From 4b0ab79bc2051a58eab40b1cc7678ec790e6fe5d Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 15:24:38 +0200 Subject: [PATCH 07/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index fbd5a856..458e4dcf 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit fbd5a856dd9f13746f9f4c4a72894a40cffb8983 +Subproject commit 458e4dcf2255079d56259376339ce7e2e75e57f1 From 5620cd06adfb25f31554a7f70a6d512bbf934891 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 15:25:09 +0200 Subject: [PATCH 08/67] perf(alembic): use list integration access types --- ...viders.py => 339fbaedd8c9_adds_integration_providers.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename alembic/versions/{37daf20132b9_adds_integration_providers.py => 339fbaedd8c9_adds_integration_providers.py} (97%) diff --git a/alembic/versions/37daf20132b9_adds_integration_providers.py b/alembic/versions/339fbaedd8c9_adds_integration_providers.py similarity index 97% rename from alembic/versions/37daf20132b9_adds_integration_providers.py rename to alembic/versions/339fbaedd8c9_adds_integration_providers.py index 017ad428..d0063c37 100644 --- a/alembic/versions/37daf20132b9_adds_integration_providers.py +++ b/alembic/versions/339fbaedd8c9_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 37daf20132b9 +Revision ID: 339fbaedd8c9 Revises: eb96f9b82cc1 -Create Date: 2025-05-16 09:50:22.035523 +Create Date: 2025-05-16 13:24:46.782353 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "37daf20132b9" +revision = "339fbaedd8c9" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None From fcce2c704aa903c190cd95c451b0113321a51246 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 15:56:48 +0200 Subject: [PATCH 09/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 458e4dcf..bbf643e5 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 458e4dcf2255079d56259376339ce7e2e75e57f1 +Subproject commit bbf643e53a210daf4aff6abead0ea8b5af362839 From bb5746c6be17bc1c3b2d70e31cb1e1a3a30afdb9 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 15:57:01 +0200 Subject: [PATCH 10/67] perf: add integration providers --- ...ders.py => 693e19c50093_adds_integration_providers.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename alembic/versions/{339fbaedd8c9_adds_integration_providers.py => 693e19c50093_adds_integration_providers.py} (95%) diff --git a/alembic/versions/339fbaedd8c9_adds_integration_providers.py b/alembic/versions/693e19c50093_adds_integration_providers.py similarity index 95% rename from alembic/versions/339fbaedd8c9_adds_integration_providers.py rename to alembic/versions/693e19c50093_adds_integration_providers.py index d0063c37..2903bf88 100644 --- a/alembic/versions/339fbaedd8c9_adds_integration_providers.py +++ b/alembic/versions/693e19c50093_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 339fbaedd8c9 +Revision ID: 693e19c50093 Revises: eb96f9b82cc1 -Create Date: 2025-05-16 13:24:46.782353 +Create Date: 2025-05-16 13:56:28.125607 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "339fbaedd8c9" +revision = "693e19c50093" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -25,7 +25,7 @@ def upgrade(): sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_at", sa.DateTime(), nullable=True), sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("integration_type", sa.String(), nullable=True), + sa.Column("integration_types", sa.ARRAY(sa.String()), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint( ["organization_id"], ["organization.id"], ondelete="CASCADE" From ae22650c954cf5a50b182f976062bd17ac23d09c Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 16:06:27 +0200 Subject: [PATCH 11/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index bbf643e5..cc57cb4d 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit bbf643e53a210daf4aff6abead0ea8b5af362839 +Subproject commit cc57cb4d021809015d71dd4d31e23cd0869ab9dd From d78ab1aa0b8110d2ad3ae5482b734ff22002afb0 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 16:06:41 +0200 Subject: [PATCH 12/67] perf(alembic): adds integration providers --- ...ders.py => d40171369e79_adds_integration_providers.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename alembic/versions/{693e19c50093_adds_integration_providers.py => d40171369e79_adds_integration_providers.py} (95%) diff --git a/alembic/versions/693e19c50093_adds_integration_providers.py b/alembic/versions/d40171369e79_adds_integration_providers.py similarity index 95% rename from alembic/versions/693e19c50093_adds_integration_providers.py rename to alembic/versions/d40171369e79_adds_integration_providers.py index 2903bf88..c4bb2902 100644 --- a/alembic/versions/693e19c50093_adds_integration_providers.py +++ b/alembic/versions/d40171369e79_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 693e19c50093 +Revision ID: d40171369e79 Revises: eb96f9b82cc1 -Create Date: 2025-05-16 13:56:28.125607 +Create Date: 2025-05-16 14:06:04.794313 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "693e19c50093" +revision = "d40171369e79" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -60,7 +60,7 @@ def upgrade(): sa.Column("config", sa.JSON(), nullable=True), sa.Column("llm_config", sa.JSON(), nullable=True), sa.Column("error_message", sa.String(), nullable=True), - sa.Column("last_extraction", sa.JSON(), nullable=True), + sa.Column("extract_history", sa.JSON(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), sa.PrimaryKeyConstraint("id"), From a7764382fd73fd0a873113e0c6a9403b2045b9fa Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 22:12:56 +0200 Subject: [PATCH 13/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index cc57cb4d..03842fda 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit cc57cb4d021809015d71dd4d31e23cd0869ab9dd +Subproject commit 03842fda4fe9188fc22cb2fa0bd18405e5b55281 From b3eae84ec20808ae1770abb47b1257c034509908 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 22:13:08 +0200 Subject: [PATCH 14/67] perf: update integration providers --- ...viders.py => 29cbd76cbd09_adds_integration_providers.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename alembic/versions/{d40171369e79_adds_integration_providers.py => 29cbd76cbd09_adds_integration_providers.py} (97%) diff --git a/alembic/versions/d40171369e79_adds_integration_providers.py b/alembic/versions/29cbd76cbd09_adds_integration_providers.py similarity index 97% rename from alembic/versions/d40171369e79_adds_integration_providers.py rename to alembic/versions/29cbd76cbd09_adds_integration_providers.py index c4bb2902..56c6960b 100644 --- a/alembic/versions/d40171369e79_adds_integration_providers.py +++ b/alembic/versions/29cbd76cbd09_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: d40171369e79 +Revision ID: 29cbd76cbd09 Revises: eb96f9b82cc1 -Create Date: 2025-05-16 14:06:04.794313 +Create Date: 2025-05-16 14:57:44.107504 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "d40171369e79" +revision = "29cbd76cbd09" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None From 1b0bd868e33e53f6c0192a6ec914e80e6a041778 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 22:13:19 +0200 Subject: [PATCH 15/67] perf: task manipulation --- controller/monitor/manager.py | 12 ++++++++++++ fast_api/routes/misc.py | 2 ++ 2 files changed, 14 insertions(+) diff --git a/controller/monitor/manager.py b/controller/monitor/manager.py index 289f65ce..036541b9 100644 --- a/controller/monitor/manager.py +++ b/controller/monitor/manager.py @@ -115,3 +115,15 @@ def cancel_parse_cognition_file_task( transformation_key, with_commit=True, ) + + +def cancel_integration_task( + task_info: Dict[str, Any], +) -> None: + + integration_id = task_info.get("integrationId") + + task_monitor.set_integration_task_to_failed( + integration_id, + with_commit=True, + ) diff --git a/fast_api/routes/misc.py b/fast_api/routes/misc.py index d8f5ed09..30033783 100644 --- a/fast_api/routes/misc.py +++ b/fast_api/routes/misc.py @@ -129,6 +129,8 @@ def cancel_task( controller_manager.cancel_parse_cognition_file_task( task_entity.organization_id, task_info ) + elif task_type == enums.TaskType.INTEGRATION.value: + controller_manager.cancel_integration_task(task_info) else: raise ValueError(f"{task_type} is no valid task type") From 2728ba407f6450512a847075be17571d720aeea6 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 16 May 2025 22:14:13 +0200 Subject: [PATCH 16/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 03842fda..0386f6cb 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 03842fda4fe9188fc22cb2fa0bd18405e5b55281 +Subproject commit 0386f6cb777eea730581a76e4382d510097a6b16 From c7a2b7ca195239322763713ec3ca9e8e8aa0bd14 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 20 May 2025 11:33:28 +0200 Subject: [PATCH 17/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 0386f6cb..6fd36565 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 0386f6cb777eea730581a76e4382d510097a6b16 +Subproject commit 6fd36565c03d8133b0a7d308656214966a3b0fd5 From 5171baae64e6d4889d6363e453970da8e7c1840f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 20 May 2025 11:33:40 +0200 Subject: [PATCH 18/67] perf(alembic): integration provider --- ...9933f2b894d_adds_integration_providers.py} | 31 +++---------------- 1 file changed, 4 insertions(+), 27 deletions(-) rename alembic/versions/{29cbd76cbd09_adds_integration_providers.py => 29933f2b894d_adds_integration_providers.py} (80%) diff --git a/alembic/versions/29cbd76cbd09_adds_integration_providers.py b/alembic/versions/29933f2b894d_adds_integration_providers.py similarity index 80% rename from alembic/versions/29cbd76cbd09_adds_integration_providers.py rename to alembic/versions/29933f2b894d_adds_integration_providers.py index 56c6960b..f2a42458 100644 --- a/alembic/versions/29cbd76cbd09_adds_integration_providers.py +++ b/alembic/versions/29933f2b894d_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 29cbd76cbd09 +Revision ID: 29933f2b894d Revises: eb96f9b82cc1 -Create Date: 2025-05-16 14:57:44.107504 +Create Date: 2025-05-20 09:30:01.172135 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "29cbd76cbd09" +revision = "29933f2b894d" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -33,13 +33,6 @@ def upgrade(): sa.PrimaryKeyConstraint("id"), schema="cognition", ) - op.create_index( - op.f("ix_cognition_integration_access_created_by"), - "integration_access", - ["created_by"], - unique=False, - schema="cognition", - ) op.create_index( op.f("ix_cognition_integration_access_organization_id"), "integration_access", @@ -55,6 +48,7 @@ def upgrade(): sa.Column("created_at", sa.DateTime(), nullable=True), sa.Column("name", sa.String(), nullable=True), sa.Column("description", sa.String(), nullable=True), + sa.Column("tokenizer", sa.String(), nullable=True), sa.Column("state", sa.String(), nullable=True), sa.Column("type", sa.String(), nullable=True), sa.Column("config", sa.JSON(), nullable=True), @@ -66,13 +60,6 @@ def upgrade(): sa.PrimaryKeyConstraint("id"), schema="cognition", ) - op.create_index( - op.f("ix_cognition_integration_created_by"), - "integration", - ["created_by"], - unique=False, - schema="cognition", - ) op.create_index( op.f("ix_cognition_integration_project_id"), "integration", @@ -90,21 +77,11 @@ def downgrade(): table_name="integration", schema="cognition", ) - op.drop_index( - op.f("ix_cognition_integration_created_by"), - table_name="integration", - schema="cognition", - ) op.drop_table("integration", schema="cognition") op.drop_index( op.f("ix_cognition_integration_access_organization_id"), table_name="integration_access", schema="cognition", ) - op.drop_index( - op.f("ix_cognition_integration_access_created_by"), - table_name="integration_access", - schema="cognition", - ) op.drop_table("integration_access", schema="cognition") # ### end Alembic commands ### From e781ccbf1eca57b343dd15e223519ff4d840ad66 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 10:15:21 +0200 Subject: [PATCH 19/67] perf: add org_id to integration provider --- ...44a020884f9_adds_integration_providers.py} | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) rename alembic/versions/{29933f2b894d_adds_integration_providers.py => 544a020884f9_adds_integration_providers.py} (83%) diff --git a/alembic/versions/29933f2b894d_adds_integration_providers.py b/alembic/versions/544a020884f9_adds_integration_providers.py similarity index 83% rename from alembic/versions/29933f2b894d_adds_integration_providers.py rename to alembic/versions/544a020884f9_adds_integration_providers.py index f2a42458..4ad245a8 100644 --- a/alembic/versions/29933f2b894d_adds_integration_providers.py +++ b/alembic/versions/544a020884f9_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 29933f2b894d +Revision ID: 544a020884f9 Revises: eb96f9b82cc1 -Create Date: 2025-05-20 09:30:01.172135 +Create Date: 2025-05-26 07:53:40.708814 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "29933f2b894d" +revision = "544a020884f9" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -43,6 +43,7 @@ def upgrade(): op.create_table( "integration", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_at", sa.DateTime(), nullable=True), @@ -56,10 +57,20 @@ def upgrade(): sa.Column("error_message", sa.String(), nullable=True), sa.Column("extract_history", sa.JSON(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), sa.PrimaryKeyConstraint("id"), schema="cognition", ) + op.create_index( + op.f("ix_cognition_integration_organization_id"), + "integration", + ["organization_id"], + unique=False, + schema="cognition", + ) op.create_index( op.f("ix_cognition_integration_project_id"), "integration", @@ -77,6 +88,11 @@ def downgrade(): table_name="integration", schema="cognition", ) + op.drop_index( + op.f("ix_cognition_integration_organization_id"), + table_name="integration", + schema="cognition", + ) op.drop_table("integration", schema="cognition") op.drop_index( op.f("ix_cognition_integration_access_organization_id"), From 3893760add36912dbef660c9a52db7967bbafc71 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 10:15:31 +0200 Subject: [PATCH 20/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 6fd36565..4cd321f2 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 6fd36565c03d8133b0a7d308656214966a3b0fd5 +Subproject commit 4cd321f21b83c8fa10728fd0b296f6b65a484059 From 0f8a518eeaaf823229e2f2baf69fff5e763364f8 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 10:16:10 +0200 Subject: [PATCH 21/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 4cd321f2..0b6a9fa8 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 4cd321f21b83c8fa10728fd0b296f6b65a484059 +Subproject commit 0b6a9fa806641afc28c4c4fe229960dea4320391 From 02e8655f896cfd7316610be1e8d3db8e3477aff1 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 17:13:11 +0200 Subject: [PATCH 22/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 0b6a9fa8..99494f8b 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 0b6a9fa806641afc28c4c4fe229960dea4320391 +Subproject commit 99494f8bbf8dff860e821f29c4f302fb1593b421 From 5632966ce6af5a62b765c89d3d29f57a3261d5be Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 17:13:27 +0200 Subject: [PATCH 23/67] perf(alembic): recreate integration providers --- ... a9d19af1cc5d_adds_integration_providers.py} | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) rename alembic/versions/{544a020884f9_adds_integration_providers.py => a9d19af1cc5d_adds_integration_providers.py} (87%) diff --git a/alembic/versions/544a020884f9_adds_integration_providers.py b/alembic/versions/a9d19af1cc5d_adds_integration_providers.py similarity index 87% rename from alembic/versions/544a020884f9_adds_integration_providers.py rename to alembic/versions/a9d19af1cc5d_adds_integration_providers.py index 4ad245a8..4bb747a4 100644 --- a/alembic/versions/544a020884f9_adds_integration_providers.py +++ b/alembic/versions/a9d19af1cc5d_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 544a020884f9 +Revision ID: a9d19af1cc5d Revises: eb96f9b82cc1 -Create Date: 2025-05-26 07:53:40.708814 +Create Date: 2025-05-26 15:11:26.608774 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "544a020884f9" +revision = "a9d19af1cc5d" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -47,6 +47,7 @@ def upgrade(): sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("finished_at", sa.DateTime(), nullable=True), sa.Column("name", sa.String(), nullable=True), sa.Column("description", sa.String(), nullable=True), sa.Column("tokenizer", sa.String(), nullable=True), @@ -78,11 +79,21 @@ def upgrade(): unique=False, schema="cognition", ) + op.add_column( + "embedding", + sa.Column("delta_full_recalculation_threshold", sa.Float(), nullable=True), + ) + op.add_column( + "embedding", + sa.Column("current_delta_record_count", sa.Integer(), nullable=True), + ) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("embedding", "current_delta_record_count") + op.drop_column("embedding", "delta_full_recalculation_threshold") op.drop_index( op.f("ix_cognition_integration_project_id"), table_name="integration", From d5c72e91e77eaeb60d4ba6d49c5556552cc4ad5e Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 17:18:48 +0200 Subject: [PATCH 24/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 99494f8b..59898010 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 99494f8bbf8dff860e821f29c4f302fb1593b421 +Subproject commit 5989801037d1f8778c1136965f461ea2693b13cc From 5bfeb6bda91503ba03be067ff5880095c2f636ce Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 26 May 2025 17:18:59 +0200 Subject: [PATCH 25/67] perf(alembic): add started_at --- ...iders.py => cee210baa0ea_adds_integration_providers.py} | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) rename alembic/versions/{a9d19af1cc5d_adds_integration_providers.py => cee210baa0ea_adds_integration_providers.py} (96%) diff --git a/alembic/versions/a9d19af1cc5d_adds_integration_providers.py b/alembic/versions/cee210baa0ea_adds_integration_providers.py similarity index 96% rename from alembic/versions/a9d19af1cc5d_adds_integration_providers.py rename to alembic/versions/cee210baa0ea_adds_integration_providers.py index 4bb747a4..92beb0a0 100644 --- a/alembic/versions/a9d19af1cc5d_adds_integration_providers.py +++ b/alembic/versions/cee210baa0ea_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: a9d19af1cc5d +Revision ID: cee210baa0ea Revises: eb96f9b82cc1 -Create Date: 2025-05-26 15:11:26.608774 +Create Date: 2025-05-26 15:18:32.868727 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "a9d19af1cc5d" +revision = "cee210baa0ea" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -47,6 +47,7 @@ def upgrade(): sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("started_at", sa.DateTime(), nullable=True), sa.Column("finished_at", sa.DateTime(), nullable=True), sa.Column("name", sa.String(), nullable=True), sa.Column("description", sa.String(), nullable=True), From d1494c607f6236569a4255d5959ab25433ddeb14 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 10:12:16 +0200 Subject: [PATCH 26/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 59898010..8898a3cb 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 5989801037d1f8778c1136965f461ea2693b13cc +Subproject commit 8898a3cb6650f2087d7cb368e5615e6b12745ca6 From 550332fd3ee4a06d3c4c381d44413a5f1f159c5b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 10:12:28 +0200 Subject: [PATCH 27/67] perf(alembic): add integration records --- ...9f8e5aec59ea_adds_integration_providers.py | 294 ++++++++++++++++++ ...cee210baa0ea_adds_integration_providers.py | 115 ------- 2 files changed, 294 insertions(+), 115 deletions(-) create mode 100644 alembic/versions/9f8e5aec59ea_adds_integration_providers.py delete mode 100644 alembic/versions/cee210baa0ea_adds_integration_providers.py diff --git a/alembic/versions/9f8e5aec59ea_adds_integration_providers.py b/alembic/versions/9f8e5aec59ea_adds_integration_providers.py new file mode 100644 index 00000000..7bbce09f --- /dev/null +++ b/alembic/versions/9f8e5aec59ea_adds_integration_providers.py @@ -0,0 +1,294 @@ +"""adds integration providers + +Revision ID: 9f8e5aec59ea +Revises: eb96f9b82cc1 +Create Date: 2025-05-27 07:52:57.549364 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "9f8e5aec59ea" +down_revision = "eb96f9b82cc1" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute("create schema integration;") + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "integration_access", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("integration_types", sa.ARRAY(sa.String()), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_access_organization_id"), + "integration_access", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_table( + "integration", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("started_at", sa.DateTime(), nullable=True), + sa.Column("finished_at", sa.DateTime(), nullable=True), + sa.Column("name", sa.String(), nullable=True), + sa.Column("description", sa.String(), nullable=True), + sa.Column("tokenizer", sa.String(), nullable=True), + sa.Column("state", sa.String(), nullable=True), + sa.Column("type", sa.String(), nullable=True), + sa.Column("config", sa.JSON(), nullable=True), + sa.Column("llm_config", sa.JSON(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_organization_id"), + "integration", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_project_id"), + "integration", + ["project_id"], + unique=False, + schema="cognition", + ) + op.create_table( + "github_file", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("updated_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("updated_at", sa.DateTime(), nullable=True), + sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("running_id", sa.Integer(), nullable=True), + sa.Column("source", sa.String(), nullable=True), + sa.Column("path", sa.String(), nullable=True), + sa.Column("sha", sa.String(), nullable=True), + sa.Column("delta_criteria", sa.JSON(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + schema="integration", + ) + op.create_index( + op.f("ix_integration_github_file_integration_id"), + "github_file", + ["integration_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_github_file_running_id"), + "github_file", + ["running_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_github_file_source"), + "github_file", + ["source"], + unique=False, + schema="integration", + ) + op.create_table( + "github_issue", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("updated_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("updated_at", sa.DateTime(), nullable=True), + sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("running_id", sa.Integer(), nullable=True), + sa.Column("source", sa.String(), nullable=True), + sa.Column("url", sa.String(), nullable=True), + sa.Column("state", sa.String(), nullable=True), + sa.Column("assignee", sa.String(), nullable=True), + sa.Column("milestone", sa.String(), nullable=True), + sa.Column("number", sa.Integer(), nullable=True), + sa.Column("delta_criteria", sa.JSON(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + schema="integration", + ) + op.create_index( + op.f("ix_integration_github_issue_integration_id"), + "github_issue", + ["integration_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_github_issue_running_id"), + "github_issue", + ["running_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_github_issue_source"), + "github_issue", + ["source"], + unique=False, + schema="integration", + ) + op.create_table( + "pdf", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("updated_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("updated_at", sa.DateTime(), nullable=True), + sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("running_id", sa.Integer(), nullable=True), + sa.Column("source", sa.String(), nullable=True), + sa.Column("file_path", sa.String(), nullable=True), + sa.Column("page", sa.Integer(), nullable=True), + sa.Column("total_pages", sa.Integer(), nullable=True), + sa.Column("title", sa.String(), nullable=True), + sa.Column("delta_criteria", sa.JSON(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + schema="integration", + ) + op.create_index( + op.f("ix_integration_pdf_integration_id"), + "pdf", + ["integration_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_pdf_running_id"), + "pdf", + ["running_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_pdf_source"), + "pdf", + ["source"], + unique=False, + schema="integration", + ) + op.add_column( + "embedding", + sa.Column("delta_full_recalculation_threshold", sa.Float(), nullable=True), + ) + op.add_column( + "embedding", + sa.Column("current_delta_record_count", sa.Integer(), nullable=True), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("embedding", "current_delta_record_count") + op.drop_column("embedding", "delta_full_recalculation_threshold") + op.drop_index( + op.f("ix_integration_pdf_source"), table_name="pdf", schema="integration" + ) + op.drop_index( + op.f("ix_integration_pdf_running_id"), table_name="pdf", schema="integration" + ) + op.drop_index( + op.f("ix_integration_pdf_integration_id"), + table_name="pdf", + schema="integration", + ) + op.drop_table("pdf", schema="integration") + op.drop_index( + op.f("ix_integration_github_issue_source"), + table_name="github_issue", + schema="integration", + ) + op.drop_index( + op.f("ix_integration_github_issue_running_id"), + table_name="github_issue", + schema="integration", + ) + op.drop_index( + op.f("ix_integration_github_issue_integration_id"), + table_name="github_issue", + schema="integration", + ) + op.drop_table("github_issue", schema="integration") + op.drop_index( + op.f("ix_integration_github_file_source"), + table_name="github_file", + schema="integration", + ) + op.drop_index( + op.f("ix_integration_github_file_running_id"), + table_name="github_file", + schema="integration", + ) + op.drop_index( + op.f("ix_integration_github_file_integration_id"), + table_name="github_file", + schema="integration", + ) + op.drop_table("github_file", schema="integration") + op.drop_index( + op.f("ix_cognition_integration_project_id"), + table_name="integration", + schema="cognition", + ) + op.drop_index( + op.f("ix_cognition_integration_organization_id"), + table_name="integration", + schema="cognition", + ) + op.drop_table("integration", schema="cognition") + op.drop_index( + op.f("ix_cognition_integration_access_organization_id"), + table_name="integration_access", + schema="cognition", + ) + op.drop_table("integration_access", schema="cognition") + # ### end Alembic commands ### + op.execute("drop schema integration;") diff --git a/alembic/versions/cee210baa0ea_adds_integration_providers.py b/alembic/versions/cee210baa0ea_adds_integration_providers.py deleted file mode 100644 index 92beb0a0..00000000 --- a/alembic/versions/cee210baa0ea_adds_integration_providers.py +++ /dev/null @@ -1,115 +0,0 @@ -"""adds integration providers - -Revision ID: cee210baa0ea -Revises: eb96f9b82cc1 -Create Date: 2025-05-26 15:18:32.868727 - -""" - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = "cee210baa0ea" -down_revision = "eb96f9b82cc1" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "integration_access", - sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), - sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_at", sa.DateTime(), nullable=True), - sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("integration_types", sa.ARRAY(sa.String()), nullable=True), - sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), - sa.ForeignKeyConstraint( - ["organization_id"], ["organization.id"], ondelete="CASCADE" - ), - sa.PrimaryKeyConstraint("id"), - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_access_organization_id"), - "integration_access", - ["organization_id"], - unique=False, - schema="cognition", - ) - op.create_table( - "integration", - sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), - sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_at", sa.DateTime(), nullable=True), - sa.Column("started_at", sa.DateTime(), nullable=True), - sa.Column("finished_at", sa.DateTime(), nullable=True), - sa.Column("name", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("tokenizer", sa.String(), nullable=True), - sa.Column("state", sa.String(), nullable=True), - sa.Column("type", sa.String(), nullable=True), - sa.Column("config", sa.JSON(), nullable=True), - sa.Column("llm_config", sa.JSON(), nullable=True), - sa.Column("error_message", sa.String(), nullable=True), - sa.Column("extract_history", sa.JSON(), nullable=True), - sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), - sa.ForeignKeyConstraint( - ["organization_id"], ["organization.id"], ondelete="CASCADE" - ), - sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), - sa.PrimaryKeyConstraint("id"), - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_organization_id"), - "integration", - ["organization_id"], - unique=False, - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_project_id"), - "integration", - ["project_id"], - unique=False, - schema="cognition", - ) - op.add_column( - "embedding", - sa.Column("delta_full_recalculation_threshold", sa.Float(), nullable=True), - ) - op.add_column( - "embedding", - sa.Column("current_delta_record_count", sa.Integer(), nullable=True), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("embedding", "current_delta_record_count") - op.drop_column("embedding", "delta_full_recalculation_threshold") - op.drop_index( - op.f("ix_cognition_integration_project_id"), - table_name="integration", - schema="cognition", - ) - op.drop_index( - op.f("ix_cognition_integration_organization_id"), - table_name="integration", - schema="cognition", - ) - op.drop_table("integration", schema="cognition") - op.drop_index( - op.f("ix_cognition_integration_access_organization_id"), - table_name="integration_access", - schema="cognition", - ) - op.drop_table("integration_access", schema="cognition") - # ### end Alembic commands ### From 308df12feac6ec66bf3aa736d1e96ca937370392 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 10:13:38 +0200 Subject: [PATCH 28/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 8898a3cb..4aeea837 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 8898a3cb6650f2087d7cb368e5615e6b12745ca6 +Subproject commit 4aeea83717923713bd9bc36da6ed4b7619a72013 From b4e8cd2c41348c27ce69a8a0a377988410267306 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 14:02:10 +0200 Subject: [PATCH 29/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 4aeea837..2ec00dba 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 4aeea83717923713bd9bc36da6ed4b7619a72013 +Subproject commit 2ec00dba5c537267a748778f8744ef8caaa1022b From ecd7a8b90c033839fa43e5d6f45580ad284baa3d Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 27 May 2025 14:02:32 +0200 Subject: [PATCH 30/67] perf: update integration providers --- ...ers.py => 55c1cc06c0a8_adds_integration_providers.py} | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) rename alembic/versions/{9f8e5aec59ea_adds_integration_providers.py => 55c1cc06c0a8_adds_integration_providers.py} (98%) diff --git a/alembic/versions/9f8e5aec59ea_adds_integration_providers.py b/alembic/versions/55c1cc06c0a8_adds_integration_providers.py similarity index 98% rename from alembic/versions/9f8e5aec59ea_adds_integration_providers.py rename to alembic/versions/55c1cc06c0a8_adds_integration_providers.py index 7bbce09f..5654f131 100644 --- a/alembic/versions/9f8e5aec59ea_adds_integration_providers.py +++ b/alembic/versions/55c1cc06c0a8_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 9f8e5aec59ea +Revision ID: 55c1cc06c0a8 Revises: eb96f9b82cc1 -Create Date: 2025-05-27 07:52:57.549364 +Create Date: 2025-05-27 12:01:43.286456 """ @@ -11,14 +11,14 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "9f8e5aec59ea" +revision = "55c1cc06c0a8" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None def upgrade(): - op.execute("create schema integration;") + op.execute("create schema integration") # ### commands auto generated by Alembic - please adjust! ### op.create_table( "integration_access", @@ -92,6 +92,7 @@ def upgrade(): sa.Column("source", sa.String(), nullable=True), sa.Column("path", sa.String(), nullable=True), sa.Column("sha", sa.String(), nullable=True), + sa.Column("code_language", sa.String(), nullable=True), sa.Column("delta_criteria", sa.JSON(), nullable=True), sa.Column("minio_file_name", sa.String(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), From 6406398292928ac4de6a214c99f24bfcc36c2477 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 28 May 2025 00:27:58 +0200 Subject: [PATCH 31/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 2ec00dba..614d7067 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 2ec00dba5c537267a748778f8744ef8caaa1022b +Subproject commit 614d70677cd8d771220ee679b3d2f6ec8e77d12d From c904383f830c5f103d923fdac3a8248f32dc155a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 28 May 2025 00:28:10 +0200 Subject: [PATCH 32/67] perf(alembic): sharepoint integration --- ...aa766944229_adds_integration_providers.py} | 65 +++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) rename alembic/versions/{55c1cc06c0a8_adds_integration_providers.py => 5aa766944229_adds_integration_providers.py} (83%) diff --git a/alembic/versions/55c1cc06c0a8_adds_integration_providers.py b/alembic/versions/5aa766944229_adds_integration_providers.py similarity index 83% rename from alembic/versions/55c1cc06c0a8_adds_integration_providers.py rename to alembic/versions/5aa766944229_adds_integration_providers.py index 5654f131..6643f52f 100644 --- a/alembic/versions/55c1cc06c0a8_adds_integration_providers.py +++ b/alembic/versions/5aa766944229_adds_integration_providers.py @@ -1,8 +1,8 @@ """adds integration providers -Revision ID: 55c1cc06c0a8 +Revision ID: 5aa766944229 Revises: eb96f9b82cc1 -Create Date: 2025-05-27 12:01:43.286456 +Create Date: 2025-05-27 22:27:23.198297 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "55c1cc06c0a8" +revision = "5aa766944229" down_revision = "eb96f9b82cc1" branch_labels = None depends_on = None @@ -215,6 +215,47 @@ def upgrade(): unique=False, schema="integration", ) + op.create_table( + "sharepoint", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("updated_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("updated_at", sa.DateTime(), nullable=True), + sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("running_id", sa.Integer(), nullable=True), + sa.Column("source", sa.String(), nullable=True), + sa.Column("delta_criteria", sa.JSON(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), + sa.PrimaryKeyConstraint("id"), + schema="integration", + ) + op.create_index( + op.f("ix_integration_sharepoint_integration_id"), + "sharepoint", + ["integration_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_sharepoint_running_id"), + "sharepoint", + ["running_id"], + unique=False, + schema="integration", + ) + op.create_index( + op.f("ix_integration_sharepoint_source"), + "sharepoint", + ["source"], + unique=False, + schema="integration", + ) op.add_column( "embedding", sa.Column("delta_full_recalculation_threshold", sa.Float(), nullable=True), @@ -230,6 +271,22 @@ def downgrade(): # ### commands auto generated by Alembic - please adjust! ### op.drop_column("embedding", "current_delta_record_count") op.drop_column("embedding", "delta_full_recalculation_threshold") + op.drop_index( + op.f("ix_integration_sharepoint_source"), + table_name="sharepoint", + schema="integration", + ) + op.drop_index( + op.f("ix_integration_sharepoint_running_id"), + table_name="sharepoint", + schema="integration", + ) + op.drop_index( + op.f("ix_integration_sharepoint_integration_id"), + table_name="sharepoint", + schema="integration", + ) + op.drop_table("sharepoint", schema="integration") op.drop_index( op.f("ix_integration_pdf_source"), table_name="pdf", schema="integration" ) @@ -292,4 +349,4 @@ def downgrade(): ) op.drop_table("integration_access", schema="cognition") # ### end Alembic commands ### - op.execute("drop schema integration;") + op.execute("drop schema integration") From 9e5a363991df2863871d7ed74a5300baf435866b Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 16:45:42 +0200 Subject: [PATCH 33/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 614d7067..359187eb 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 614d70677cd8d771220ee679b3d2f6ec8e77d12d +Subproject commit 359187eb28d57bee2ffd37cc59cd4dcaaf16a767 From 775d78826f267b796a135cbe773c0f1324d453d2 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 16:45:54 +0200 Subject: [PATCH 34/67] perf(alembic): add integrations --- ...f8e00_adds_cognition_integration_tables.py | 114 ++++++++++++++++++ ...s.py => 9507990cb116_adds_integrations.py} | 113 +++-------------- 2 files changed, 134 insertions(+), 93 deletions(-) create mode 100644 alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py rename alembic/versions/{5aa766944229_adds_integration_providers.py => 9507990cb116_adds_integrations.py} (72%) diff --git a/alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py b/alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py new file mode 100644 index 00000000..fb33cdc2 --- /dev/null +++ b/alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py @@ -0,0 +1,114 @@ +"""adds cognition integration tables + +Revision ID: 74f6e03f8e00 +Revises: eb96f9b82cc1 +Create Date: 2025-05-29 14:42:57.186302 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "74f6e03f8e00" +down_revision = "eb96f9b82cc1" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "integration_access", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("integration_types", sa.ARRAY(sa.String()), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_access_organization_id"), + "integration_access", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_table( + "integration", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("started_at", sa.DateTime(), nullable=True), + sa.Column("finished_at", sa.DateTime(), nullable=True), + sa.Column("name", sa.String(), nullable=True), + sa.Column("description", sa.String(), nullable=True), + sa.Column("tokenizer", sa.String(), nullable=True), + sa.Column("state", sa.String(), nullable=True), + sa.Column("type", sa.String(), nullable=True), + sa.Column("config", sa.JSON(), nullable=True), + sa.Column("llm_config", sa.JSON(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_organization_id"), + "integration", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_project_id"), + "integration", + ["project_id"], + unique=False, + schema="cognition", + ) + op.add_column( + "embedding", + sa.Column("delta_full_recalculation_threshold", sa.Float(), nullable=True), + ) + op.add_column( + "embedding", + sa.Column("current_delta_record_count", sa.Integer(), nullable=True), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("embedding", "current_delta_record_count") + op.drop_column("embedding", "delta_full_recalculation_threshold") + op.drop_index( + op.f("ix_cognition_integration_project_id"), + table_name="integration", + schema="cognition", + ) + op.drop_index( + op.f("ix_cognition_integration_organization_id"), + table_name="integration", + schema="cognition", + ) + op.drop_table("integration", schema="cognition") + op.drop_index( + op.f("ix_cognition_integration_access_organization_id"), + table_name="integration_access", + schema="cognition", + ) + op.drop_table("integration_access", schema="cognition") + # ### end Alembic commands ### diff --git a/alembic/versions/5aa766944229_adds_integration_providers.py b/alembic/versions/9507990cb116_adds_integrations.py similarity index 72% rename from alembic/versions/5aa766944229_adds_integration_providers.py rename to alembic/versions/9507990cb116_adds_integrations.py index 6643f52f..8ae59d45 100644 --- a/alembic/versions/5aa766944229_adds_integration_providers.py +++ b/alembic/versions/9507990cb116_adds_integrations.py @@ -1,8 +1,8 @@ -"""adds integration providers +"""adds integrations -Revision ID: 5aa766944229 -Revises: eb96f9b82cc1 -Create Date: 2025-05-27 22:27:23.198297 +Revision ID: 9507990cb116 +Revises: 74f6e03f8e00 +Create Date: 2025-05-29 14:43:23.627964 """ @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "5aa766944229" -down_revision = "eb96f9b82cc1" +revision = "9507990cb116" +down_revision = "74f6e03f8e00" branch_labels = None depends_on = None @@ -20,66 +20,6 @@ def upgrade(): op.execute("create schema integration") # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "integration_access", - sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), - sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_at", sa.DateTime(), nullable=True), - sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("integration_types", sa.ARRAY(sa.String()), nullable=True), - sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), - sa.ForeignKeyConstraint( - ["organization_id"], ["organization.id"], ondelete="CASCADE" - ), - sa.PrimaryKeyConstraint("id"), - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_access_organization_id"), - "integration_access", - ["organization_id"], - unique=False, - schema="cognition", - ) - op.create_table( - "integration", - sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), - sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_at", sa.DateTime(), nullable=True), - sa.Column("started_at", sa.DateTime(), nullable=True), - sa.Column("finished_at", sa.DateTime(), nullable=True), - sa.Column("name", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("tokenizer", sa.String(), nullable=True), - sa.Column("state", sa.String(), nullable=True), - sa.Column("type", sa.String(), nullable=True), - sa.Column("config", sa.JSON(), nullable=True), - sa.Column("llm_config", sa.JSON(), nullable=True), - sa.Column("error_message", sa.String(), nullable=True), - sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), - sa.ForeignKeyConstraint( - ["organization_id"], ["organization.id"], ondelete="CASCADE" - ), - sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), - sa.PrimaryKeyConstraint("id"), - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_organization_id"), - "integration", - ["organization_id"], - unique=False, - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_project_id"), - "integration", - ["project_id"], - unique=False, - schema="cognition", - ) op.create_table( "github_file", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), @@ -225,6 +165,20 @@ def upgrade(): sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), + sa.Column("extension", sa.String(), nullable=True), + sa.Column("object_id", sa.String(), nullable=True), + sa.Column("parent_path", sa.String(), nullable=True), + sa.Column("name", sa.String(), nullable=True), + sa.Column("web_url", sa.String(), nullable=True), + sa.Column("sharepoint_created_by", sa.String(), nullable=True), + sa.Column("modified_by", sa.String(), nullable=True), + sa.Column("created", sa.String(), nullable=True), + sa.Column("modified", sa.String(), nullable=True), + sa.Column("description", sa.String(), nullable=True), + sa.Column("size", sa.String(), nullable=True), + sa.Column("mime_type", sa.String(), nullable=True), + sa.Column("hashes", sa.JSON(), nullable=True), + sa.Column("permissions", sa.JSON(), nullable=True), sa.Column("delta_criteria", sa.JSON(), nullable=True), sa.Column("minio_file_name", sa.String(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), @@ -256,21 +210,11 @@ def upgrade(): unique=False, schema="integration", ) - op.add_column( - "embedding", - sa.Column("delta_full_recalculation_threshold", sa.Float(), nullable=True), - ) - op.add_column( - "embedding", - sa.Column("current_delta_record_count", sa.Integer(), nullable=True), - ) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("embedding", "current_delta_record_count") - op.drop_column("embedding", "delta_full_recalculation_threshold") op.drop_index( op.f("ix_integration_sharepoint_source"), table_name="sharepoint", @@ -331,22 +275,5 @@ def downgrade(): schema="integration", ) op.drop_table("github_file", schema="integration") - op.drop_index( - op.f("ix_cognition_integration_project_id"), - table_name="integration", - schema="cognition", - ) - op.drop_index( - op.f("ix_cognition_integration_organization_id"), - table_name="integration", - schema="cognition", - ) - op.drop_table("integration", schema="cognition") - op.drop_index( - op.f("ix_cognition_integration_access_organization_id"), - table_name="integration_access", - schema="cognition", - ) - op.drop_table("integration_access", schema="cognition") # ### end Alembic commands ### op.execute("drop schema integration") From c440d537be029db72cececdda4b7b4f7ee08c1ba Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 21:12:51 +0200 Subject: [PATCH 35/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 359187eb..18990bbd 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 359187eb28d57bee2ffd37cc59cd4dcaaf16a767 +Subproject commit 18990bbd7147942c558eac0ee0e448aeef646d6a From d849ef5a101fe40a9e7b786d7ca0cb83069e60de Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 29 May 2025 21:13:08 +0200 Subject: [PATCH 36/67] perf(alembic): integration tables --- ...s.py => 89aeb979559f_adds_integrations.py} | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) rename alembic/versions/{9507990cb116_adds_integrations.py => 89aeb979559f_adds_integrations.py} (92%) diff --git a/alembic/versions/9507990cb116_adds_integrations.py b/alembic/versions/89aeb979559f_adds_integrations.py similarity index 92% rename from alembic/versions/9507990cb116_adds_integrations.py rename to alembic/versions/89aeb979559f_adds_integrations.py index 8ae59d45..82cc3230 100644 --- a/alembic/versions/9507990cb116_adds_integrations.py +++ b/alembic/versions/89aeb979559f_adds_integrations.py @@ -1,8 +1,8 @@ """adds integrations -Revision ID: 9507990cb116 +Revision ID: 89aeb979559f Revises: 74f6e03f8e00 -Create Date: 2025-05-29 14:43:23.627964 +Create Date: 2025-05-29 19:11:50.782335 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "9507990cb116" +revision = "89aeb979559f" down_revision = "74f6e03f8e00" branch_labels = None depends_on = None @@ -41,6 +41,9 @@ def upgrade(): ), sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "integration_id", "running_id", "source", name="unique_github_file_source" + ), schema="integration", ) op.create_index( @@ -74,19 +77,21 @@ def upgrade(): sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), sa.Column("url", sa.String(), nullable=True), sa.Column("state", sa.String(), nullable=True), sa.Column("assignee", sa.String(), nullable=True), sa.Column("milestone", sa.String(), nullable=True), sa.Column("number", sa.Integer(), nullable=True), - sa.Column("delta_criteria", sa.JSON(), nullable=True), - sa.Column("minio_file_name", sa.String(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint( ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" ), sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "integration_id", "running_id", "source", name="unique_github_issue_source" + ), schema="integration", ) op.create_index( @@ -120,18 +125,20 @@ def upgrade(): sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), sa.Column("file_path", sa.String(), nullable=True), sa.Column("page", sa.Integer(), nullable=True), sa.Column("total_pages", sa.Integer(), nullable=True), sa.Column("title", sa.String(), nullable=True), - sa.Column("delta_criteria", sa.JSON(), nullable=True), - sa.Column("minio_file_name", sa.String(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint( ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" ), sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "integration_id", "running_id", "source", name="unique_pdf_source" + ), schema="integration", ) op.create_index( @@ -165,6 +172,7 @@ def upgrade(): sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), sa.Column("extension", sa.String(), nullable=True), sa.Column("object_id", sa.String(), nullable=True), sa.Column("parent_path", sa.String(), nullable=True), @@ -172,21 +180,22 @@ def upgrade(): sa.Column("web_url", sa.String(), nullable=True), sa.Column("sharepoint_created_by", sa.String(), nullable=True), sa.Column("modified_by", sa.String(), nullable=True), - sa.Column("created", sa.String(), nullable=True), - sa.Column("modified", sa.String(), nullable=True), + sa.Column("created", sa.DateTime(), nullable=True), + sa.Column("modified", sa.DateTime(), nullable=True), sa.Column("description", sa.String(), nullable=True), - sa.Column("size", sa.String(), nullable=True), + sa.Column("size", sa.Integer(), nullable=True), sa.Column("mime_type", sa.String(), nullable=True), sa.Column("hashes", sa.JSON(), nullable=True), sa.Column("permissions", sa.JSON(), nullable=True), - sa.Column("delta_criteria", sa.JSON(), nullable=True), - sa.Column("minio_file_name", sa.String(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint( ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" ), sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "integration_id", "running_id", "source", name="unique_sharepoint_source" + ), schema="integration", ) op.create_index( From fd0267d488680e11e5a60b7e766a36ed0cd42d91 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 11:05:51 +0200 Subject: [PATCH 37/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 18990bbd..10932571 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 18990bbd7147942c558eac0ee0e448aeef646d6a +Subproject commit 10932571955c379b3b4f053f310775b93ef5e286 From a14138af0e495683df563db8eb251164df83d7b1 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 12:18:19 +0200 Subject: [PATCH 38/67] perf(alembic): db update --- ...f8e00_adds_cognition_integration_tables.py | 114 ------------------ ...> a6faf9b0c59c_adds_integration_tables.py} | 89 +++++++++++++- 2 files changed, 83 insertions(+), 120 deletions(-) delete mode 100644 alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py rename alembic/versions/{89aeb979559f_adds_integrations.py => a6faf9b0c59c_adds_integration_tables.py} (76%) diff --git a/alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py b/alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py deleted file mode 100644 index fb33cdc2..00000000 --- a/alembic/versions/74f6e03f8e00_adds_cognition_integration_tables.py +++ /dev/null @@ -1,114 +0,0 @@ -"""adds cognition integration tables - -Revision ID: 74f6e03f8e00 -Revises: eb96f9b82cc1 -Create Date: 2025-05-29 14:42:57.186302 - -""" - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = "74f6e03f8e00" -down_revision = "eb96f9b82cc1" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "integration_access", - sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), - sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_at", sa.DateTime(), nullable=True), - sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("integration_types", sa.ARRAY(sa.String()), nullable=True), - sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), - sa.ForeignKeyConstraint( - ["organization_id"], ["organization.id"], ondelete="CASCADE" - ), - sa.PrimaryKeyConstraint("id"), - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_access_organization_id"), - "integration_access", - ["organization_id"], - unique=False, - schema="cognition", - ) - op.create_table( - "integration", - sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), - sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("created_at", sa.DateTime(), nullable=True), - sa.Column("started_at", sa.DateTime(), nullable=True), - sa.Column("finished_at", sa.DateTime(), nullable=True), - sa.Column("name", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("tokenizer", sa.String(), nullable=True), - sa.Column("state", sa.String(), nullable=True), - sa.Column("type", sa.String(), nullable=True), - sa.Column("config", sa.JSON(), nullable=True), - sa.Column("llm_config", sa.JSON(), nullable=True), - sa.Column("error_message", sa.String(), nullable=True), - sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), - sa.ForeignKeyConstraint( - ["organization_id"], ["organization.id"], ondelete="CASCADE" - ), - sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), - sa.PrimaryKeyConstraint("id"), - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_organization_id"), - "integration", - ["organization_id"], - unique=False, - schema="cognition", - ) - op.create_index( - op.f("ix_cognition_integration_project_id"), - "integration", - ["project_id"], - unique=False, - schema="cognition", - ) - op.add_column( - "embedding", - sa.Column("delta_full_recalculation_threshold", sa.Float(), nullable=True), - ) - op.add_column( - "embedding", - sa.Column("current_delta_record_count", sa.Integer(), nullable=True), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("embedding", "current_delta_record_count") - op.drop_column("embedding", "delta_full_recalculation_threshold") - op.drop_index( - op.f("ix_cognition_integration_project_id"), - table_name="integration", - schema="cognition", - ) - op.drop_index( - op.f("ix_cognition_integration_organization_id"), - table_name="integration", - schema="cognition", - ) - op.drop_table("integration", schema="cognition") - op.drop_index( - op.f("ix_cognition_integration_access_organization_id"), - table_name="integration_access", - schema="cognition", - ) - op.drop_table("integration_access", schema="cognition") - # ### end Alembic commands ### diff --git a/alembic/versions/89aeb979559f_adds_integrations.py b/alembic/versions/a6faf9b0c59c_adds_integration_tables.py similarity index 76% rename from alembic/versions/89aeb979559f_adds_integrations.py rename to alembic/versions/a6faf9b0c59c_adds_integration_tables.py index 82cc3230..62ee8c25 100644 --- a/alembic/versions/89aeb979559f_adds_integrations.py +++ b/alembic/versions/a6faf9b0c59c_adds_integration_tables.py @@ -1,8 +1,8 @@ -"""adds integrations +"""adds integration tables -Revision ID: 89aeb979559f -Revises: 74f6e03f8e00 -Create Date: 2025-05-29 19:11:50.782335 +Revision ID: a6faf9b0c59c +Revises: 96fbb404381e +Create Date: 2025-06-03 10:17:24.723183 """ @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "89aeb979559f" -down_revision = "74f6e03f8e00" +revision = "a6faf9b0c59c" +down_revision = "96fbb404381e" branch_labels = None depends_on = None @@ -20,6 +20,66 @@ def upgrade(): op.execute("create schema integration") # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "integration_access", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("integration_types", sa.ARRAY(sa.String()), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_access_organization_id"), + "integration_access", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_table( + "integration", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("started_at", sa.DateTime(), nullable=True), + sa.Column("finished_at", sa.DateTime(), nullable=True), + sa.Column("name", sa.String(), nullable=True), + sa.Column("description", sa.String(), nullable=True), + sa.Column("tokenizer", sa.String(), nullable=True), + sa.Column("state", sa.String(), nullable=True), + sa.Column("type", sa.String(), nullable=True), + sa.Column("config", sa.JSON(), nullable=True), + sa.Column("llm_config", sa.JSON(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_organization_id"), + "integration", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_integration_project_id"), + "integration", + ["project_id"], + unique=False, + schema="cognition", + ) op.create_table( "github_file", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), @@ -284,5 +344,22 @@ def downgrade(): schema="integration", ) op.drop_table("github_file", schema="integration") + op.drop_index( + op.f("ix_cognition_integration_project_id"), + table_name="integration", + schema="cognition", + ) + op.drop_index( + op.f("ix_cognition_integration_organization_id"), + table_name="integration", + schema="cognition", + ) + op.drop_table("integration", schema="cognition") + op.drop_index( + op.f("ix_cognition_integration_access_organization_id"), + table_name="integration_access", + schema="cognition", + ) + op.drop_table("integration_access", schema="cognition") # ### end Alembic commands ### op.execute("drop schema integration") From 99fd4380e289e5ca1e9a0a86ea80996a33acb0e2 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 14:28:52 +0200 Subject: [PATCH 39/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 6b7bc0b6..c18a6eba 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 6b7bc0b67c7c7e7f40ac887063b91284e445b468 +Subproject commit c18a6eba094c90db9586a46b9a466231d6382d56 From 00c36675fd745994f1f25dff3faefbcb98be802f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 14:29:00 +0200 Subject: [PATCH 40/67] perf(alembic): db update --- ..._tables.py => 33a31b185da1_adds_integration_tables.py} | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) rename alembic/versions/{a6faf9b0c59c_adds_integration_tables.py => 33a31b185da1_adds_integration_tables.py} (98%) diff --git a/alembic/versions/a6faf9b0c59c_adds_integration_tables.py b/alembic/versions/33a31b185da1_adds_integration_tables.py similarity index 98% rename from alembic/versions/a6faf9b0c59c_adds_integration_tables.py rename to alembic/versions/33a31b185da1_adds_integration_tables.py index 62ee8c25..8ef0ad92 100644 --- a/alembic/versions/a6faf9b0c59c_adds_integration_tables.py +++ b/alembic/versions/33a31b185da1_adds_integration_tables.py @@ -1,8 +1,8 @@ """adds integration tables -Revision ID: a6faf9b0c59c +Revision ID: 33a31b185da1 Revises: 96fbb404381e -Create Date: 2025-06-03 10:17:24.723183 +Create Date: 2025-06-03 12:28:26.368699 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "a6faf9b0c59c" +revision = "33a31b185da1" down_revision = "96fbb404381e" branch_labels = None depends_on = None @@ -58,6 +58,8 @@ def upgrade(): sa.Column("config", sa.JSON(), nullable=True), sa.Column("llm_config", sa.JSON(), nullable=True), sa.Column("error_message", sa.String(), nullable=True), + sa.Column("is_synced", sa.Boolean(), nullable=True), + sa.Column("last_synced_at", sa.DateTime(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint( ["organization_id"], ["organization.id"], ondelete="CASCADE" From c4f3466f153a52aa1ab6e350406944a093373d26 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 15:24:18 +0200 Subject: [PATCH 41/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index c18a6eba..dc1e5e82 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit c18a6eba094c90db9586a46b9a466231d6382d56 +Subproject commit dc1e5e8242bfc8760fc389cf53918435b69e95f6 From c8f51183d914f7eed0f27f7436eecb92c6572284 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 3 Jun 2025 15:24:34 +0200 Subject: [PATCH 42/67] perf(alembic): integration sync updates --- ...on_tables.py => f526452985b3_adds_integration_tables.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename alembic/versions/{33a31b185da1_adds_integration_tables.py => f526452985b3_adds_integration_tables.py} (99%) diff --git a/alembic/versions/33a31b185da1_adds_integration_tables.py b/alembic/versions/f526452985b3_adds_integration_tables.py similarity index 99% rename from alembic/versions/33a31b185da1_adds_integration_tables.py rename to alembic/versions/f526452985b3_adds_integration_tables.py index 8ef0ad92..ac16cd79 100644 --- a/alembic/versions/33a31b185da1_adds_integration_tables.py +++ b/alembic/versions/f526452985b3_adds_integration_tables.py @@ -1,8 +1,8 @@ """adds integration tables -Revision ID: 33a31b185da1 +Revision ID: f526452985b3 Revises: 96fbb404381e -Create Date: 2025-06-03 12:28:26.368699 +Create Date: 2025-06-03 13:23:51.843158 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "33a31b185da1" +revision = "f526452985b3" down_revision = "96fbb404381e" branch_labels = None depends_on = None From d998b489c52b1c4b58788d3de7f0de928739b6a5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 13 Jun 2025 12:20:14 +0200 Subject: [PATCH 43/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index dc1e5e82..342b221b 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit dc1e5e8242bfc8760fc389cf53918435b69e95f6 +Subproject commit 342b221b7b2aa7a2f437b308e2b642f853ab5bac From 3f0aa18bf0fe9e1f98f665dde20a63a8a17761b9 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 13 Jun 2025 12:20:35 +0200 Subject: [PATCH 44/67] perf(alembic): added column --- alembic/versions/f526452985b3_adds_integration_tables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/alembic/versions/f526452985b3_adds_integration_tables.py b/alembic/versions/f526452985b3_adds_integration_tables.py index ac16cd79..cfe6434f 100644 --- a/alembic/versions/f526452985b3_adds_integration_tables.py +++ b/alembic/versions/f526452985b3_adds_integration_tables.py @@ -232,6 +232,7 @@ def upgrade(): sa.Column("created_at", sa.DateTime(), nullable=True), sa.Column("updated_at", sa.DateTime(), nullable=True), sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("delta_url", sa.String(), nullable=True), sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), sa.Column("minio_file_name", sa.String(), nullable=True), From 841a9684a2f6cabdd5a25248fa35ef4840ebd78c Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 16 Jun 2025 15:06:33 +0200 Subject: [PATCH 45/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 342b221b..7c023eee 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 342b221b7b2aa7a2f437b308e2b642f853ab5bac +Subproject commit 7c023eee7fabd4f2e0e0aae64d673ed2be9f8c39 From 3fa53418c978d6653b203fb9efe51e0dd8ad4bd4 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Mon, 16 Jun 2025 15:06:42 +0200 Subject: [PATCH 46/67] perf(alembic): db upgrade --- ...les.py => bac11032012d_adds_integration_tables.py} | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) rename alembic/versions/{f526452985b3_adds_integration_tables.py => bac11032012d_adds_integration_tables.py} (97%) diff --git a/alembic/versions/f526452985b3_adds_integration_tables.py b/alembic/versions/bac11032012d_adds_integration_tables.py similarity index 97% rename from alembic/versions/f526452985b3_adds_integration_tables.py rename to alembic/versions/bac11032012d_adds_integration_tables.py index cfe6434f..3203274e 100644 --- a/alembic/versions/f526452985b3_adds_integration_tables.py +++ b/alembic/versions/bac11032012d_adds_integration_tables.py @@ -1,8 +1,8 @@ """adds integration tables -Revision ID: f526452985b3 +Revision ID: bac11032012d Revises: 96fbb404381e -Create Date: 2025-06-03 13:23:51.843158 +Create Date: 2025-06-16 13:05:55.961555 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "f526452985b3" +revision = "bac11032012d" down_revision = "96fbb404381e" branch_labels = None depends_on = None @@ -48,6 +48,8 @@ def upgrade(): sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("updated_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("updated_at", sa.DateTime(), nullable=True), sa.Column("started_at", sa.DateTime(), nullable=True), sa.Column("finished_at", sa.DateTime(), nullable=True), sa.Column("name", sa.String(), nullable=True), @@ -60,11 +62,13 @@ def upgrade(): sa.Column("error_message", sa.String(), nullable=True), sa.Column("is_synced", sa.Boolean(), nullable=True), sa.Column("last_synced_at", sa.DateTime(), nullable=True), + sa.Column("delta_criteria", sa.JSON(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint( ["organization_id"], ["organization.id"], ondelete="CASCADE" ), sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), sa.PrimaryKeyConstraint("id"), schema="cognition", ) @@ -232,7 +236,6 @@ def upgrade(): sa.Column("created_at", sa.DateTime(), nullable=True), sa.Column("updated_at", sa.DateTime(), nullable=True), sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), - sa.Column("delta_url", sa.String(), nullable=True), sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), sa.Column("minio_file_name", sa.String(), nullable=True), From 74c25b37ac652d5897495191113ea6f2b1c12a46 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 24 Jun 2025 02:47:19 +0200 Subject: [PATCH 47/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 7c023eee..2e769dd8 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 7c023eee7fabd4f2e0e0aae64d673ed2be9f8c39 +Subproject commit 2e769dd801018670f06575d84f1cd79c353d001f From 43c8a450689f969bfd49206b29abb6a75c469ce9 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 09:21:59 +0200 Subject: [PATCH 48/67] perf: add project deletion internal endpoint --- app.py | 5 +++++ fast_api/models.py | 4 ++++ fast_api/routes/project_internal.py | 31 +++++++++++++++++++++++++++++ route_prefix.py | 1 + 4 files changed, 41 insertions(+) create mode 100644 fast_api/routes/project_internal.py diff --git a/app.py b/app.py index d64c37ed..a0130e56 100644 --- a/app.py +++ b/app.py @@ -16,6 +16,7 @@ from fast_api.routes.organization import router as org_router from fast_api.routes.project import router as project_router from fast_api.routes.project_setting import router as project_setting_router +from fast_api.routes.project_internal import router as project_internal_router from fast_api.routes.misc import router as misc_router from fast_api.routes.comment import router as comment_router from fast_api.routes.attribute import router as attribute_router @@ -43,6 +44,7 @@ PREFIX_ORGANIZATION, PREFIX_PROJECT, PREFIX_PROJECT_SETTING, + PREFIX_PROJECT_INTERNAL, PREFIX_MISC, PREFIX_COMMENT, PREFIX_ATTRIBUTE, @@ -121,6 +123,9 @@ fastapi_app_internal.include_router( record_internal_router, prefix=PREFIX_RECORD_INTERNAL, tags=["record-internal"] ) +fastapi_app_internal.include_router( + project_internal_router, prefix=PREFIX_PROJECT_INTERNAL, tags=["project-internal"] +) routes = [ diff --git a/fast_api/models.py b/fast_api/models.py index 9d0f348a..ea5deb29 100644 --- a/fast_api/models.py +++ b/fast_api/models.py @@ -515,3 +515,7 @@ class CheckInviteUsersBody(BaseModel): class RecordDeletion(BaseModel): record_ids: List[str] + + +class ProjectDeletion(BaseModel): + user_id: str diff --git a/fast_api/routes/project_internal.py b/fast_api/routes/project_internal.py new file mode 100644 index 00000000..d8e7c1f4 --- /dev/null +++ b/fast_api/routes/project_internal.py @@ -0,0 +1,31 @@ +from fastapi import APIRouter, Request + +from controller.auth import manager as auth_manager +from controller.project import manager +from fast_api.models import ProjectDeletion +from fast_api.routes.client_response import get_silent_success +from util import notification + +from submodules.model import enums +from submodules.model.business_objects import notification as notification_model + +router = APIRouter() + + +@router.delete( + "/{project_id}/delete-projects", +) +def delete_project(request: Request, project_id: str, data: ProjectDeletion): + manager.update_project(project_id, status=enums.ProjectStatus.IN_DELETION.value) + user = auth_manager.get_user_by_info(data.user_id) + project_item = manager.get_project(project_id) + organization_id = str(project_item.organization_id) + notification.create_notification( + enums.NotificationType.PROJECT_DELETED, user.id, None, project_item.name + ) + notification_model.remove_project_connection_for_last_x(project_id) + manager.delete_project(project_id) + notification.send_organization_update( + project_id, f"project_deleted:{project_id}:{user.id}", True, organization_id + ) + return get_silent_success() diff --git a/route_prefix.py b/route_prefix.py index 96918c8c..890fab2d 100644 --- a/route_prefix.py +++ b/route_prefix.py @@ -2,6 +2,7 @@ PREFIX_ORGANIZATION = PREFIX + "/organization" PREFIX_PROJECT = PREFIX + "/project" PREFIX_PROJECT_SETTING = PREFIX + "/project-setting" +PREFIX_PROJECT_INTERNAL = PREFIX + "/project" PREFIX_MISC = PREFIX + "/misc" PREFIX_COMMENT = PREFIX + "/comment" PREFIX_ATTRIBUTE = PREFIX + "/attribute" From d33d1ad6a3ff945c612b6684d25d5ae7a53cc630 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 15:39:22 +0200 Subject: [PATCH 49/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 2e769dd8..2c35db6b 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 2e769dd801018670f06575d84f1cd79c353d001f +Subproject commit 2c35db6b6fa19e8521de4d3502cd41db521c0baf From 70e3f2f97055a3ff26dccf1af7dbe48f1923df8f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 15:50:57 +0200 Subject: [PATCH 50/67] perf: update internal projects delete endpoint --- fast_api/models.py | 1 + fast_api/routes/project_internal.py | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/fast_api/models.py b/fast_api/models.py index ea5deb29..d60d022f 100644 --- a/fast_api/models.py +++ b/fast_api/models.py @@ -519,3 +519,4 @@ class RecordDeletion(BaseModel): class ProjectDeletion(BaseModel): user_id: str + project_ids: List[str] diff --git a/fast_api/routes/project_internal.py b/fast_api/routes/project_internal.py index d8e7c1f4..db62330a 100644 --- a/fast_api/routes/project_internal.py +++ b/fast_api/routes/project_internal.py @@ -13,19 +13,20 @@ @router.delete( - "/{project_id}/delete-projects", + "/delete-projects", ) -def delete_project(request: Request, project_id: str, data: ProjectDeletion): - manager.update_project(project_id, status=enums.ProjectStatus.IN_DELETION.value) - user = auth_manager.get_user_by_info(data.user_id) - project_item = manager.get_project(project_id) - organization_id = str(project_item.organization_id) - notification.create_notification( - enums.NotificationType.PROJECT_DELETED, user.id, None, project_item.name - ) - notification_model.remove_project_connection_for_last_x(project_id) - manager.delete_project(project_id) - notification.send_organization_update( - project_id, f"project_deleted:{project_id}:{user.id}", True, organization_id - ) +def delete_project(request: Request, data: ProjectDeletion): + for project_id in data.project_ids: + manager.update_project(project_id, status=enums.ProjectStatus.IN_DELETION.value) + user = auth_manager.get_user_by_info(data.user_id) + project_item = manager.get_project(project_id) + organization_id = str(project_item.organization_id) + notification.create_notification( + enums.NotificationType.PROJECT_DELETED, user.id, None, project_item.name + ) + notification_model.remove_project_connection_for_last_x(project_id) + manager.delete_project(project_id) + notification.send_organization_update( + project_id, f"project_deleted:{project_id}:{user.id}", True, organization_id + ) return get_silent_success() From 53930c419b0536acd39d1eccc5439c54f2d1b2f0 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 16:26:55 +0200 Subject: [PATCH 51/67] perf(alembic): db update perf: rename internal delete endpoint --- ...> deb19e3e620b_adds_integration_tables.py} | 136 +++++++++++++++++- fast_api/routes/project_internal.py | 10 +- 2 files changed, 138 insertions(+), 8 deletions(-) rename alembic/versions/{bac11032012d_adds_integration_tables.py => deb19e3e620b_adds_integration_tables.py} (80%) diff --git a/alembic/versions/bac11032012d_adds_integration_tables.py b/alembic/versions/deb19e3e620b_adds_integration_tables.py similarity index 80% rename from alembic/versions/bac11032012d_adds_integration_tables.py rename to alembic/versions/deb19e3e620b_adds_integration_tables.py index 3203274e..16f4110b 100644 --- a/alembic/versions/bac11032012d_adds_integration_tables.py +++ b/alembic/versions/deb19e3e620b_adds_integration_tables.py @@ -1,8 +1,8 @@ """adds integration tables -Revision ID: bac11032012d +Revision ID: deb19e3e620b Revises: 96fbb404381e -Create Date: 2025-06-16 13:05:55.961555 +Create Date: 2025-06-25 14:26:11.212310 """ @@ -11,15 +11,15 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "bac11032012d" +revision = "deb19e3e620b" down_revision = "96fbb404381e" branch_labels = None depends_on = None def upgrade(): - op.execute("create schema integration") # ### commands auto generated by Alembic - please adjust! ### + op.execute("create schema integration") op.create_table( "integration_access", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), @@ -34,6 +34,13 @@ def upgrade(): sa.PrimaryKeyConstraint("id"), schema="cognition", ) + op.create_index( + op.f("ix_cognition_integration_access_created_by"), + "integration_access", + ["created_by"], + unique=False, + schema="cognition", + ) op.create_index( op.f("ix_cognition_integration_access_organization_id"), "integration_access", @@ -72,6 +79,13 @@ def upgrade(): sa.PrimaryKeyConstraint("id"), schema="cognition", ) + op.create_index( + op.f("ix_cognition_integration_created_by"), + "integration", + ["created_by"], + unique=False, + schema="cognition", + ) op.create_index( op.f("ix_cognition_integration_organization_id"), "integration", @@ -86,6 +100,13 @@ def upgrade(): unique=False, schema="cognition", ) + op.create_index( + op.f("ix_cognition_integration_updated_by"), + "integration", + ["updated_by"], + unique=False, + schema="cognition", + ) op.create_table( "github_file", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), @@ -112,6 +133,13 @@ def upgrade(): ), schema="integration", ) + op.create_index( + op.f("ix_integration_github_file_created_by"), + "github_file", + ["created_by"], + unique=False, + schema="integration", + ) op.create_index( op.f("ix_integration_github_file_integration_id"), "github_file", @@ -133,6 +161,13 @@ def upgrade(): unique=False, schema="integration", ) + op.create_index( + op.f("ix_integration_github_file_updated_by"), + "github_file", + ["updated_by"], + unique=False, + schema="integration", + ) op.create_table( "github_issue", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), @@ -160,6 +195,13 @@ def upgrade(): ), schema="integration", ) + op.create_index( + op.f("ix_integration_github_issue_created_by"), + "github_issue", + ["created_by"], + unique=False, + schema="integration", + ) op.create_index( op.f("ix_integration_github_issue_integration_id"), "github_issue", @@ -181,6 +223,13 @@ def upgrade(): unique=False, schema="integration", ) + op.create_index( + op.f("ix_integration_github_issue_updated_by"), + "github_issue", + ["updated_by"], + unique=False, + schema="integration", + ) op.create_table( "pdf", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), @@ -207,6 +256,13 @@ def upgrade(): ), schema="integration", ) + op.create_index( + op.f("ix_integration_pdf_created_by"), + "pdf", + ["created_by"], + unique=False, + schema="integration", + ) op.create_index( op.f("ix_integration_pdf_integration_id"), "pdf", @@ -228,6 +284,13 @@ def upgrade(): unique=False, schema="integration", ) + op.create_index( + op.f("ix_integration_pdf_updated_by"), + "pdf", + ["updated_by"], + unique=False, + schema="integration", + ) op.create_table( "sharepoint", sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), @@ -264,6 +327,13 @@ def upgrade(): ), schema="integration", ) + op.create_index( + op.f("ix_integration_sharepoint_created_by"), + "sharepoint", + ["created_by"], + unique=False, + schema="integration", + ) op.create_index( op.f("ix_integration_sharepoint_integration_id"), "sharepoint", @@ -285,11 +355,23 @@ def upgrade(): unique=False, schema="integration", ) + op.create_index( + op.f("ix_integration_sharepoint_updated_by"), + "sharepoint", + ["updated_by"], + unique=False, + schema="integration", + ) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### + op.drop_index( + op.f("ix_integration_sharepoint_updated_by"), + table_name="sharepoint", + schema="integration", + ) op.drop_index( op.f("ix_integration_sharepoint_source"), table_name="sharepoint", @@ -305,7 +387,15 @@ def downgrade(): table_name="sharepoint", schema="integration", ) + op.drop_index( + op.f("ix_integration_sharepoint_created_by"), + table_name="sharepoint", + schema="integration", + ) op.drop_table("sharepoint", schema="integration") + op.drop_index( + op.f("ix_integration_pdf_updated_by"), table_name="pdf", schema="integration" + ) op.drop_index( op.f("ix_integration_pdf_source"), table_name="pdf", schema="integration" ) @@ -317,7 +407,15 @@ def downgrade(): table_name="pdf", schema="integration", ) + op.drop_index( + op.f("ix_integration_pdf_created_by"), table_name="pdf", schema="integration" + ) op.drop_table("pdf", schema="integration") + op.drop_index( + op.f("ix_integration_github_issue_updated_by"), + table_name="github_issue", + schema="integration", + ) op.drop_index( op.f("ix_integration_github_issue_source"), table_name="github_issue", @@ -333,7 +431,17 @@ def downgrade(): table_name="github_issue", schema="integration", ) + op.drop_index( + op.f("ix_integration_github_issue_created_by"), + table_name="github_issue", + schema="integration", + ) op.drop_table("github_issue", schema="integration") + op.drop_index( + op.f("ix_integration_github_file_updated_by"), + table_name="github_file", + schema="integration", + ) op.drop_index( op.f("ix_integration_github_file_source"), table_name="github_file", @@ -349,7 +457,17 @@ def downgrade(): table_name="github_file", schema="integration", ) + op.drop_index( + op.f("ix_integration_github_file_created_by"), + table_name="github_file", + schema="integration", + ) op.drop_table("github_file", schema="integration") + op.drop_index( + op.f("ix_cognition_integration_updated_by"), + table_name="integration", + schema="cognition", + ) op.drop_index( op.f("ix_cognition_integration_project_id"), table_name="integration", @@ -360,12 +478,22 @@ def downgrade(): table_name="integration", schema="cognition", ) + op.drop_index( + op.f("ix_cognition_integration_created_by"), + table_name="integration", + schema="cognition", + ) op.drop_table("integration", schema="cognition") op.drop_index( op.f("ix_cognition_integration_access_organization_id"), table_name="integration_access", schema="cognition", ) + op.drop_index( + op.f("ix_cognition_integration_access_created_by"), + table_name="integration_access", + schema="cognition", + ) op.drop_table("integration_access", schema="cognition") # ### end Alembic commands ### op.execute("drop schema integration") diff --git a/fast_api/routes/project_internal.py b/fast_api/routes/project_internal.py index db62330a..4f85fe5d 100644 --- a/fast_api/routes/project_internal.py +++ b/fast_api/routes/project_internal.py @@ -1,13 +1,15 @@ from fastapi import APIRouter, Request -from controller.auth import manager as auth_manager from controller.project import manager from fast_api.models import ProjectDeletion from fast_api.routes.client_response import get_silent_success from util import notification from submodules.model import enums -from submodules.model.business_objects import notification as notification_model +from submodules.model.business_objects import ( + notification as notification_model, + user as user_db_bo, +) router = APIRouter() @@ -15,10 +17,10 @@ @router.delete( "/delete-projects", ) -def delete_project(request: Request, data: ProjectDeletion): +def delete_projects(request: Request, data: ProjectDeletion): for project_id in data.project_ids: manager.update_project(project_id, status=enums.ProjectStatus.IN_DELETION.value) - user = auth_manager.get_user_by_info(data.user_id) + user = user_db_bo.get(data.user_id) project_item = manager.get_project(project_id) organization_id = str(project_item.organization_id) notification.create_notification( From a48e0c339d8222e19144da7bf895e8212dc041d5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 16:38:14 +0200 Subject: [PATCH 52/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 2c35db6b..f1b40ebb 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 2c35db6b6fa19e8521de4d3502cd41db521c0baf +Subproject commit f1b40ebbe7000ff8cff502c956cc207d6e119b57 From 5f158f8595009bcdfe14e66b93edbca77a0ebc94 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 16:38:23 +0200 Subject: [PATCH 53/67] perf(alembic): db updates --- ..._tables.py => af727833577c_adds_integration_tables.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename alembic/versions/{deb19e3e620b_adds_integration_tables.py => af727833577c_adds_integration_tables.py} (99%) diff --git a/alembic/versions/deb19e3e620b_adds_integration_tables.py b/alembic/versions/af727833577c_adds_integration_tables.py similarity index 99% rename from alembic/versions/deb19e3e620b_adds_integration_tables.py rename to alembic/versions/af727833577c_adds_integration_tables.py index 16f4110b..e34b5c14 100644 --- a/alembic/versions/deb19e3e620b_adds_integration_tables.py +++ b/alembic/versions/af727833577c_adds_integration_tables.py @@ -1,8 +1,8 @@ """adds integration tables -Revision ID: deb19e3e620b +Revision ID: af727833577c Revises: 96fbb404381e -Create Date: 2025-06-25 14:26:11.212310 +Create Date: 2025-06-25 14:37:40.733075 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "deb19e3e620b" +revision = "af727833577c" down_revision = "96fbb404381e" branch_labels = None depends_on = None @@ -74,7 +74,7 @@ def upgrade(): sa.ForeignKeyConstraint( ["organization_id"], ["organization.id"], ondelete="CASCADE" ), - sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint(["updated_by"], ["user.id"], ondelete="SET NULL"), sa.PrimaryKeyConstraint("id"), schema="cognition", From fec24187424a972980e6fad435a9a66667dd98f6 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 25 Jun 2025 17:50:46 +0200 Subject: [PATCH 54/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index f1b40ebb..4fa69bc3 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit f1b40ebbe7000ff8cff502c956cc207d6e119b57 +Subproject commit 4fa69bc3e8911a61bcd82a5a11177e4d40a9ccdc From bd53656ee7dc2da1129f8bb24647f268751f5731 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 14:33:41 +0200 Subject: [PATCH 55/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 4fa69bc3..3020f36e 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 4fa69bc3e8911a61bcd82a5a11177e4d40a9ccdc +Subproject commit 3020f36e5f4056d54ac4ee602d78f8457af582ed From c6726e748679ce937dedb06242bdded25090f6bf Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 14:33:51 +0200 Subject: [PATCH 56/67] perf(alembic): db update --- ...s.py => c17de4d14030_adds_integration_tables.py} | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) rename alembic/versions/{af727833577c_adds_integration_tables.py => c17de4d14030_adds_integration_tables.py} (98%) diff --git a/alembic/versions/af727833577c_adds_integration_tables.py b/alembic/versions/c17de4d14030_adds_integration_tables.py similarity index 98% rename from alembic/versions/af727833577c_adds_integration_tables.py rename to alembic/versions/c17de4d14030_adds_integration_tables.py index e34b5c14..a0597a1d 100644 --- a/alembic/versions/af727833577c_adds_integration_tables.py +++ b/alembic/versions/c17de4d14030_adds_integration_tables.py @@ -1,8 +1,8 @@ """adds integration tables -Revision ID: af727833577c +Revision ID: c17de4d14030 Revises: 96fbb404381e -Create Date: 2025-06-25 14:37:40.733075 +Create Date: 2025-06-26 12:14:54.827953 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "af727833577c" +revision = "c17de4d14030" down_revision = "96fbb404381e" branch_labels = None depends_on = None @@ -117,11 +117,11 @@ def upgrade(): sa.Column("integration_id", postgresql.UUID(as_uuid=True), nullable=True), sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), + sa.Column("minio_file_name", sa.String(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), sa.Column("path", sa.String(), nullable=True), sa.Column("sha", sa.String(), nullable=True), sa.Column("code_language", sa.String(), nullable=True), - sa.Column("delta_criteria", sa.JSON(), nullable=True), - sa.Column("minio_file_name", sa.String(), nullable=True), sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), sa.ForeignKeyConstraint( ["integration_id"], ["cognition.integration.id"], ondelete="CASCADE" @@ -179,6 +179,7 @@ def upgrade(): sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), sa.Column("minio_file_name", sa.String(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), sa.Column("url", sa.String(), nullable=True), sa.Column("state", sa.String(), nullable=True), sa.Column("assignee", sa.String(), nullable=True), @@ -241,6 +242,7 @@ def upgrade(): sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), sa.Column("minio_file_name", sa.String(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), sa.Column("file_path", sa.String(), nullable=True), sa.Column("page", sa.Integer(), nullable=True), sa.Column("total_pages", sa.Integer(), nullable=True), @@ -302,6 +304,7 @@ def upgrade(): sa.Column("running_id", sa.Integer(), nullable=True), sa.Column("source", sa.String(), nullable=True), sa.Column("minio_file_name", sa.String(), nullable=True), + sa.Column("error_message", sa.String(), nullable=True), sa.Column("extension", sa.String(), nullable=True), sa.Column("object_id", sa.String(), nullable=True), sa.Column("parent_path", sa.String(), nullable=True), From 53393a11abd8d10a17c659ecc58739a66197d76d Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 14:41:51 +0200 Subject: [PATCH 57/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 3020f36e..a0420da2 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 3020f36e5f4056d54ac4ee602d78f8457af582ed +Subproject commit a0420da2ab5e2c4bf5a3aa49fa4bafd8c47875de From 10eb22933a68f3ec886b13ca29e26d914da0dbcc Mon Sep 17 00:00:00 2001 From: lumburovskalina <104008550+lumburovskalina@users.noreply.github.com> Date: Thu, 26 Jun 2025 14:46:31 +0200 Subject: [PATCH 58/67] Adding groups for access management (#304) * Oidc identifier migrated to the users table * Search for user only if there is not oidc identifier * migrate * model * alembic merge * model * projects with access management * add access management attribute * deactivate mock up * deactivate access management * add groups/users to records * fix smaller issues * embeddings * model * update payloads * alembic fix * endpoint for sync internal * model * update logic * error handling * chore: update submodules * chore: update submodules * chore: update submodules --------- Co-authored-by: LennartSchmidtKern Co-authored-by: andhreljaKern --- ...2978f669_add_cognition_group_management.py | 59 ++++++++ ..._added_oidc_identifier_column_to_users_.py | 28 ++++ .../f75bfdd547f3_add_group_metadata.py | 28 ++++ controller/auth/kratos.py | 12 ++ controller/project/manager.py | 45 ++++++ controller/record/manager.py | 133 +++++++++++++++--- controller/user/manager.py | 16 +++ fast_api/routes/project.py | 33 ++++- fast_api/routes/record.py | 21 +++ fast_api/routes/record_internal.py | 34 ++++- submodules/model | 2 +- 11 files changed, 388 insertions(+), 23 deletions(-) create mode 100644 alembic/versions/4da42978f669_add_cognition_group_management.py create mode 100644 alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py create mode 100644 alembic/versions/f75bfdd547f3_add_group_metadata.py diff --git a/alembic/versions/4da42978f669_add_cognition_group_management.py b/alembic/versions/4da42978f669_add_cognition_group_management.py new file mode 100644 index 00000000..402afc52 --- /dev/null +++ b/alembic/versions/4da42978f669_add_cognition_group_management.py @@ -0,0 +1,59 @@ +"""add cognition group management + +Revision ID: 4da42978f669 +Revises: 5a22c0039b8e +Create Date: 2025-06-03 13:37:57.131295 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '4da42978f669' +down_revision = '5a22c0039b8e' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('group', + sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), + sa.Column('organization_id', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('name', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=True), + sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True), + sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['organization_id'], ['organization.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('name'), + schema='cognition' + ) + op.create_index(op.f('ix_cognition_group_created_by'), 'group', ['created_by'], unique=False, schema='cognition') + op.create_index(op.f('ix_cognition_group_organization_id'), 'group', ['organization_id'], unique=False, schema='cognition') + op.create_table('group_member', + sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), + sa.Column('group_id', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('user_id', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint(['group_id'], ['cognition.group.id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['user_id'], ['user.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id'), + schema='cognition' + ) + op.create_index(op.f('ix_cognition_group_member_group_id'), 'group_member', ['group_id'], unique=False, schema='cognition') + op.create_index(op.f('ix_cognition_group_member_user_id'), 'group_member', ['user_id'], unique=False, schema='cognition') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_cognition_group_member_user_id'), table_name='group_member', schema='cognition') + op.drop_index(op.f('ix_cognition_group_member_group_id'), table_name='group_member', schema='cognition') + op.drop_table('group_member', schema='cognition') + op.drop_index(op.f('ix_cognition_group_organization_id'), table_name='group', schema='cognition') + op.drop_index(op.f('ix_cognition_group_created_by'), table_name='group', schema='cognition') + op.drop_table('group', schema='cognition') + # ### end Alembic commands ### diff --git a/alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py b/alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py new file mode 100644 index 00000000..378772a5 --- /dev/null +++ b/alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py @@ -0,0 +1,28 @@ +"""Added oidc identifier column to users table + +Revision ID: 5a22c0039b8e +Revises: eb96f9b82cc1 +Create Date: 2025-05-21 08:56:58.320996 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '5a22c0039b8e' +down_revision = 'bac11032012d' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('user', sa.Column('oidc_identifier', sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('user', 'oidc_identifier') + # ### end Alembic commands ### diff --git a/alembic/versions/f75bfdd547f3_add_group_metadata.py b/alembic/versions/f75bfdd547f3_add_group_metadata.py new file mode 100644 index 00000000..83a5cc2f --- /dev/null +++ b/alembic/versions/f75bfdd547f3_add_group_metadata.py @@ -0,0 +1,28 @@ +"""add group metadata + +Revision ID: f75bfdd547f3 +Revises: 4da42978f669 +Create Date: 2025-06-19 22:55:22.603225 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'f75bfdd547f3' +down_revision = '4da42978f669' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('group', sa.Column('meta_data', sa.JSON(), nullable=True), schema='cognition') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('group', 'meta_data', schema='cognition') + # ### end Alembic commands ### diff --git a/controller/auth/kratos.py b/controller/auth/kratos.py index 22e51c1d..89e013b4 100644 --- a/controller/auth/kratos.py +++ b/controller/auth/kratos.py @@ -263,3 +263,15 @@ def check_user_exists(email: str) -> bool: if i["traits"]["email"].lower() == email.lower(): return True return False + + +def get_user_from_search(email: str) -> bool: + request = requests.get( + f"{KRATOS_ADMIN_URL}/identities?preview_credentials_identifier_similar={quote(email)}" + ) + if request.ok: + identities = request.json() + for i in identities: + if i["traits"]["email"].lower() == email.lower(): + return i + return None diff --git a/controller/project/manager.py b/controller/project/manager.py index ff440877..4fac0a83 100644 --- a/controller/project/manager.py +++ b/controller/project/manager.py @@ -15,6 +15,8 @@ data_slice, information_source, general, + attribute, + embedding ) from submodules.model import daemon from fast_api.types import HuddleData, ProjectSize @@ -25,6 +27,8 @@ from service.search import search from controller.auth import kratos from submodules.model.util import sql_alchemy_to_dict +from controller.embedding import connector + ALL_PROJECTS_WHITELIST = { "id", @@ -53,6 +57,47 @@ def get_all_projects(organization_id: str) -> List[Project]: return project.get_all(organization_id) +def get_all_projects_with_access_management(organization_id: str) -> List[Project]: + return project.get_all_with_access_management(organization_id) + + +def activate_access_management(project_id): + relative_position = attribute.get_relative_position(project_id) + if relative_position is None: + relative_position = 1 + else: + relative_position += 1 + filter_attributes = ["__ACCESS_GROUPS", "__ACCESS_USERS"] + attribute.create(project_id=project_id, relative_position=relative_position, name=filter_attributes[0], data_type=enums.DataTypes.PERMISSION.value, user_created=False, visibility=enums.AttributeVisibility.HIDE.value, with_commit=True, state=enums.AttributeState.AUTOMATICALLY_CREATED.value) + attribute.create(project_id=project_id, relative_position=relative_position + 1, name=filter_attributes[1], data_type=enums.DataTypes.PERMISSION.value, user_created=False, visibility=enums.AttributeVisibility.HIDE.value, with_commit=True, state=enums.AttributeState.AUTOMATICALLY_CREATED.value) + all_embeddings = embedding.get_all_embeddings_by_project_id(project_id) + for embedding_item in all_embeddings: + prev_filter_attributes = embedding_item.filter_attributes or [] + new_filter_attributes = list(set(prev_filter_attributes + filter_attributes)) + embedding_item.filter_attributes = new_filter_attributes + general.commit() + if connector.update_attribute_payloads_for_neural_search(project_id, str(embedding_item.id)): + embedding.update_embedding_filter_attributes( + project_id, str(embedding_item.id), new_filter_attributes, with_commit=True + ) + + +def deactivate_access_management(project_id: str) -> None: + record.delete_access_management_attributes(project_id) + access_groups_attribute = attribute.get_by_name(project_id, "__ACCESS_GROUPS") + access_users_attribute = attribute.get_by_name(project_id, "__ACCESS_USERS") + if access_groups_attribute: + attribute.delete(project_id, access_groups_attribute.id, with_commit=True) + if access_users_attribute: + attribute.delete(project_id, access_users_attribute.id, with_commit=True) + + +def is_access_management_activated(project_id: str) -> bool: + access_groups = attribute.get_by_name(project_id, "__ACCESS_GROUPS") + access_users = attribute.get_by_name(project_id, "__ACCESS_USERS") + return access_groups is not None and access_users is not None + + def get_all_projects_by_user(organization_id) -> List[Project]: projects = project.get_all_by_user_organization_id(organization_id) project_dicts = sql_alchemy_to_dict( diff --git a/controller/record/manager.py b/controller/record/manager.py index 857ed576..a0bff9ed 100644 --- a/controller/record/manager.py +++ b/controller/record/manager.py @@ -13,6 +13,7 @@ task_queue, record_label_association, comments, + project ) from service.search import search from submodules.model import enums @@ -29,6 +30,9 @@ from util import notification import time import traceback +from controller.embedding import connector +from submodules.model.cognition_objects import group as group_db +import json def get_record(project_id: str, record_id: str) -> Record: @@ -138,7 +142,7 @@ def get_unique_values_by_attributes(project_id: str) -> Dict[str, List[str]]: def edit_records( - user_id: str, project_id: str, changes: Dict[str, Any] + user_id: str, project_id: str, changes: Dict[str, Any], only_access_management_update: Optional[bool] = False ) -> Optional[List[str]]: prepped = __check_and_prep_edit_records(project_id, changes) if "errors_found" in prepped: @@ -164,22 +168,23 @@ def edit_records( record_label_association.delete_by_record_attribute_tuples(project_id, chunk) general.commit() - - try: - # tokenization currently with a complete rebuild of the docbins of touched records - # optimization possible by only rebuilding the changed record & attribute combinations and reuploading - tokenization.delete_record_docbins_by_id(project_id, records.keys(), True) - tokenization.delete_token_statistics_by_id(project_id, records.keys(), True) - tokenization_service.request_tokenize_project(project_id, user_id) - time.sleep(1) - # wait for tokenization to finish, the endpoint itself handles missing docbins - while tokenization.is_doc_bin_creation_running_or_queued(project_id): - time.sleep(0.5) - - except Exception: - __revert_record_data_changes(records, prepped["record_data_backup"]) - print(traceback.format_exc(), flush=True) - return ["tokenization failed"] + # TODO check if this is still needed for access management updates + if not only_access_management_update: + try: + # tokenization currently with a complete rebuild of the docbins of touched records + # optimization possible by only rebuilding the changed record & attribute combinations and reuploading + tokenization.delete_record_docbins_by_id(project_id, records.keys(), True) + tokenization.delete_token_statistics_by_id(project_id, records.keys(), True) + tokenization_service.request_tokenize_project(project_id, user_id) + time.sleep(1) + # wait for tokenization to finish, the endpoint itself handles missing docbins + while tokenization.is_doc_bin_creation_running_or_queued(project_id): + time.sleep(0.5) + + except Exception: + __revert_record_data_changes(records, prepped["record_data_backup"]) + print(traceback.format_exc(), flush=True) + return ["tokenization failed"] try: embedding_connector.request_re_embed_records( @@ -331,6 +336,100 @@ def delete_records( __delete_records(project_id, record_ids) +def sync_access_groups_and_users_sharepoint(project_id: str, integration_id: str, permissions_users: Dict[str, Any], record_ids: Optional[List[str]]) -> None: + try: + if record_ids: + project_records = record.get_by_record_ids(project_id, record_ids) + else: + project_records = record.get_all(project_id) + organization_id = project.get_org_id(project_id) + integration_groups_by_permission_id = group_db.get_all_by_integration_id_permission_grouped(organization_id, integration_id) + record_change_dict = {} + for record_item in project_records: + if not record_item.data.get("__ACCESS_GROUPS"): + current_group_ids = [] + else: + current_group_ids = record_item.data["__ACCESS_GROUPS"] + + meta_data_dict = json.loads(record_item.data.get("metadata", "{}")) + permission_ids = meta_data_dict.get("permissions") + new_group_ids = [str(integration_groups_by_permission_id.get(permission_id).id) for permission_id in permission_ids if integration_groups_by_permission_id.get(permission_id)] + # Only update if new group ids differ from current group ids + if not set(new_group_ids) == set(current_group_ids): + record_change_dict[f"{str(record_item.id)}@__ACCESS_GROUPS"] = { + "attributeName": "__ACCESS_GROUPS", + "newValue": new_group_ids, + "recordId": str(record_item.id), + } + if not record_item.data.get("__ACCESS_USERS"): + current_user_ids = [] + else: + current_user_ids = record_item.data["__ACCESS_USERS"] + new_user_ids = [permissions_users.get(permission_id) for permission_id in permission_ids if permissions_users.get(permission_id)] + # Only update if new user ids differ from current user ids + if not set(new_user_ids) == set(current_user_ids): + extended_user_ids = new_user_ids + record_change_dict[f"{str(record_item.id)}@__ACCESS_USERS"] = { + "attributeName": "__ACCESS_USERS", + "newValue": extended_user_ids, + "recordId": str(record_item.id), + } + changed_records_ids = list(record_change_dict.keys()) + partial_update = len(changed_records_ids) < len(project_records) + errors = edit_records(None, project_id, record_change_dict, True) + if not errors: + all_embeddings = embedding.get_all_embeddings_by_project_id(project_id) + for embedding_item in all_embeddings: + connector.update_attribute_payloads_for_neural_search(project_id, str(embedding_item.id), record_ids=changed_records_ids if partial_update else None) + return errors + + except Exception as e: + print(traceback.format_exc(), flush=True) + return [str(e)] + + +def add_access_groups_or_users(project_id: str, record_ids: List[str], group_ids: Optional[List[str]] = None, user_ids: Optional[List[str]] = None) -> None: + try: + if not record_ids or len(record_ids) == 0: + return + record_change_dict = {} + records_to_change = record.get_by_record_ids(project_id, record_ids) + if group_ids and len(group_ids) > 0: + for record_item in records_to_change: + if not record_item.data.get("__ACCESS_GROUPS"): + current_group_ids = [] + else: + current_group_ids = record_item.data["__ACCESS_GROUPS"] + extended_group_ids = list(set(current_group_ids + group_ids)) # remove duplicates + record_change_dict[f"{str(record_item.id)}@__ACCESS_GROUPS"] = { + "attributeName": "__ACCESS_GROUPS", + "newValue": extended_group_ids, + "recordId": str(record_item.id), + } + if user_ids and len(user_ids) > 0: + for record_item in records_to_change: + if not record_item.data.get("__ACCESS_USERS"): + current_user_ids = [] + else: + current_user_ids = record_item.data["__ACCESS_USERS"] + extended_user_ids = list(set(current_user_ids + user_ids)) + record_change_dict[f"{str(record_item.id)}@__ACCESS_USERS"] = { + "attributeName": "__ACCESS_USERS", + "newValue": extended_user_ids, + "recordId": str(record_item.id), + } + # maybe wait for embedding to finish first? + errors = edit_records(None, project_id, record_change_dict, True) + if not errors: + all_embeddings = embedding.get_all_embeddings_by_project_id(project_id) + for embedding_item in all_embeddings: + connector.update_attribute_payloads_for_neural_search(project_id, str(embedding_item.id)) + return errors + except Exception as e: + print(traceback.format_exc(), flush=True) + return [str(e)] + + def __delete_records(project_id: str, record_ids: List[str]) -> None: try: row_count = record.delete_many(project_id, record_ids) diff --git a/controller/user/manager.py b/controller/user/manager.py index a00a7b22..03d4d6b3 100644 --- a/controller/user/manager.py +++ b/controller/user/manager.py @@ -162,4 +162,20 @@ def __migrate_kratos_users(): if user_database.sso_provider != sso_provider: user_database.sso_provider = sso_provider + if user_database.oidc_identifier is None: + user_search = kratos.get_user_from_search(user_identity["traits"]["email"]) + if user_search and user_search["credentials"]: + if user_search["credentials"].get("oidc", None): + oidc = ( + user_search["credentials"] + .get("oidc", {}) + .get("identifiers", None)[0] + ) + if oidc: + oidc = oidc.split(":") + if len(oidc) > 1: + user_database.oidc_identifier = oidc[1] + else: + user_database.oidc_identifier = None + general.commit() diff --git a/fast_api/routes/project.py b/fast_api/routes/project.py index fc7fac5f..2478bef8 100644 --- a/fast_api/routes/project.py +++ b/fast_api/routes/project.py @@ -70,6 +70,38 @@ def get_all_projects(request: Request) -> Dict: return pack_json_result(projects) +# TO DO, some admin check should be added here +@router.get("/all-projects-with-access-management") +def get_all_projects_with_tokens(request: Request) -> Dict: + org_id = auth_manager.get_organization_id_by_info(request.state.info) + projects_with_access_management = manager.get_all_projects_with_access_management(org_id) + return pack_json_result(projects_with_access_management) + + +# TO DO, some admin check should be added here +@router.post("/{project_id}/access-management", dependencies=[Depends(auth_manager.check_project_access_dep)]) +def activate_access_management( + request: Request, + project_id: str, +): + if manager.is_access_management_activated(project_id): + return get_silent_success() + manager.activate_access_management(project_id) + return get_silent_success() + + +# TO DO, some admin check should be added here +@router.delete("/{project_id}/access-management", dependencies=[Depends(auth_manager.check_project_access_dep)]) +def deactivate_access_management( + request: Request, + project_id: str, +): + if not manager.is_access_management_activated(project_id): + return get_silent_success() + manager.deactivate_access_management(project_id) + return get_silent_success() + + @router.get("/all-projects-mini") def get_all_projects_mini(request: Request) -> Dict: projects = manager.get_all_projects_by_user( @@ -85,7 +117,6 @@ def get_all_projects_mini(request: Request) -> Dict: } for project in projects ] - return pack_json_result(project_extended) diff --git a/fast_api/routes/record.py b/fast_api/routes/record.py index 4c2ce276..b837b789 100644 --- a/fast_api/routes/record.py +++ b/fast_api/routes/record.py @@ -47,3 +47,24 @@ def delete_by_record_ids( ): manager.delete_records(project_id, body.record_ids, as_thread) return get_silent_success() + + +# TODO: add some admin checks for access management +@router.post( + "/{project_id}/access-management", + dependencies=[Depends(auth_manager.check_project_access_dep)], +) +def add_access_groups_or_users( + project_id: str, + body: dict = Body(...), +): + group_ids = body.get("group_ids") + user_ids = body.get("user_ids") + record_ids = body.get("record_ids") + errors = manager.add_access_groups_or_users(project_id, record_ids, group_ids=group_ids, user_ids=user_ids) + if errors and len(errors) > 0: + return get_custom_response( + status_code=status.HTTP_200_OK, + content=json.dumps(errors), + ) + return get_silent_success() \ No newline at end of file diff --git a/fast_api/routes/record_internal.py b/fast_api/routes/record_internal.py index 703a2810..e0847e6c 100644 --- a/fast_api/routes/record_internal.py +++ b/fast_api/routes/record_internal.py @@ -1,10 +1,13 @@ from typing import Optional -from fastapi import APIRouter -from controller.record import manager +from fastapi import APIRouter, Body, status +from controller.record import manager as record_manager +from controller.project import manager as project_manager from fast_api.routes.client_response import ( - get_silent_success, + get_custom_response, + get_silent_success ) from fast_api.models import RecordDeletion +import json router = APIRouter() @@ -15,5 +18,28 @@ def delete_by_record_ids( body: RecordDeletion, as_thread: Optional[bool] = False, ): - manager.delete_records(project_id, body.record_ids, as_thread) + record_manager.delete_records(project_id, body.record_ids, as_thread) return get_silent_success() + + +@router.post( + "/{project_id}/access-management/sharepoint" +) +def sync_access_groups_and_users_sharepoint( + project_id: str, + body: dict = Body(...), +): + record_ids = body.get("record_ids") + integration_id = body.get("integration_id") + permissions_user = body.get("permissions_user", {}) + + if not project_manager.is_access_management_activated(project_id): + project_manager.activate_access_management(project_id) + + errors = record_manager.sync_access_groups_and_users_sharepoint(project_id, integration_id, permissions_user, record_ids) + if errors and len(errors) > 0: + return get_custom_response( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=json.dumps(errors), + ) + return get_silent_success() \ No newline at end of file diff --git a/submodules/model b/submodules/model index a0420da2..7574e8a3 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit a0420da2ab5e2c4bf5a3aa49fa4bafd8c47875de +Subproject commit 7574e8a3ee9a3d78827db3fb9d3c1ddb7e3b58fe From 3bdb6232ada7fa7caad44f950c7a962671bacf39 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 14:48:40 +0200 Subject: [PATCH 59/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 7574e8a3..2fe5e2d9 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 7574e8a3ee9a3d78827db3fb9d3c1ddb7e3b58fe +Subproject commit 2fe5e2d9e642d71607119ff9719d54d20d573e27 From 950036a104562e9d7f947aeb19f7ffecce29ec03 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 14:58:55 +0200 Subject: [PATCH 60/67] perf(alembic): db alignment --- ...> 36f087da55b1_adds_integration_tables.py} | 10 +- ...2978f669_add_cognition_group_management.py | 59 ---------- ..._added_oidc_identifier_column_to_users_.py | 28 ----- ...c66ea92_adds_cognition_group_management.py | 107 ++++++++++++++++++ .../f75bfdd547f3_add_group_metadata.py | 28 ----- 5 files changed, 112 insertions(+), 120 deletions(-) rename alembic/versions/{c17de4d14030_adds_integration_tables.py => 36f087da55b1_adds_integration_tables.py} (99%) delete mode 100644 alembic/versions/4da42978f669_add_cognition_group_management.py delete mode 100644 alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py create mode 100644 alembic/versions/6868ac66ea92_adds_cognition_group_management.py delete mode 100644 alembic/versions/f75bfdd547f3_add_group_metadata.py diff --git a/alembic/versions/c17de4d14030_adds_integration_tables.py b/alembic/versions/36f087da55b1_adds_integration_tables.py similarity index 99% rename from alembic/versions/c17de4d14030_adds_integration_tables.py rename to alembic/versions/36f087da55b1_adds_integration_tables.py index a0597a1d..85972e7d 100644 --- a/alembic/versions/c17de4d14030_adds_integration_tables.py +++ b/alembic/versions/36f087da55b1_adds_integration_tables.py @@ -1,8 +1,8 @@ """adds integration tables -Revision ID: c17de4d14030 -Revises: 96fbb404381e -Create Date: 2025-06-26 12:14:54.827953 +Revision ID: 36f087da55b1 +Revises: ad13850a7245 +Create Date: 2025-06-26 12:57:31.654259 """ @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "c17de4d14030" -down_revision = "96fbb404381e" +revision = "36f087da55b1" +down_revision = "ad13850a7245" branch_labels = None depends_on = None diff --git a/alembic/versions/4da42978f669_add_cognition_group_management.py b/alembic/versions/4da42978f669_add_cognition_group_management.py deleted file mode 100644 index 402afc52..00000000 --- a/alembic/versions/4da42978f669_add_cognition_group_management.py +++ /dev/null @@ -1,59 +0,0 @@ -"""add cognition group management - -Revision ID: 4da42978f669 -Revises: 5a22c0039b8e -Create Date: 2025-06-03 13:37:57.131295 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = '4da42978f669' -down_revision = '5a22c0039b8e' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('group', - sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), - sa.Column('organization_id', postgresql.UUID(as_uuid=True), nullable=True), - sa.Column('name', sa.String(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True), - sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'), - sa.ForeignKeyConstraint(['organization_id'], ['organization.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('name'), - schema='cognition' - ) - op.create_index(op.f('ix_cognition_group_created_by'), 'group', ['created_by'], unique=False, schema='cognition') - op.create_index(op.f('ix_cognition_group_organization_id'), 'group', ['organization_id'], unique=False, schema='cognition') - op.create_table('group_member', - sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), - sa.Column('group_id', postgresql.UUID(as_uuid=True), nullable=True), - sa.Column('user_id', postgresql.UUID(as_uuid=True), nullable=True), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint(['group_id'], ['cognition.group.id'], ondelete='CASCADE'), - sa.ForeignKeyConstraint(['user_id'], ['user.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id'), - schema='cognition' - ) - op.create_index(op.f('ix_cognition_group_member_group_id'), 'group_member', ['group_id'], unique=False, schema='cognition') - op.create_index(op.f('ix_cognition_group_member_user_id'), 'group_member', ['user_id'], unique=False, schema='cognition') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_index(op.f('ix_cognition_group_member_user_id'), table_name='group_member', schema='cognition') - op.drop_index(op.f('ix_cognition_group_member_group_id'), table_name='group_member', schema='cognition') - op.drop_table('group_member', schema='cognition') - op.drop_index(op.f('ix_cognition_group_organization_id'), table_name='group', schema='cognition') - op.drop_index(op.f('ix_cognition_group_created_by'), table_name='group', schema='cognition') - op.drop_table('group', schema='cognition') - # ### end Alembic commands ### diff --git a/alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py b/alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py deleted file mode 100644 index 378772a5..00000000 --- a/alembic/versions/5a22c0039b8e_added_oidc_identifier_column_to_users_.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Added oidc identifier column to users table - -Revision ID: 5a22c0039b8e -Revises: eb96f9b82cc1 -Create Date: 2025-05-21 08:56:58.320996 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '5a22c0039b8e' -down_revision = 'bac11032012d' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('user', sa.Column('oidc_identifier', sa.String(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('user', 'oidc_identifier') - # ### end Alembic commands ### diff --git a/alembic/versions/6868ac66ea92_adds_cognition_group_management.py b/alembic/versions/6868ac66ea92_adds_cognition_group_management.py new file mode 100644 index 00000000..e82eeff8 --- /dev/null +++ b/alembic/versions/6868ac66ea92_adds_cognition_group_management.py @@ -0,0 +1,107 @@ +"""adds cognition group management + +Revision ID: 6868ac66ea92 +Revises: 36f087da55b1 +Create Date: 2025-06-26 12:58:16.408919 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "6868ac66ea92" +down_revision = "36f087da55b1" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "group", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("organization_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("name", sa.String(), nullable=True), + sa.Column("description", sa.String(), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("meta_data", sa.JSON(), nullable=True), + sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"), + sa.ForeignKeyConstraint( + ["organization_id"], ["organization.id"], ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("name"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_group_created_by"), + "group", + ["created_by"], + unique=False, + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_group_organization_id"), + "group", + ["organization_id"], + unique=False, + schema="cognition", + ) + op.create_table( + "group_member", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("group_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint( + ["group_id"], ["cognition.group.id"], ondelete="CASCADE" + ), + sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_group_member_group_id"), + "group_member", + ["group_id"], + unique=False, + schema="cognition", + ) + op.create_index( + op.f("ix_cognition_group_member_user_id"), + "group_member", + ["user_id"], + unique=False, + schema="cognition", + ) + op.add_column("user", sa.Column("oidc_identifier", sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("user", "oidc_identifier") + op.drop_index( + op.f("ix_cognition_group_member_user_id"), + table_name="group_member", + schema="cognition", + ) + op.drop_index( + op.f("ix_cognition_group_member_group_id"), + table_name="group_member", + schema="cognition", + ) + op.drop_table("group_member", schema="cognition") + op.drop_index( + op.f("ix_cognition_group_organization_id"), + table_name="group", + schema="cognition", + ) + op.drop_index( + op.f("ix_cognition_group_created_by"), table_name="group", schema="cognition" + ) + op.drop_table("group", schema="cognition") + # ### end Alembic commands ### diff --git a/alembic/versions/f75bfdd547f3_add_group_metadata.py b/alembic/versions/f75bfdd547f3_add_group_metadata.py deleted file mode 100644 index 83a5cc2f..00000000 --- a/alembic/versions/f75bfdd547f3_add_group_metadata.py +++ /dev/null @@ -1,28 +0,0 @@ -"""add group metadata - -Revision ID: f75bfdd547f3 -Revises: 4da42978f669 -Create Date: 2025-06-19 22:55:22.603225 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = 'f75bfdd547f3' -down_revision = '4da42978f669' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('group', sa.Column('meta_data', sa.JSON(), nullable=True), schema='cognition') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('group', 'meta_data', schema='cognition') - # ### end Alembic commands ### From 3e4f54089e90e70320d69b17212a8a905d5ae121 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 21:05:55 +0200 Subject: [PATCH 61/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 2fe5e2d9..2a4899f3 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 2fe5e2d9e642d71607119ff9719d54d20d573e27 +Subproject commit 2a4899f3e89f5f7eef9c056e9a1b74d5f313382e From 95cb724cb36d992f1ab70875fefb33210144d2a9 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 26 Jun 2025 21:06:02 +0200 Subject: [PATCH 62/67] perf: rename task --- fast_api/routes/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_api/routes/misc.py b/fast_api/routes/misc.py index 0fd62127..9ad178c8 100644 --- a/fast_api/routes/misc.py +++ b/fast_api/routes/misc.py @@ -134,7 +134,7 @@ def cancel_task( controller_manager.cancel_parse_cognition_file_task( task_entity.organization_id, task_info ) - elif task_type == enums.TaskType.INTEGRATION.value: + elif task_type == enums.TaskType.EXECUTE_INTEGRATION.value: controller_manager.cancel_integration_task(task_info) else: raise ValueError(f"{task_type} is no valid task type") From 70a850d6754a0a0a8f3e922939f93c019010ce07 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 09:14:39 +0200 Subject: [PATCH 63/67] perf: add fail-safe while wait --- controller/record/manager.py | 60 ++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/controller/record/manager.py b/controller/record/manager.py index a0bff9ed..bb388297 100644 --- a/controller/record/manager.py +++ b/controller/record/manager.py @@ -13,7 +13,7 @@ task_queue, record_label_association, comments, - project + project, ) from service.search import search from submodules.model import enums @@ -142,7 +142,10 @@ def get_unique_values_by_attributes(project_id: str) -> Dict[str, List[str]]: def edit_records( - user_id: str, project_id: str, changes: Dict[str, Any], only_access_management_update: Optional[bool] = False + user_id: str, + project_id: str, + changes: Dict[str, Any], + only_access_management_update: Optional[bool] = False, ) -> Optional[List[str]]: prepped = __check_and_prep_edit_records(project_id, changes) if "errors_found" in prepped: @@ -177,8 +180,15 @@ def edit_records( tokenization.delete_token_statistics_by_id(project_id, records.keys(), True) tokenization_service.request_tokenize_project(project_id, user_id) time.sleep(1) + c = 0 # wait for tokenization to finish, the endpoint itself handles missing docbins while tokenization.is_doc_bin_creation_running_or_queued(project_id): + c += 1 + if c > 7200: + # fail-safe (e.g. max 2 h wait) to prevent infinite loop + raise RuntimeError( + "Failed to find a record tokenization task after 2h wait." + ) time.sleep(0.5) except Exception: @@ -336,14 +346,23 @@ def delete_records( __delete_records(project_id, record_ids) -def sync_access_groups_and_users_sharepoint(project_id: str, integration_id: str, permissions_users: Dict[str, Any], record_ids: Optional[List[str]]) -> None: +def sync_access_groups_and_users_sharepoint( + project_id: str, + integration_id: str, + permissions_users: Dict[str, Any], + record_ids: Optional[List[str]], +) -> None: try: if record_ids: project_records = record.get_by_record_ids(project_id, record_ids) else: project_records = record.get_all(project_id) organization_id = project.get_org_id(project_id) - integration_groups_by_permission_id = group_db.get_all_by_integration_id_permission_grouped(organization_id, integration_id) + integration_groups_by_permission_id = ( + group_db.get_all_by_integration_id_permission_grouped( + organization_id, integration_id + ) + ) record_change_dict = {} for record_item in project_records: if not record_item.data.get("__ACCESS_GROUPS"): @@ -353,7 +372,11 @@ def sync_access_groups_and_users_sharepoint(project_id: str, integration_id: str meta_data_dict = json.loads(record_item.data.get("metadata", "{}")) permission_ids = meta_data_dict.get("permissions") - new_group_ids = [str(integration_groups_by_permission_id.get(permission_id).id) for permission_id in permission_ids if integration_groups_by_permission_id.get(permission_id)] + new_group_ids = [ + str(integration_groups_by_permission_id.get(permission_id).id) + for permission_id in permission_ids + if integration_groups_by_permission_id.get(permission_id) + ] # Only update if new group ids differ from current group ids if not set(new_group_ids) == set(current_group_ids): record_change_dict[f"{str(record_item.id)}@__ACCESS_GROUPS"] = { @@ -365,7 +388,11 @@ def sync_access_groups_and_users_sharepoint(project_id: str, integration_id: str current_user_ids = [] else: current_user_ids = record_item.data["__ACCESS_USERS"] - new_user_ids = [permissions_users.get(permission_id) for permission_id in permission_ids if permissions_users.get(permission_id)] + new_user_ids = [ + permissions_users.get(permission_id) + for permission_id in permission_ids + if permissions_users.get(permission_id) + ] # Only update if new user ids differ from current user ids if not set(new_user_ids) == set(current_user_ids): extended_user_ids = new_user_ids @@ -380,7 +407,11 @@ def sync_access_groups_and_users_sharepoint(project_id: str, integration_id: str if not errors: all_embeddings = embedding.get_all_embeddings_by_project_id(project_id) for embedding_item in all_embeddings: - connector.update_attribute_payloads_for_neural_search(project_id, str(embedding_item.id), record_ids=changed_records_ids if partial_update else None) + connector.update_attribute_payloads_for_neural_search( + project_id, + str(embedding_item.id), + record_ids=changed_records_ids if partial_update else None, + ) return errors except Exception as e: @@ -388,7 +419,12 @@ def sync_access_groups_and_users_sharepoint(project_id: str, integration_id: str return [str(e)] -def add_access_groups_or_users(project_id: str, record_ids: List[str], group_ids: Optional[List[str]] = None, user_ids: Optional[List[str]] = None) -> None: +def add_access_groups_or_users( + project_id: str, + record_ids: List[str], + group_ids: Optional[List[str]] = None, + user_ids: Optional[List[str]] = None, +) -> None: try: if not record_ids or len(record_ids) == 0: return @@ -400,7 +436,9 @@ def add_access_groups_or_users(project_id: str, record_ids: List[str], group_ids current_group_ids = [] else: current_group_ids = record_item.data["__ACCESS_GROUPS"] - extended_group_ids = list(set(current_group_ids + group_ids)) # remove duplicates + extended_group_ids = list( + set(current_group_ids + group_ids) + ) # remove duplicates record_change_dict[f"{str(record_item.id)}@__ACCESS_GROUPS"] = { "attributeName": "__ACCESS_GROUPS", "newValue": extended_group_ids, @@ -423,7 +461,9 @@ def add_access_groups_or_users(project_id: str, record_ids: List[str], group_ids if not errors: all_embeddings = embedding.get_all_embeddings_by_project_id(project_id) for embedding_item in all_embeddings: - connector.update_attribute_payloads_for_neural_search(project_id, str(embedding_item.id)) + connector.update_attribute_payloads_for_neural_search( + project_id, str(embedding_item.id) + ) return errors except Exception as e: print(traceback.format_exc(), flush=True) From 6f74eaefd0efdbb05e00e31d188a809b69d5d968 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 12:33:05 +0200 Subject: [PATCH 64/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 2a4899f3..db484cfc 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 2a4899f3e89f5f7eef9c056e9a1b74d5f313382e +Subproject commit db484cfcbfc9bd633626935c2e4bffdae1bea13e From ea947470a99ebebb35e1deb8360003df2619a120 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 12:34:20 +0200 Subject: [PATCH 65/67] perf: pr review comments --- api/transfer.py | 4 ++-- controller/monitor/manager.py | 3 +-- controller/project/manager.py | 35 +++++++++++++++++++++++++++++------ controller/record/manager.py | 4 +++- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/api/transfer.py b/api/transfer.py index d80b2b1c..809f5eb8 100644 --- a/api/transfer.py +++ b/api/transfer.py @@ -196,7 +196,7 @@ def __calculate_missing_attributes(project_id: str, user_id: str) -> None: i += 1 if i >= 60: i = 0 - daemon.reset_session_token_in_thread() + daemon.reset_session_token_in_thread() # TODO: this function does not exist if tokenization.is_doc_bin_creation_running_or_queued(project_id): time.sleep(2) continue @@ -211,7 +211,7 @@ def __calculate_missing_attributes(project_id: str, user_id: str) -> None: break if i >= 60: i = 0 - daemon.reset_session_token_in_thread() + daemon.reset_session_token_in_thread() # TODO: this function does not exist current_att_id = attribute_ids[0] current_att = attribute.get(project_id, current_att_id) diff --git a/controller/monitor/manager.py b/controller/monitor/manager.py index 036541b9..f4bdd43b 100644 --- a/controller/monitor/manager.py +++ b/controller/monitor/manager.py @@ -124,6 +124,5 @@ def cancel_integration_task( integration_id = task_info.get("integrationId") task_monitor.set_integration_task_to_failed( - integration_id, - with_commit=True, + integration_id, error_message="Cancelled by task manager" ) diff --git a/controller/project/manager.py b/controller/project/manager.py index 4fac0a83..b86cdbd1 100644 --- a/controller/project/manager.py +++ b/controller/project/manager.py @@ -16,7 +16,7 @@ information_source, general, attribute, - embedding + embedding, ) from submodules.model import daemon from fast_api.types import HuddleData, ProjectSize @@ -68,19 +68,42 @@ def activate_access_management(project_id): else: relative_position += 1 filter_attributes = ["__ACCESS_GROUPS", "__ACCESS_USERS"] - attribute.create(project_id=project_id, relative_position=relative_position, name=filter_attributes[0], data_type=enums.DataTypes.PERMISSION.value, user_created=False, visibility=enums.AttributeVisibility.HIDE.value, with_commit=True, state=enums.AttributeState.AUTOMATICALLY_CREATED.value) - attribute.create(project_id=project_id, relative_position=relative_position + 1, name=filter_attributes[1], data_type=enums.DataTypes.PERMISSION.value, user_created=False, visibility=enums.AttributeVisibility.HIDE.value, with_commit=True, state=enums.AttributeState.AUTOMATICALLY_CREATED.value) + attribute.create( + project_id=project_id, + relative_position=relative_position, + name=filter_attributes[0], + data_type=enums.DataTypes.PERMISSION.value, + user_created=False, + visibility=enums.AttributeVisibility.HIDE.value, + with_commit=True, + state=enums.AttributeState.AUTOMATICALLY_CREATED.value, + ) + attribute.create( + project_id=project_id, + relative_position=relative_position + 1, + name=filter_attributes[1], + data_type=enums.DataTypes.PERMISSION.value, + user_created=False, + visibility=enums.AttributeVisibility.HIDE.value, + with_commit=True, + state=enums.AttributeState.AUTOMATICALLY_CREATED.value, + ) all_embeddings = embedding.get_all_embeddings_by_project_id(project_id) for embedding_item in all_embeddings: prev_filter_attributes = embedding_item.filter_attributes or [] new_filter_attributes = list(set(prev_filter_attributes + filter_attributes)) embedding_item.filter_attributes = new_filter_attributes general.commit() - if connector.update_attribute_payloads_for_neural_search(project_id, str(embedding_item.id)): + if connector.update_attribute_payloads_for_neural_search( + project_id, str(embedding_item.id) + ): embedding.update_embedding_filter_attributes( - project_id, str(embedding_item.id), new_filter_attributes, with_commit=True + project_id, + str(embedding_item.id), + new_filter_attributes, + with_commit=True, ) - + def deactivate_access_management(project_id: str) -> None: record.delete_access_management_attributes(project_id) diff --git a/controller/record/manager.py b/controller/record/manager.py index bb388297..edd32c8a 100644 --- a/controller/record/manager.py +++ b/controller/record/manager.py @@ -401,7 +401,9 @@ def sync_access_groups_and_users_sharepoint( "newValue": extended_user_ids, "recordId": str(record_item.id), } - changed_records_ids = list(record_change_dict.keys()) + changed_records_ids = [ + record_change_dict[key]["recordId"] for key in record_change_dict + ] partial_update = len(changed_records_ids) < len(project_records) errors = edit_records(None, project_id, record_change_dict, True) if not errors: From 59a7e1497749875dab5b97affbdabf4cd10f07ff Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 16:55:20 +0200 Subject: [PATCH 66/67] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index db484cfc..e0d1fb41 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit db484cfcbfc9bd633626935c2e4bffdae1bea13e +Subproject commit e0d1fb41787569faad82f4dcfdf4da17a4eddbf0 From 93b6a66a4419d685e31c37bc3e79ccea4be7e8a3 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 27 Jun 2025 16:56:00 +0200 Subject: [PATCH 67/67] perf: make REFINERY_ATTRIBUTE_ACCESS constants --- api/transfer.py | 4 +-- controller/project/manager.py | 21 ++++++++++++---- controller/record/manager.py | 46 +++++++++++++++++++++++------------ controller/transfer/checks.py | 11 ++++----- 4 files changed, 53 insertions(+), 29 deletions(-) diff --git a/api/transfer.py b/api/transfer.py index 809f5eb8..ac81bf16 100644 --- a/api/transfer.py +++ b/api/transfer.py @@ -196,7 +196,7 @@ def __calculate_missing_attributes(project_id: str, user_id: str) -> None: i += 1 if i >= 60: i = 0 - daemon.reset_session_token_in_thread() # TODO: this function does not exist + general.remove_and_refresh_session(request_new=True) if tokenization.is_doc_bin_creation_running_or_queued(project_id): time.sleep(2) continue @@ -211,7 +211,7 @@ def __calculate_missing_attributes(project_id: str, user_id: str) -> None: break if i >= 60: i = 0 - daemon.reset_session_token_in_thread() # TODO: this function does not exist + general.remove_and_refresh_session(request_new=True) current_att_id = attribute_ids[0] current_att = attribute.get(project_id, current_att_id) diff --git a/controller/project/manager.py b/controller/project/manager.py index b86cdbd1..92e3dedb 100644 --- a/controller/project/manager.py +++ b/controller/project/manager.py @@ -23,6 +23,10 @@ from controller.task_master import manager as task_master_manager from submodules.model.enums import TaskType, RecordTokenizationScope from submodules.model.business_objects import util as db_util +from submodules.model.integration_objects.helper import ( + REFINERY_ATTRIBUTE_ACCESS_GROUPS, + REFINERY_ATTRIBUTE_ACCESS_USERS, +) from submodules.s3 import controller as s3 from service.search import search from controller.auth import kratos @@ -67,7 +71,10 @@ def activate_access_management(project_id): relative_position = 1 else: relative_position += 1 - filter_attributes = ["__ACCESS_GROUPS", "__ACCESS_USERS"] + filter_attributes = [ + REFINERY_ATTRIBUTE_ACCESS_GROUPS, + REFINERY_ATTRIBUTE_ACCESS_USERS, + ] attribute.create( project_id=project_id, relative_position=relative_position, @@ -107,8 +114,12 @@ def activate_access_management(project_id): def deactivate_access_management(project_id: str) -> None: record.delete_access_management_attributes(project_id) - access_groups_attribute = attribute.get_by_name(project_id, "__ACCESS_GROUPS") - access_users_attribute = attribute.get_by_name(project_id, "__ACCESS_USERS") + access_groups_attribute = attribute.get_by_name( + project_id, REFINERY_ATTRIBUTE_ACCESS_GROUPS + ) + access_users_attribute = attribute.get_by_name( + project_id, REFINERY_ATTRIBUTE_ACCESS_USERS + ) if access_groups_attribute: attribute.delete(project_id, access_groups_attribute.id, with_commit=True) if access_users_attribute: @@ -116,8 +127,8 @@ def deactivate_access_management(project_id: str) -> None: def is_access_management_activated(project_id: str) -> bool: - access_groups = attribute.get_by_name(project_id, "__ACCESS_GROUPS") - access_users = attribute.get_by_name(project_id, "__ACCESS_USERS") + access_groups = attribute.get_by_name(project_id, REFINERY_ATTRIBUTE_ACCESS_GROUPS) + access_users = attribute.get_by_name(project_id, REFINERY_ATTRIBUTE_ACCESS_USERS) return access_groups is not None and access_users is not None diff --git a/controller/record/manager.py b/controller/record/manager.py index edd32c8a..25885ede 100644 --- a/controller/record/manager.py +++ b/controller/record/manager.py @@ -15,6 +15,10 @@ comments, project, ) +from submodules.model.integration_objects.helper import ( + REFINERY_ATTRIBUTE_ACCESS_GROUPS, + REFINERY_ATTRIBUTE_ACCESS_USERS, +) from service.search import search from submodules.model import enums from submodules.model import daemon @@ -365,10 +369,10 @@ def sync_access_groups_and_users_sharepoint( ) record_change_dict = {} for record_item in project_records: - if not record_item.data.get("__ACCESS_GROUPS"): + if not record_item.data.get(REFINERY_ATTRIBUTE_ACCESS_GROUPS): current_group_ids = [] else: - current_group_ids = record_item.data["__ACCESS_GROUPS"] + current_group_ids = record_item.data[REFINERY_ATTRIBUTE_ACCESS_GROUPS] meta_data_dict = json.loads(record_item.data.get("metadata", "{}")) permission_ids = meta_data_dict.get("permissions") @@ -379,15 +383,17 @@ def sync_access_groups_and_users_sharepoint( ] # Only update if new group ids differ from current group ids if not set(new_group_ids) == set(current_group_ids): - record_change_dict[f"{str(record_item.id)}@__ACCESS_GROUPS"] = { - "attributeName": "__ACCESS_GROUPS", + record_change_dict[ + f"{str(record_item.id)}@{REFINERY_ATTRIBUTE_ACCESS_GROUPS}" + ] = { + "attributeName": REFINERY_ATTRIBUTE_ACCESS_GROUPS, "newValue": new_group_ids, "recordId": str(record_item.id), } - if not record_item.data.get("__ACCESS_USERS"): + if not record_item.data.get(REFINERY_ATTRIBUTE_ACCESS_USERS): current_user_ids = [] else: - current_user_ids = record_item.data["__ACCESS_USERS"] + current_user_ids = record_item.data[REFINERY_ATTRIBUTE_ACCESS_USERS] new_user_ids = [ permissions_users.get(permission_id) for permission_id in permission_ids @@ -396,8 +402,10 @@ def sync_access_groups_and_users_sharepoint( # Only update if new user ids differ from current user ids if not set(new_user_ids) == set(current_user_ids): extended_user_ids = new_user_ids - record_change_dict[f"{str(record_item.id)}@__ACCESS_USERS"] = { - "attributeName": "__ACCESS_USERS", + record_change_dict[ + f"{str(record_item.id)}@{REFINERY_ATTRIBUTE_ACCESS_USERS}" + ] = { + "attributeName": REFINERY_ATTRIBUTE_ACCESS_USERS, "newValue": extended_user_ids, "recordId": str(record_item.id), } @@ -434,27 +442,33 @@ def add_access_groups_or_users( records_to_change = record.get_by_record_ids(project_id, record_ids) if group_ids and len(group_ids) > 0: for record_item in records_to_change: - if not record_item.data.get("__ACCESS_GROUPS"): + if not record_item.data.get(REFINERY_ATTRIBUTE_ACCESS_GROUPS): current_group_ids = [] else: - current_group_ids = record_item.data["__ACCESS_GROUPS"] + current_group_ids = record_item.data[ + REFINERY_ATTRIBUTE_ACCESS_GROUPS + ] extended_group_ids = list( set(current_group_ids + group_ids) ) # remove duplicates - record_change_dict[f"{str(record_item.id)}@__ACCESS_GROUPS"] = { - "attributeName": "__ACCESS_GROUPS", + record_change_dict[ + f"{str(record_item.id)}@{REFINERY_ATTRIBUTE_ACCESS_GROUPS}" + ] = { + "attributeName": REFINERY_ATTRIBUTE_ACCESS_GROUPS, "newValue": extended_group_ids, "recordId": str(record_item.id), } if user_ids and len(user_ids) > 0: for record_item in records_to_change: - if not record_item.data.get("__ACCESS_USERS"): + if not record_item.data.get(REFINERY_ATTRIBUTE_ACCESS_USERS): current_user_ids = [] else: - current_user_ids = record_item.data["__ACCESS_USERS"] + current_user_ids = record_item.data[REFINERY_ATTRIBUTE_ACCESS_USERS] extended_user_ids = list(set(current_user_ids + user_ids)) - record_change_dict[f"{str(record_item.id)}@__ACCESS_USERS"] = { - "attributeName": "__ACCESS_USERS", + record_change_dict[ + f"{str(record_item.id)}@{REFINERY_ATTRIBUTE_ACCESS_USERS}" + ] = { + "attributeName": REFINERY_ATTRIBUTE_ACCESS_USERS, "newValue": extended_user_ids, "recordId": str(record_item.id), } diff --git a/controller/transfer/checks.py b/controller/transfer/checks.py index bff43101..af5e47b9 100644 --- a/controller/transfer/checks.py +++ b/controller/transfer/checks.py @@ -70,13 +70,12 @@ def run_checks(df: pd.DataFrame, project_id, user_id) -> None: # check attribute equality attribute_entities = attribute.get_all( project_id, - state_filter=[ - AttributeState.UPLOADED.value, - AttributeState.AUTOMATICALLY_CREATED.value, - ], + state_filter=[AttributeState.UPLOADED.value], ) - attribute_names = [attribute_item.name for attribute_item in attribute_entities] - differences = set(attribute_names).difference(set(attributes)) + attribute_names = [attribute_item.name for attribute_item in attribute_entities] + [ + attribute.get_running_id_name(project_id) + ] + differences = set(filter(None, attribute_names)).difference(set(attributes)) if differences: guard = True