Skip to content

Commit 8b80880

Browse files
feat: handle new endpoint to scan documents and create incidents for the found secrets
1 parent be13fe7 commit 8b80880

File tree

4 files changed

+197
-0
lines changed

4 files changed

+197
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Added
2+
3+
- New `GGClient.scan_and_create_incidents()` method that scans documents for secrets and automatically creates incidents, attached to the given source, for any findings.

pygitguardian/client.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from io import BytesIO
99
from pathlib import Path
1010
from typing import Any, Dict, List, Optional, Union, cast
11+
from uuid import UUID
1112

1213
import requests
1314
from requests import Response, Session, codes
@@ -30,6 +31,7 @@
3031
Detail,
3132
Document,
3233
DocumentSchema,
34+
DocumentsForIncidentCreation,
3335
HealthCheckResponse,
3436
HoneytokenResponse,
3537
HoneytokenWithContextResponse,
@@ -510,6 +512,7 @@ def multi_content_scan(
510512
else:
511513
raise TypeError("each document must be a dict")
512514

515+
# Validate documents using DocumentSchema
513516
for document in request_obj:
514517
DocumentSchema.validate_size(
515518
document, self.secret_scan_preferences.maximum_document_size
@@ -538,6 +541,71 @@ def multi_content_scan(
538541

539542
return obj
540543

544+
def scan_and_create_incidents(
    self,
    documents: List[Dict[str, str]],
    source_uuid: UUID,
    extra_headers: Optional[Dict[str, str]] = None,
    params: Optional[Dict[str, Any]] = None,
) -> Union[Detail, MultiScanResult]:
    """
    scan_and_create_incidents handles the /scan/create-incidents endpoint of the API.

    The documents are scanned for secrets and incidents are created for the
    findings, attached to the source identified by `source_uuid`.

    If documents contain `0` bytes, they are replaced with the ASCII substitute
    character before being sent.

    :param documents: List of dictionaries containing the key document
        and, optionally, filename.
        example: [{"document":"example content","filename":"intro.py"}]
    :param source_uuid: the source UUID that will be used to identify the custom
        source, for which incidents will be created
    :param extra_headers: additional headers to add to the request
    :param params: additional parameters to add to the request
    :return: MultiScanResult when the request succeeds, Detail otherwise; the
        HTTP status code is stored on the returned object as `status_code`
    :raises ValueError: if more documents are submitted than the
        `maximum_documents_per_scan` preference allows
    :raises TypeError: if any element of `documents` is not a dict
    :raises KeyError: if a document dict has no "document" key
    """
    max_documents = self.secret_scan_preferences.maximum_documents_per_scan
    if len(documents) > max_documents:
        raise ValueError(
            f"too many documents submitted for scan (max={max_documents})"
        )

    if not all(isinstance(doc, dict) for doc in documents):
        raise TypeError("each document must be a dict")

    # Document objects are built directly from the input dictionaries here, so
    # the `0` byte replacement promised in the docstring is applied explicitly.
    # The replacement is idempotent, so it is harmless if another layer also
    # performs it.
    document_objects = []
    for doc in documents:
        content = doc["document"]
        if isinstance(content, str):
            content = content.replace("\0", "\x1a")
        document_objects.append(
            Document(document=content, filename=doc.get("filename"))
        )

    # Validate documents using DocumentSchema
    for document in document_objects:
        DocumentSchema.validate_size(
            {"document": document.document, "filename": document.filename},
            self.secret_scan_preferences.maximum_document_size,
        )

    payload = DocumentsForIncidentCreation(
        documents=document_objects, source_uuid=source_uuid
    )

    resp = self.post(
        endpoint="scan/create-incidents",
        data=payload.SCHEMA.dump(payload),
        extra_headers=extra_headers,
        params=params,
    )

    obj: Union[Detail, MultiScanResult]
    if is_ok(resp):
        # The response body is wrapped under "scan_results" before being
        # handed to MultiScanResult.
        obj = MultiScanResult.from_dict({"scan_results": resp.json()})
    else:
        obj = load_detail(resp)

    obj.status_code = resp.status_code

    return obj
608+
541609
def retrieve_secret_incident(
542610
self, incident_id: int, with_occurrences: int = 20
543611
) -> Union[Detail, SecretIncident]:

pygitguardian/models.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,43 @@ def __repr__(self) -> str:
9696
return f"filename:{self.filename}, document:{self.document}"
9797

9898

99+
class DocumentsForIncidentCreationSchema(BaseSchema):
    """
    Schema for a list of documents together with a source UUID.

    After dumping, each document's `filename` key is renamed to
    `document_identifier`.
    """

    documents = fields.List(fields.Nested(DocumentSchema), required=True)
    source_uuid = fields.UUID(required=True)

    @post_dump
    def transform_filename_to_document_identifier(
        self, data: Dict[str, Any], **kwargs: Any
    ) -> Dict[str, Any]:
        """Rename `filename` to `document_identifier` on every dumped document."""
        if "documents" not in data:
            return data

        for entry in data["documents"]:
            if "filename" not in entry:
                continue
            # The rename is done in place on the dumped dictionary.
            entry["document_identifier"] = entry.pop("filename")

        return data
113+
114+
115+
class DocumentsForIncidentCreation(Base):
    """
    Request object bundling the documents to scan with the UUID of the source
    for which incidents are to be created.

    Attributes:
        documents (List[Document]): list of documents to scan
        source_uuid (UUID): UUID identifying the source
    """

    SCHEMA = DocumentsForIncidentCreationSchema()

    def __init__(self, documents: List[Document], source_uuid: UUID, **kwargs: Any):
        # Extra keyword arguments are accepted but not used.
        super().__init__()
        self.source_uuid = source_uuid
        self.documents = documents

    def __repr__(self) -> str:
        # Only the number of documents is shown, not their content.
        document_count = len(self.documents)
        return f"documents:{document_count}, source_uuid:{self.source_uuid}"
134+
135+
99136
class DetailSchema(BaseSchema):
100137
detail = fields.String(required=True)
101138

@@ -757,6 +794,7 @@ class TokenScope(str, Enum):
757794
CUSTOM_TAGS_READ = "custom_tags:read"
758795
CUSTOM_TAGS_WRITE = "custom_tags:write"
759796
SECRET_READ = "secrets:read"
797+
SCAN_CREATE_INCIDENTS = "scan:create-incidents"
760798

761799

762800
class APITokensResponseSchema(BaseSchema):

tests/test_client.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,94 @@ def test_multiscan_parameters(client: GGClient, ignore_known_secrets, all_secret
606606
assert mock_response.call_count == 1
607607

608608

609+
@responses.activate
@pytest.mark.parametrize("params", (None, {"param1": "value1"}))
def test_scan_and_create_incidents_parameters(client: GGClient, params):
    """
    GIVEN a ggclient
    WHEN calling scan_and_create_incidents with parameters
    THEN the parameters are passed in the request
    AND the returned object carries the response status code
    """

    # With no params, the matcher must see an empty query string.
    to_match = dict(params) if params is not None else {}

    mock_response = responses.post(
        url=client._url_from_endpoint("scan/create-incidents", "v1"),
        status=200,
        match=[matchers.query_param_matcher(to_match)],
        json=[
            {
                "policy_break_count": 1,
                "policies": ["pol"],
                "policy_breaks": [
                    {
                        "type": "break",
                        "detector_name": "break",
                        "detector_group_name": "break",
                        "documentation_url": None,
                        "policy": "mypol",
                        "matches": [
                            {
                                "match": "hello",
                                "type": "hello",
                            }
                        ],
                    }
                ],
            }
        ],
    )

    result = client.scan_and_create_incidents(
        [{"filename": FILENAME, "document": DOCUMENT}],
        source_uuid="123e4567-e89b-12d3-a456-426614174000",
        params=params,
    )

    assert mock_response.call_count == 1
    # The client copies the HTTP status onto whatever object it returns.
    assert result.status_code == 200
656+
657+
658+
@responses.activate
def test_scan_and_create_incidents_payload_structure(client: GGClient):
    """
    GIVEN a ggclient
    WHEN calling scan_and_create_incidents
    THEN the payload is structured correctly with documents and source_uuid
    AND the returned object carries the response status code
    """

    documents = [{"filename": FILENAME, "document": DOCUMENT}]
    source_uuid = "123e4567-e89b-12d3-a456-426614174000"

    # The request body is expected to carry the filename under
    # "document_identifier" rather than "filename".
    expected_payload = {
        "documents": [
            {
                "document": DOCUMENT,
                "document_identifier": FILENAME,
            }
        ],
        "source_uuid": source_uuid,
    }

    mock_response = responses.post(
        url=client._url_from_endpoint("scan/create-incidents", "v1"),
        status=200,
        match=[matchers.json_params_matcher(expected_payload)],
        json=[
            {
                "policy_break_count": 0,
                "policies": ["pol"],
                "policy_breaks": [],
            }
        ],
    )

    result = client.scan_and_create_incidents(documents, source_uuid)

    assert mock_response.call_count == 1
    # The client copies the HTTP status onto whatever object it returns.
    assert result.status_code == 200
695+
696+
609697
@responses.activate
610698
def test_retrieve_secret_incident(client: GGClient):
611699
"""

0 commit comments

Comments
 (0)