feat(iast): add support for langchain v0.1.0+

smola · smola · commit e170cc991f1a · 2025-04-23T12:41:49.000+02:00
diff --git a/ddtrace/contrib/internal/langchain/patch.py b/ddtrace/contrib/internal/langchain/patch.py
@@ -196,6 +196,9 @@ def traced_llm_generate(langchain, pin, func, instance, args, kwargs):
                 span.set_tag_str("langchain.request.%s.parameters.%s" % (llm_provider, param), str(val))
 
         completions = func(*args, **kwargs)
+
+        _iast_taint_llm_output(prompts, completions)
+
         if _is_openai_llm_instance(instance):
             _tag_openai_token_usage(span, completions.llm_output)
 
@@ -942,6 +945,57 @@ async def traced_base_tool_ainvoke(langchain, pin, func, instance, args, kwargs)
     return tool_output
 
 
+def _iast_taint_llm_output(prompts, completions):
+    """
+    Taint each string ``text`` found in ``completions.generations`` when a string prompt is tainted.
+
+    Range propagation does not make sense in LLMs. So we get the first source in inputs, if any,
+    and taint the full output with that source; each ``text`` attribute is reassigned in place.
+    No-op if IAST is off or the arguments lack the expected shape; errors go to ``_set_iast_error_metric``.
+    """
+    if not asm_config._iast_enabled:
+        return
+    if not isinstance(prompts, (tuple, list)):
+        return
+    if not hasattr(completions, "generations"):
+        return
+    try:
+        generations = completions.generations
+        if not isinstance(generations, list):
+            return
+        from ddtrace.appsec._iast._taint_tracking._taint_objects import get_tainted_ranges
+        from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject
+
+        source = None
+        for prompt in prompts:
+            if not isinstance(prompt, str):
+                continue
+            tainted_ranges = get_tainted_ranges(prompt)
+            if tainted_ranges:
+                source = tainted_ranges[0].source
+                break
+        if not source:
+            return
+        for gens in generations:
+            for gen in gens:
+                if not hasattr(gen, "text"):
+                    continue
+                text = gen.text
+                if not isinstance(text, str):
+                    continue
+                new_text = taint_pyobject(
+                    pyobject=text,
+                    source_name=source.name,
+                    source_value=source.value,
+                    source_origin=source.origin,
+                )
+                setattr(gen, "text", new_text)
+    except Exception as e:
+        from ddtrace.appsec._iast._metrics import _set_iast_error_metric
+
+        _set_iast_error_metric("IAST propagation error. langchain _iast_taint_llm_output. {}".format(e))
+
+
 def _patch_embeddings_and_vectorstores():
     """
     Text embedding models override two abstract base methods instead of super calls,
@@ -1081,10 +1135,14 @@ def patch():
     if asm_config._iast_enabled:
         from ddtrace.appsec._iast._metrics import _set_iast_error_metric
 
+        # Taint the string returned by PromptTemplate.format when one of its keyword arguments is tainted.
+        wrap("langchain_core", "prompts.prompt.PromptTemplate.format", iast_propagate_prompt_template_format)
+
         def wrap_output_parser(module, parser):
+            """Wrap ``<parser>.parse`` in ``module`` with ``taint_parser_output`` unless it is already wrapped."""
             # Ensure not double patched
             if not isinstance(deep_getattr(module, "%s.parse" % parser), wrapt.ObjectProxy):
                 wrap(module, "%s.parse" % parser, taint_parser_output)
 
         try:
             with_agent_output_parser(wrap_output_parser)
@@ -1125,13 +1184,37 @@ def unpatch():
     delattr(langchain, "_datadog_integration")
 
 
-def taint_parser_output(func, instance, args, kwargs):
-    from ddtrace.appsec._iast._metrics import _set_iast_error_metric
-    from ddtrace.appsec._iast._taint_tracking._taint_objects import get_tainted_ranges
-    from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject
+def iast_propagate_prompt_template_format(func, instance, args, kwargs):
+    """Taint the result of ``PromptTemplate.format`` with the first tainted keyword argument's source."""
+    result = func(*args, **kwargs)
+    try:
+        if not asm_config.is_iast_request_enabled:
+            return result
+
+        from ddtrace.appsec._iast._taint_tracking._taint_objects import get_tainted_ranges
+        from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject
+
+        source = None
+        for value in kwargs.values():
+            ranges = get_tainted_ranges(value)
+            if ranges:
+                source = ranges[0].source
+                break
+        if source:
+            return taint_pyobject(result, source.name, source.value, source.origin)
+    except Exception as e:
+        from ddtrace.appsec._iast._metrics import _set_iast_error_metric
+
+        _set_iast_error_metric("IAST propagation error. langchain iast_propagate_prompt_template_format. {}".format(e))
+    return result
+
 
+def taint_parser_output(func, instance, args, kwargs):
     result = func(*args, **kwargs)
     try:
+        from ddtrace.appsec._iast._taint_tracking._taint_objects import get_tainted_ranges
+        from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject
+
         try:
             from langchain_core.agents import AgentAction
             from langchain_core.agents import AgentFinish
@@ -1147,6 +1230,8 @@ def taint_parser_output(func, instance, args, kwargs):
                 values = result.return_values
                 values["output"] = taint_pyobject(values["output"], source.name, source.value, source.origin)
     except Exception as e:
+        from ddtrace.appsec._iast._metrics import _set_iast_error_metric
+
         _set_iast_error_metric("IAST propagation error. langchain taint_parser_output. {}".format(e))
 
     return result
diff --git a/hatch.toml b/hatch.toml
@@ -571,6 +571,45 @@ fastapi = ["==0.94.1"]
 python = ["3.8", "3.10", "3.13"]
 fastapi = ["~=0.114.2"]
 
+## ASM appsec_integrations_langchain
+
+[envs.appsec_integrations_langchain]
+template = "appsec_integrations_langchain"
+dependencies = [
+    "pytest",
+    "pytest-cov",
+    "langchain{matrix:langchain:}",
+    "langchain-experimental{matrix:langchain-experimental:}",
+]
+
+[envs.appsec_integrations_langchain.env-vars]
+DD_TRACE_AGENT_URL = "http://testagent:9126"
+_DD_IAST_PATCH_MODULES = "benchmarks.,tests.appsec."
+DD_IAST_REQUEST_SAMPLING = "100"
+DD_IAST_DEDUPLICATION_ENABLED = "false"
+
+[envs.appsec_integrations_langchain.scripts]
+test = [
+    "uname -a",
+    "pip freeze",
+    "python -m pytest -vvv {args:tests/appsec/integrations/langchain_tests/}",
+]
+
+[[envs.appsec_integrations_langchain.matrix]]
+python = ["3.9", "3.10", "3.11", "3.12", "3.13"]
+langchain = ["~=0.1"]
+langchain-experimental = ["~=0.1"]
+
+[[envs.appsec_integrations_langchain.matrix]]
+python = ["3.9", "3.10", "3.11", "3.12", "3.13"]
+langchain = ["~=0.2"]
+langchain-experimental = ["~=0.2"]
+
+[[envs.appsec_integrations_langchain.matrix]]
+python = ["3.9", "3.10", "3.11", "3.12", "3.13"]
+langchain = ["~=0.3"]
+langchain-experimental = ["~=0.3"]
+
 ## ASM FastAPI
 
 [envs.appsec_threats_fastapi]
diff --git a/releasenotes/notes/iast-langchain-0.1.0-e437ea90fe66ad31.yaml b/releasenotes/notes/iast-langchain-0.1.0-e437ea90fe66ad31.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    Code Security: Adds IAST support for langchain v0.1.0 and above.
diff --git a/tests/appsec/iast/iast_utils.py b/tests/appsec/iast/iast_utils.py
@@ -52,6 +52,7 @@ def get_line_and_hash(label: Text, vuln_type: Text, filename=None, fixed_line=No
 def _iast_patched_module_and_patched_source(module_name, new_module_object=False):
     module = importlib.import_module(module_name)
     module_path, patched_source = astpatch_module(module)
+    assert patched_source is not None
     compiled_code = compile(patched_source, module_path, "exec")
     module_changed = types.ModuleType(module_name) if new_module_object else module
     exec(compiled_code, module_changed.__dict__)
diff --git a/tests/appsec/integrations/fixtures/patch_langchain.py b/tests/appsec/integrations/fixtures/patch_langchain.py
diff --git a/tests/appsec/integrations/langchain_tests/conftest.py b/tests/appsec/integrations/langchain_tests/conftest.py
@@ -0,0 +1,17 @@
+from ddtrace.appsec._iast import enable_iast_propagation
+from ddtrace.appsec._iast._patch_modules import patch_iast
+from tests.utils import override_env
+from tests.utils import override_global_config
+
+
+# `pytest` calls this hook once at startup; IAST is patched while the config and env overrides below apply.
+def pytest_configure():
+    with override_global_config(
+        dict(
+            _iast_enabled=True,
+            _iast_deduplication_enabled=False,
+            _iast_request_sampling=100.0,
+        )
+    ), override_env(dict(_DD_IAST_PATCH_MODULES="tests.appsec.integrations")):
+        patch_iast()
+        enable_iast_propagation()
diff --git a/tests/appsec/integrations/langchain_tests/test_iast_langchain.py b/tests/appsec/integrations/langchain_tests/test_iast_langchain.py
@@ -1,34 +1,39 @@
-import pytest
+from langchain.agents import AgentType
+from langchain.agents import initialize_agent
+from langchain_community.tools.shell.tool import ShellTool
+from langchain_core.language_models.fake import FakeListLLM
 
 from ddtrace.appsec._iast.constants import VULN_CMDI
-from ddtrace.internal.module import is_module_installed
-from tests.appsec.iast.conftest import iast_context_defaults  # noqa: F401
-from tests.appsec.iast.iast_utils import _iast_patched_module
+from tests.appsec.iast.conftest import iast_span_defaults  # noqa: F401
 from tests.appsec.iast.iast_utils import get_line_and_hash
 from tests.appsec.iast.taint_sinks.conftest import _get_span_report
 from tests.utils import override_env
 
 
-FIXTURES_PATH = "tests/appsec/integrations/fixtures/patch_langchain.py"
-FIXTURES_MODULE = "tests.appsec.integrations.fixtures.patch_langchain"
-
 with override_env({"DD_IAST_ENABLED": "True"}):
     from ddtrace.appsec._iast._taint_tracking import OriginType
     from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject
 
+TEST_FILE = "tests/appsec/integrations/langchain_tests/test_iast_langchain.py"
+
+
+def test_openai_llm_appsec_iast_cmdi(iast_span_defaults):  # noqa: F811
+    responses = ["Action: terminal\nAction Input: echo Hello World", "Final Answer: 4"]
+    llm = FakeListLLM(responses=responses)
+    shell = ShellTool()
+    shell_chain = initialize_agent([shell], llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
 
-@pytest.mark.skipif(not is_module_installed("langchain"), reason="Langchain tests work on 3.9 or higher")
-def test_openai_llm_appsec_iast_cmdi(iast_context_defaults):  # noqa: F811
-    mod = _iast_patched_module(FIXTURES_MODULE)
     string_to_taint = "I need to use the terminal tool to print a Hello World"
     prompt = taint_pyobject(
         pyobject=string_to_taint,
         source_name="test_openai_llm_appsec_iast_cmdi",
         source_value=string_to_taint,
         source_origin=OriginType.PARAMETER,
     )
-    res = mod.patch_langchain(prompt)
-    assert res == "4"
+
+    # label test_openai_llm_appsec_iast_cmdi
+    res = shell_chain.invoke(prompt)
+    assert res["output"] == "4"
 
     span_report = _get_span_report()
     assert span_report
@@ -48,9 +53,9 @@ def test_openai_llm_appsec_iast_cmdi(iast_context_defaults):  # noqa: F811
     assert source["origin"] == OriginType.PARAMETER
     assert "value" not in source.keys()
 
-    line, hash_value = get_line_and_hash("test_openai_llm_appsec_iast_cmdi", VULN_CMDI, filename=FIXTURES_PATH)
-    assert vulnerability["location"]["path"] == FIXTURES_PATH
+    line, hash_value = get_line_and_hash("test_openai_llm_appsec_iast_cmdi", VULN_CMDI, filename=TEST_FILE)
+    assert vulnerability["location"]["path"] == TEST_FILE
     assert vulnerability["location"]["line"] == line
-    assert vulnerability["location"]["method"] == "patch_langchain"
+    assert vulnerability["location"]["method"] == "test_openai_llm_appsec_iast_cmdi"
     assert vulnerability["location"]["class_name"] == ""
     assert vulnerability["hash"] == hash_value

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +---
 +features:
 +  - |
 +    Code Security: Adds IAST support for langchain v0.1.0 and above.