diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
index 942b00ebca..77c48e2e14 100644
--- a/output/openapi/elasticsearch-openapi.json
+++ b/output/openapi/elasticsearch-openapi.json
@@ -17603,6 +17603,31 @@
         },
         "x-state": "Added in 8.11.0"
       },
+      "post": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Perform inference on the service",
+        "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+        "operationId": "inference-inference",
+        "parameters": [
+          {
+            "$ref": "#/components/parameters/inference.inference-inference_id"
+          },
+          {
+            "$ref": "#/components/parameters/inference.inference-timeout"
+          }
+        ],
+        "requestBody": {
+          "$ref": "#/components/requestBodies/inference.inference"
+        },
+        "responses": {
+          "200": {
+            "$ref": "#/components/responses/inference.inference-200"
+          }
+        },
+        "x-state": "Added in 8.11.0"
+      },
       "delete": {
         "tags": [
           "inference"
@@ -17675,6 +17700,34 @@
         },
         "x-state": "Added in 8.11.0"
       },
+      "post": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Perform inference on the service",
+        "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+        "operationId": "inference-inference-1",
+        "parameters": [
+          {
+            "$ref": "#/components/parameters/inference.inference-task_type"
+          },
+          {
+            "$ref": "#/components/parameters/inference.inference-inference_id"
+          },
+          {
+            "$ref": "#/components/parameters/inference.inference-timeout"
+          }
+        ],
+        "requestBody": {
+          "$ref": "#/components/requestBodies/inference.inference"
+        },
+        "responses": {
+          "200": {
+            "$ref": "#/components/responses/inference.inference-200"
+          }
+        },
+        "x-state": "Added in 8.11.0"
+      },
       "delete": {
         "tags": [
           "inference"
@@ -78755,6 +78808,122 @@
       "inference._types.ServiceSettings": {
         "type": "object"
       },
+      "inference._types.InferenceResult": {
+        "description": "InferenceResult is an aggregation of mutually exclusive variants",
+        "type": "object",
+        "properties": {
+          "text_embedding_bytes": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.TextEmbeddingByteResult"
+            }
+          },
+          "text_embedding_bits": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.TextEmbeddingByteResult"
+            }
+          },
+          "text_embedding": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.TextEmbeddingResult"
+            }
+          },
+          "sparse_embedding": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.SparseEmbeddingResult"
+            }
+          },
+          "completion": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.CompletionResult"
+            }
+          },
+          "rerank": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.RankedDocument"
+            }
+          }
+        },
+        "minProperties": 1,
+        "maxProperties": 1
+      },
+      "inference._types.TextEmbeddingByteResult": {
+        "type": "object",
+        "properties": {
+          "embedding": {
+            "$ref": "#/components/schemas/inference._types.DenseByteVector"
+          }
+        },
+        "required": [
+          "embedding"
+        ]
+      },
+      "inference._types.DenseByteVector": {
+        "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.",
+        "type": "array",
+        "items": {
+          "$ref": "#/components/schemas/_types.byte"
+        }
+      },
+      "inference._types.TextEmbeddingResult": {
+        "type": "object",
+        "properties": {
+          "embedding": {
+            "$ref": "#/components/schemas/inference._types.DenseVector"
+          }
+        },
+        "required": [
+          "embedding"
+        ]
+      },
+      "inference._types.DenseVector": {
+        "description": "Text Embedding results are represented as Dense Vectors\nof floats.",
+        "type": "array",
+        "items": {
+          "type": "number"
+        }
+      },
+      "inference._types.SparseEmbeddingResult": {
+        "type": "object",
+        "properties": {
+          "embedding": {
+            "$ref": "#/components/schemas/inference._types.SparseVector"
+          }
+        },
+        "required": [
+          "embedding"
+        ]
+      },
+      "inference._types.SparseVector": {
+        "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.",
+        "type": "object",
+        "additionalProperties": {
+          "type": "number"
+        }
+      },
+      "inference._types.RankedDocument": {
+        "type": "object",
+        "properties": {
+          "index": {
+            "type": "number"
+          },
+          "relevance_score": {
+            "type": "number"
+          },
+          "text": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "index",
+          "relevance_score"
+        ]
+      },
       "inference._types.AlibabaCloudTaskType": {
         "type": "string",
         "enum": [
@@ -79742,24 +79911,6 @@
           "rerank"
         ]
       },
-      "inference._types.RankedDocument": {
-        "type": "object",
-        "properties": {
-          "index": {
-            "type": "number"
-          },
-          "relevance_score": {
-            "type": "number"
-          },
-          "text": {
-            "type": "string"
-          }
-        },
-        "required": [
-          "index",
-          "relevance_score"
-        ]
-      },
       "inference._types.SparseEmbeddingInferenceResult": {
         "type": "object",
         "properties": {
@@ -79774,24 +79925,6 @@
           "sparse_embedding"
         ]
       },
-      "inference._types.SparseEmbeddingResult": {
-        "type": "object",
-        "properties": {
-          "embedding": {
-            "$ref": "#/components/schemas/inference._types.SparseVector"
-          }
-        },
-        "required": [
-          "embedding"
-        ]
-      },
-      "inference._types.SparseVector": {
-        "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.",
-        "type": "object",
-        "additionalProperties": {
-          "type": "number"
-        }
-      },
       "inference._types.TextEmbeddingInferenceResult": {
         "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants",
         "type": "object",
@@ -79818,42 +79951,6 @@
         "minProperties": 1,
         "maxProperties": 1
       },
-      "inference._types.TextEmbeddingByteResult": {
-        "type": "object",
-        "properties": {
-          "embedding": {
-            "$ref": "#/components/schemas/inference._types.DenseByteVector"
-          }
-        },
-        "required": [
-          "embedding"
-        ]
-      },
-      "inference._types.DenseByteVector": {
-        "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.",
-        "type": "array",
-        "items": {
-          "$ref": "#/components/schemas/_types.byte"
-        }
-      },
-      "inference._types.TextEmbeddingResult": {
-        "type": "object",
-        "properties": {
-          "embedding": {
-            "$ref": "#/components/schemas/inference._types.DenseVector"
-          }
-        },
-        "required": [
-          "embedding"
-        ]
-      },
-      "inference._types.DenseVector": {
-        "description": "Text Embedding results are represented as Dense Vectors\nof floats.",
-        "type": "array",
-        "items": {
-          "type": "number"
-        }
-      },
       "_types.ElasticsearchVersionInfo": {
         "type": "object",
         "properties": {
@@ -101893,6 +101990,16 @@
           }
         }
       },
+      "inference.inference-200": {
+        "description": "",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/inference._types.InferenceResult"
+            }
+          }
+        }
+      },
       "inference.put-200": {
         "description": "",
         "content": {
@@ -110165,6 +110272,38 @@
         },
         "style": "simple"
       },
+      "inference.inference-task_type": {
+        "in": "path",
+        "name": "task_type",
+        "description": "The type of inference task that the model performs.",
+        "required": true,
+        "deprecated": false,
+        "schema": {
+          "$ref": "#/components/schemas/inference._types.TaskType"
+        },
+        "style": "simple"
+      },
+      "inference.inference-inference_id": {
+        "in": "path",
+        "name": "inference_id",
+        "description": "The unique identifier for the inference endpoint.",
+        "required": true,
+        "deprecated": false,
+        "schema": {
+          "$ref": "#/components/schemas/_types.Id"
+        },
+        "style": "simple"
+      },
+      "inference.inference-timeout": {
+        "in": "query",
+        "name": "timeout",
+        "description": "The amount of time to wait for the inference request to complete.",
+        "deprecated": false,
+        "schema": {
+          "$ref": "#/components/schemas/_types.Duration"
+        },
+        "style": "form"
+      },
       "inference.put-task_type": {
         "in": "path",
         "name": "task_type",
@@ -115915,6 +116054,41 @@
           }
         }
       },
+      "inference.inference": {
+        "content": {
+          "application/json": {
+            "schema": {
+              "type": "object",
+              "properties": {
+                "query": {
+                  "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.",
+                  "type": "string"
+                },
+                "input": {
+                  "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.",
+                  "oneOf": [
+                    {
+                      "type": "string"
+                    },
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "string"
+                      }
+                    }
+                  ]
+                },
+                "task_settings": {
+                  "$ref": "#/components/schemas/inference._types.TaskSettings"
+                }
+              },
+              "required": [
+                "input"
+              ]
+            }
+          }
+        }
+      },
       "inference.put": {
         "content": {
           "application/json": {
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
index e28668ea9d..fac1cb3e80 100644
--- a/output/openapi/elasticsearch-serverless-openapi.json
+++ b/output/openapi/elasticsearch-serverless-openapi.json
@@ -9425,6 +9425,31 @@
         },
         "x-state": "Added in 8.11.0"
       },
+      "post": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Perform inference on the service",
+        "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+        "operationId": "inference-inference",
+        "parameters": [
+          {
+            "$ref": "#/components/parameters/inference.inference-inference_id"
+          },
+          {
+            "$ref": "#/components/parameters/inference.inference-timeout"
+          }
+        ],
+        "requestBody": {
+          "$ref": "#/components/requestBodies/inference.inference"
+        },
+        "responses": {
+          "200": {
+            "$ref": "#/components/responses/inference.inference-200"
+          }
+        },
+        "x-state": "Added in 8.11.0"
+      },
       "delete": {
         "tags": [
           "inference"
@@ -9497,6 +9522,34 @@
         },
         "x-state": "Added in 8.11.0"
       },
+      "post": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Perform inference on the service",
+        "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+        "operationId": "inference-inference-1",
+        "parameters": [
+          {
+            "$ref": "#/components/parameters/inference.inference-task_type"
+          },
+          {
+            "$ref": "#/components/parameters/inference.inference-inference_id"
+          },
+          {
+            "$ref": "#/components/parameters/inference.inference-timeout"
+          }
+        ],
+        "requestBody": {
+          "$ref": "#/components/requestBodies/inference.inference"
+        },
+        "responses": {
+          "200": {
+            "$ref": "#/components/responses/inference.inference-200"
+          }
+        },
+        "x-state": "Added in 8.11.0"
+      },
       "delete": {
         "tags": [
           "inference"
@@ -50040,6 +50093,122 @@
       "inference._types.ServiceSettings": {
         "type": "object"
       },
+      "inference._types.InferenceResult": {
+        "description": "InferenceResult is an aggregation of mutually exclusive variants",
+        "type": "object",
+        "properties": {
+          "text_embedding_bytes": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.TextEmbeddingByteResult"
+            }
+          },
+          "text_embedding_bits": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.TextEmbeddingByteResult"
+            }
+          },
+          "text_embedding": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.TextEmbeddingResult"
+            }
+          },
+          "sparse_embedding": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.SparseEmbeddingResult"
+            }
+          },
+          "completion": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.CompletionResult"
+            }
+          },
+          "rerank": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/inference._types.RankedDocument"
+            }
+          }
+        },
+        "minProperties": 1,
+        "maxProperties": 1
+      },
+      "inference._types.TextEmbeddingByteResult": {
+        "type": "object",
+        "properties": {
+          "embedding": {
+            "$ref": "#/components/schemas/inference._types.DenseByteVector"
+          }
+        },
+        "required": [
+          "embedding"
+        ]
+      },
+      "inference._types.DenseByteVector": {
+        "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.",
+        "type": "array",
+        "items": {
+          "$ref": "#/components/schemas/_types.byte"
+        }
+      },
+      "inference._types.TextEmbeddingResult": {
+        "type": "object",
+        "properties": {
+          "embedding": {
+            "$ref": "#/components/schemas/inference._types.DenseVector"
+          }
+        },
+        "required": [
+          "embedding"
+        ]
+      },
+      "inference._types.DenseVector": {
+        "description": "Text Embedding results are represented as Dense Vectors\nof floats.",
+        "type": "array",
+        "items": {
+          "type": "number"
+        }
+      },
+      "inference._types.SparseEmbeddingResult": {
+        "type": "object",
+        "properties": {
+          "embedding": {
+            "$ref": "#/components/schemas/inference._types.SparseVector"
+          }
+        },
+        "required": [
+          "embedding"
+        ]
+      },
+      "inference._types.SparseVector": {
+        "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.",
+        "type": "object",
+        "additionalProperties": {
+          "type": "number"
+        }
+      },
+      "inference._types.RankedDocument": {
+        "type": "object",
+        "properties": {
+          "index": {
+            "type": "number"
+          },
+          "relevance_score": {
+            "type": "number"
+          },
+          "text": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "index",
+          "relevance_score"
+        ]
+      },
       "inference._types.AlibabaCloudTaskType": {
         "type": "string",
         "enum": [
@@ -51027,24 +51196,6 @@
           "rerank"
         ]
       },
-      "inference._types.RankedDocument": {
-        "type": "object",
-        "properties": {
-          "index": {
-            "type": "number"
-          },
-          "relevance_score": {
-            "type": "number"
-          },
-          "text": {
-            "type": "string"
-          }
-        },
-        "required": [
-          "index",
-          "relevance_score"
-        ]
-      },
       "inference._types.SparseEmbeddingInferenceResult": {
         "type": "object",
         "properties": {
@@ -51059,24 +51210,6 @@
           "sparse_embedding"
         ]
       },
-      "inference._types.SparseEmbeddingResult": {
-        "type": "object",
-        "properties": {
-          "embedding": {
-            "$ref": "#/components/schemas/inference._types.SparseVector"
-          }
-        },
-        "required": [
-          "embedding"
-        ]
-      },
-      "inference._types.SparseVector": {
-        "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.",
-        "type": "object",
-        "additionalProperties": {
-          "type": "number"
-        }
-      },
       "inference._types.TextEmbeddingInferenceResult": {
         "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants",
         "type": "object",
@@ -51103,42 +51236,6 @@
         "minProperties": 1,
         "maxProperties": 1
       },
-      "inference._types.TextEmbeddingByteResult": {
-        "type": "object",
-        "properties": {
-          "embedding": {
-            "$ref": "#/components/schemas/inference._types.DenseByteVector"
-          }
-        },
-        "required": [
-          "embedding"
-        ]
-      },
-      "inference._types.DenseByteVector": {
-        "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.",
-        "type": "array",
-        "items": {
-          "$ref": "#/components/schemas/_types.byte"
-        }
-      },
-      "inference._types.TextEmbeddingResult": {
-        "type": "object",
-        "properties": {
-          "embedding": {
-            "$ref": "#/components/schemas/inference._types.DenseVector"
-          }
-        },
-        "required": [
-          "embedding"
-        ]
-      },
-      "inference._types.DenseVector": {
-        "description": "Text Embedding results are represented as Dense Vectors\nof floats.",
-        "type": "array",
-        "items": {
-          "type": "number"
-        }
-      },
       "_types.ElasticsearchVersionInfo": {
         "type": "object",
         "properties": {
@@ -61455,6 +61552,16 @@
           }
         }
       },
+      "inference.inference-200": {
+        "description": "",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/inference._types.InferenceResult"
+            }
+          }
+        }
+      },
       "inference.put-200": {
         "description": "",
         "content": {
@@ -65738,6 +65845,38 @@
         },
         "style": "simple"
       },
+      "inference.inference-task_type": {
+        "in": "path",
+        "name": "task_type",
+        "description": "The type of inference task that the model performs.",
+        "required": true,
+        "deprecated": false,
+        "schema": {
+          "$ref": "#/components/schemas/inference._types.TaskType"
+        },
+        "style": "simple"
+      },
+      "inference.inference-inference_id": {
+        "in": "path",
+        "name": "inference_id",
+        "description": "The unique identifier for the inference endpoint.",
+        "required": true,
+        "deprecated": false,
+        "schema": {
+          "$ref": "#/components/schemas/_types.Id"
+        },
+        "style": "simple"
+      },
+      "inference.inference-timeout": {
+        "in": "query",
+        "name": "timeout",
+        "description": "The amount of time to wait for the inference request to complete.",
+        "deprecated": false,
+        "schema": {
+          "$ref": "#/components/schemas/_types.Duration"
+        },
+        "style": "form"
+      },
       "inference.put-task_type": {
         "in": "path",
         "name": "task_type",
@@ -68949,6 +69088,41 @@
           }
         }
       },
+      "inference.inference": {
+        "content": {
+          "application/json": {
+            "schema": {
+              "type": "object",
+              "properties": {
+                "query": {
+                  "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.",
+                  "type": "string"
+                },
+                "input": {
+                  "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.",
+                  "oneOf": [
+                    {
+                      "type": "string"
+                    },
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "string"
+                      }
+                    }
+                  ]
+                },
+                "task_settings": {
+                  "$ref": "#/components/schemas/inference._types.TaskSettings"
+                }
+              },
+              "required": [
+                "input"
+              ]
+            }
+          }
+        }
+      },
       "inference.put": {
         "content": {
           "application/json": {
diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json
index e1b088049c..1dbb746d8e 100644
--- a/output/schema/schema-serverless.json
+++ b/output/schema/schema-serverless.json
@@ -4495,6 +4495,57 @@
         }
       ]
     },
+    {
+      "availability": {
+        "serverless": {
+          "stability": "stable",
+          "visibility": "public"
+        },
+        "stack": {
+          "since": "8.11.0",
+          "stability": "stable",
+          "visibility": "public"
+        }
+      },
+      "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+      "docId": "inference-api-post",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference",
+      "name": "inference.inference",
+      "privileges": {
+        "cluster": [
+          "monitor_inference"
+        ]
+      },
+      "request": {
+        "name": "Request",
+        "namespace": "inference.inference"
+      },
+      "requestBodyRequired": false,
+      "requestMediaType": [
+        "application/json"
+      ],
+      "response": {
+        "name": "Response",
+        "namespace": "inference.inference"
+      },
+      "responseMediaType": [
+        "application/json"
+      ],
+      "urls": [
+        {
+          "methods": [
+            "POST"
+          ],
+          "path": "/_inference/{inference_id}"
+        },
+        {
+          "methods": [
+            "POST"
+          ],
+          "path": "/_inference/{task_type}/{inference_id}"
+        }
+      ]
+    },
     {
       "availability": {
         "serverless": {
@@ -27442,6 +27493,140 @@
       },
       "specLocation": "inference/get/GetResponse.ts#L22-L26"
     },
+    {
+      "attachedBehaviors": [
+        "CommonQueryParameters"
+      ],
+      "body": {
+        "kind": "properties",
+        "properties": [
+          {
+            "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.",
+            "name": "query",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "string",
+                "namespace": "_builtins"
+              }
+            }
+          },
+          {
+            "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.",
+            "name": "input",
+            "required": true,
+            "type": {
+              "items": [
+                {
+                  "kind": "instance_of",
+                  "type": {
+                    "name": "string",
+                    "namespace": "_builtins"
+                  }
+                },
+                {
+                  "kind": "array_of",
+                  "value": {
+                    "kind": "instance_of",
+                    "type": {
+                      "name": "string",
+                      "namespace": "_builtins"
+                    }
+                  }
+                }
+              ],
+              "kind": "union_of"
+            }
+          },
+          {
+            "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.",
+            "name": "task_settings",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TaskSettings",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        ]
+      },
+      "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\nFor details about using this API with a service, such as Amazon Bedrock, Anthropic, or HuggingFace, refer to the service-specific documentation.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+      "inherits": {
+        "type": {
+          "name": "RequestBase",
+          "namespace": "_types"
+        }
+      },
+      "kind": "request",
+      "name": {
+        "name": "Request",
+        "namespace": "inference.inference"
+      },
+      "path": [
+        {
+          "description": "The type of inference task that the model performs.",
+          "name": "task_type",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "TaskType",
+              "namespace": "inference._types"
+            }
+          }
+        },
+        {
+          "description": "The unique identifier for the inference endpoint.",
+          "name": "inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Id",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "query": [
+        {
+          "description": "The amount of time to wait for the inference request to complete.",
+          "name": "timeout",
+          "required": false,
+          "serverDefault": "30s",
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Duration",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/inference/InferenceRequest.ts#L26-L91"
+    },
+    {
+      "body": {
+        "codegenName": "result",
+        "kind": "value",
+        "value": {
+          "kind": "instance_of",
+          "type": {
+            "name": "InferenceResult",
+            "namespace": "inference._types"
+          }
+        }
+      },
+      "kind": "response",
+      "name": {
+        "name": "Response",
+        "namespace": "inference.inference"
+      },
+      "specLocation": "inference/inference/InferenceResponse.ts#L22-L25"
+    },
     {
       "attachedBehaviors": [
         "CommonQueryParameters"
@@ -124977,6 +125162,213 @@
       ],
       "specLocation": "inference/_types/Services.ts#L60-L89"
     },
+    {
+      "description": "InferenceResult is an aggregation of mutually exclusive variants",
+      "kind": "interface",
+      "name": {
+        "name": "InferenceResult",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "name": "text_embedding_bytes",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TextEmbeddingByteResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "text_embedding_bits",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TextEmbeddingByteResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "text_embedding",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TextEmbeddingResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "sparse_embedding",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "SparseEmbeddingResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "completion",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "CompletionResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "rerank",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "RankedDocument",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Results.ts#L117-L128",
+      "variants": {
+        "kind": "container"
+      }
+    },
+    {
+      "description": "The text embedding result object for byte representation",
+      "kind": "interface",
+      "name": {
+        "name": "TextEmbeddingByteResult",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "name": "embedding",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "DenseByteVector",
+              "namespace": "inference._types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Results.ts#L53-L58"
+    },
+    {
+      "description": "The text embedding result object",
+      "kind": "interface",
+      "name": {
+        "name": "TextEmbeddingResult",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "name": "embedding",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "DenseVector",
+              "namespace": "inference._types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Results.ts#L60-L65"
+    },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "SparseEmbeddingResult",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "name": "embedding",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "SparseVector",
+              "namespace": "inference._types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Results.ts#L36-L38"
+    },
+    {
+      "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nrelevance_score: the relevance_score of the document relative to the query\ntext: Optional, the text of the document, if requested",
+      "kind": "interface",
+      "name": {
+        "name": "RankedDocument",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "name": "index",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "name": "relevance_score",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "float",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "name": "text",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Results.ts#L91-L101"
+    },
     {
       "kind": "interface",
       "name": {
@@ -126657,50 +127049,6 @@
       ],
       "specLocation": "inference/_types/Results.ts#L103-L108"
     },
-    {
-      "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nrelevance_score: the relevance_score of the document relative to the query\ntext: Optional, the text of the document, if requested",
-      "kind": "interface",
-      "name": {
-        "name": "RankedDocument",
-        "namespace": "inference._types"
-      },
-      "properties": [
-        {
-          "name": "index",
-          "required": true,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "integer",
-              "namespace": "_types"
-            }
-          }
-        },
-        {
-          "name": "relevance_score",
-          "required": true,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "float",
-              "namespace": "_types"
-            }
-          }
-        },
-        {
-          "name": "text",
-          "required": false,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "string",
-              "namespace": "_builtins"
-            }
-          }
-        }
-      ],
-      "specLocation": "inference/_types/Results.ts#L91-L101"
-    },
     {
       "description": "The response format for the sparse embedding request.",
       "kind": "interface",
@@ -126726,27 +127074,6 @@
       ],
       "specLocation": "inference/_types/Results.ts#L40-L45"
     },
-    {
-      "kind": "interface",
-      "name": {
-        "name": "SparseEmbeddingResult",
-        "namespace": "inference._types"
-      },
-      "properties": [
-        {
-          "name": "embedding",
-          "required": true,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "SparseVector",
-              "namespace": "inference._types"
-            }
-          }
-        }
-      ],
-      "specLocation": "inference/_types/Results.ts#L36-L38"
-    },
     {
       "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants",
       "kind": "interface",
@@ -126803,50 +127130,6 @@
         "kind": "container"
       }
     },
-    {
-      "description": "The text embedding result object for byte representation",
-      "kind": "interface",
-      "name": {
-        "name": "TextEmbeddingByteResult",
-        "namespace": "inference._types"
-      },
-      "properties": [
-        {
-          "name": "embedding",
-          "required": true,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "DenseByteVector",
-              "namespace": "inference._types"
-            }
-          }
-        }
-      ],
-      "specLocation": "inference/_types/Results.ts#L53-L58"
-    },
-    {
-      "description": "The text embedding result object",
-      "kind": "interface",
-      "name": {
-        "name": "TextEmbeddingResult",
-        "namespace": "inference._types"
-      },
-      "properties": [
-        {
-          "name": "embedding",
-          "required": true,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "DenseVector",
-              "namespace": "inference._types"
-            }
-          }
-        }
-      ],
-      "specLocation": "inference/_types/Results.ts#L60-L65"
-    },
     {
       "kind": "interface",
       "name": {
diff --git a/output/schema/schema.json b/output/schema/schema.json
index 065c5ff942..16197e3561 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -9340,6 +9340,57 @@
         }
       ]
     },
+    {
+      "availability": {
+        "serverless": {
+          "stability": "stable",
+          "visibility": "public"
+        },
+        "stack": {
+          "since": "8.11.0",
+          "stability": "stable",
+          "visibility": "public"
+        }
+      },
+      "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\nFor details about using this API with a service, such as Amazon Bedrock, Anthropic, or HuggingFace, refer to the service-specific documentation.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+      "docId": "inference-api-post",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference",
+      "name": "inference.inference",
+      "privileges": {
+        "cluster": [
+          "monitor_inference"
+        ]
+      },
+      "request": {
+        "name": "Request",
+        "namespace": "inference.inference"
+      },
+      "requestBodyRequired": false,
+      "requestMediaType": [
+        "application/json"
+      ],
+      "response": {
+        "name": "Response",
+        "namespace": "inference.inference"
+      },
+      "responseMediaType": [
+        "application/json"
+      ],
+      "urls": [
+        {
+          "methods": [
+            "POST"
+          ],
+          "path": "/_inference/{inference_id}"
+        },
+        {
+          "methods": [
+            "POST"
+          ],
+          "path": "/_inference/{task_type}/{inference_id}"
+        }
+      ]
+    },
     {
       "availability": {
         "serverless": {
@@ -152341,6 +152392,104 @@
       ],
       "specLocation": "inference/_types/Services.ts#L46-L58"
     },
+    {
+      "kind": "interface",
+      "description": "InferenceResult is an aggregation of mutually exclusive variants",
+      "name": {
+        "name": "InferenceResult",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "name": "text_embedding_bytes",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TextEmbeddingByteResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "text_embedding_bits",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TextEmbeddingByteResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "text_embedding",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TextEmbeddingResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "sparse_embedding",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "SparseEmbeddingResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "completion",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "CompletionResult",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        },
+        {
+          "name": "rerank",
+          "required": false,
+          "type": {
+            "kind": "array_of",
+            "value": {
+              "kind": "instance_of",
+              "type": {
+                "name": "RankedDocument",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Results.ts#L117-L128",
+      "variants": {
+        "kind": "container"
+      }
+    },
     {
       "kind": "interface",
       "name": {
@@ -153989,6 +154138,140 @@
       },
       "specLocation": "inference/get/GetResponse.ts#L22-L26"
     },
+    {
+      "kind": "request",
+      "attachedBehaviors": [
+        "CommonQueryParameters"
+      ],
+      "body": {
+        "kind": "properties",
+        "properties": [
+          {
+            "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.",
+            "name": "query",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "string",
+                "namespace": "_builtins"
+              }
+            }
+          },
+          {
+            "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.",
+            "name": "input",
+            "required": true,
+            "type": {
+              "kind": "union_of",
+              "items": [
+                {
+                  "kind": "instance_of",
+                  "type": {
+                    "name": "string",
+                    "namespace": "_builtins"
+                  }
+                },
+                {
+                  "kind": "array_of",
+                  "value": {
+                    "kind": "instance_of",
+                    "type": {
+                      "name": "string",
+                      "namespace": "_builtins"
+                    }
+                  }
+                }
+              ]
+            }
+          },
+          {
+            "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.",
+            "name": "task_settings",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "TaskSettings",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        ]
+      },
+      "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\nFor details about using this API with a service, such as Amazon Bedrock, Anthropic, or HuggingFace, refer to the service-specific documentation.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.",
+      "inherits": {
+        "type": {
+          "name": "RequestBase",
+          "namespace": "_types"
+        }
+      },
+      "name": {
+        "name": "Request",
+        "namespace": "inference.inference"
+      },
+      "path": [
+        {
+          "description": "The type of inference task that the model performs.",
+          "name": "task_type",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "TaskType",
+              "namespace": "inference._types"
+            }
+          }
+        },
+        {
+          "description": "The unique identifier for the inference endpoint.",
+          "name": "inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Id",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "query": [
+        {
+          "description": "The amount of time to wait for the inference request to complete.",
+          "name": "timeout",
+          "required": false,
+          "serverDefault": "30s",
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Duration",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/inference/InferenceRequest.ts#L26-L91"
+    },
+    {
+      "kind": "response",
+      "body": {
+        "kind": "value",
+        "codegenName": "result",
+        "value": {
+          "kind": "instance_of",
+          "type": {
+            "name": "InferenceResult",
+            "namespace": "inference._types"
+          }
+        }
+      },
+      "name": {
+        "name": "Response",
+        "namespace": "inference.inference"
+      },
+      "specLocation": "inference/inference/InferenceResponse.ts#L22-L25"
+    },
     {
       "kind": "request",
       "attachedBehaviors": [
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
index 25445b34b4..8a697cb376 100644
--- a/output/typescript/types.ts
+++ b/output/typescript/types.ts
@@ -13387,6 +13387,15 @@ export interface InferenceInferenceEndpointInfo extends InferenceInferenceEndpoi
   task_type: InferenceTaskType
 }
 
+export interface InferenceInferenceResult {
+  text_embedding_bytes?: InferenceTextEmbeddingByteResult[]
+  text_embedding_bits?: InferenceTextEmbeddingByteResult[]
+  text_embedding?: InferenceTextEmbeddingResult[]
+  sparse_embedding?: InferenceSparseEmbeddingResult[]
+  completion?: InferenceCompletionResult[]
+  rerank?: InferenceRankedDocument[]
+}
+
 export interface InferenceJinaAIServiceSettings {
   api_key: string
   model_id?: string
@@ -13579,6 +13588,19 @@ export interface InferenceGetResponse {
   endpoints: InferenceInferenceEndpointInfo[]
 }
 
+export interface InferenceInferenceRequest extends RequestBase {
+  task_type?: InferenceTaskType
+  inference_id: Id
+  timeout?: Duration
+  body?: {
+    query?: string
+    input: string | string[]
+    task_settings?: InferenceTaskSettings
+  }
+}
+
+export type InferenceInferenceResponse = InferenceInferenceResult
+
 export interface InferencePutRequest extends RequestBase {
   task_type?: InferenceTaskType
   inference_id: Id
diff --git a/specification/_json_spec/inference.inference.json b/specification/_json_spec/inference.inference.json
new file mode 100644
index 0000000000..bf1282dfaa
--- /dev/null
+++ b/specification/_json_spec/inference.inference.json
@@ -0,0 +1,45 @@
+{
+  "inference.inference": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+      "description": "Perform inference"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{inference_id}",
+          "methods": ["POST"],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        },
+        {
+          "path": "/_inference/{task_type}/{inference_id}",
+          "methods": ["POST"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts
index 72c6e08adc..6557b180fd 100644
--- a/specification/inference/_types/Results.ts
+++ b/specification/inference/_types/Results.ts
@@ -113,3 +113,16 @@ export class RerankedInferenceResult {
 export class DeleteInferenceEndpointResult extends AcknowledgedResponseBase {
   pipelines: Array<string>
 }
+
+/**
+ * InferenceResult is an aggregation of mutually exclusive variants
+ * @variants container
+ */
+export class InferenceResult {
+  text_embedding_bytes?: Array<TextEmbeddingByteResult>
+  text_embedding_bits?: Array<TextEmbeddingByteResult>
+  text_embedding?: Array<TextEmbeddingResult>
+  sparse_embedding?: Array<SparseEmbeddingResult>
+  completion?: Array<CompletionResult>
+  rerank?: Array<RankedDocument>
+}
diff --git a/specification/inference/inference/InferenceRequest.ts b/specification/inference/inference/InferenceRequest.ts
new file mode 100644
index 0000000000..c5a70fe181
--- /dev/null
+++ b/specification/inference/inference/InferenceRequest.ts
@@ -0,0 +1,91 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { TaskSettings } from '@inference/_types/Services'
+import { TaskType } from '@inference/_types/TaskType'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+
+/**
+ * Perform inference on the service.
+ *
+ * This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.
+ * It returns a response with the results of the tasks.
+ * The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.
+ *
+ * For details about using this API with a service, such as Amazon Bedrock, Anthropic, or HuggingFace, refer to the service-specific documentation.
+ *
+ * > info
+ * > The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+ * @rest_spec_name inference.inference
+ * @availability stack since=8.11.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges monitor_inference
+ * @doc_id inference-api-post
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{inference_id}'
+      methods: ['POST']
+    },
+    {
+      path: '/_inference/{task_type}/{inference_id}'
+      methods: ['POST']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of inference task that the model performs.
+     */
+    task_type?: TaskType
+    /**
+     * The unique identifier for the inference endpoint.
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * The amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * The query input, which is required only for the `rerank` task.
+     * It is not required for other tasks.
+     */
+    query?: string
+    /**
+     * The text on which you want to perform the inference task.
+     * It can be a single string or an array.
+     *
+     * > info
+     * > Inference endpoints for the `completion` task type currently only support a single string as input.
+     */
+    input: string | Array<string>
+    /**
+     * Task settings for the individual inference request.
+     * These settings are specific to the task type you specified and override the task settings specified when initializing the service.
+     */
+    task_settings?: TaskSettings
+  }
+}
diff --git a/specification/inference/inference/InferenceResponse.ts b/specification/inference/inference/InferenceResponse.ts
new file mode 100644
index 0000000000..cc95630ef3
--- /dev/null
+++ b/specification/inference/inference/InferenceResponse.ts
@@ -0,0 +1,25 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceResult } from '@inference/_types/Results'
+
+export class Response {
+  /** @codegen_name result */
+  body: InferenceResult
+}