apache · Revolyssup · Mar 12, 2025 · Mar 12, 2025
diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
@@ -125,7 +125,7 @@ local _M = {
       access_log_buffer = 16384,
       -- luacheck: push max code line length 300
       access_log_format =
-      '$remote_addr - $remote_user [$time_local] $http_host "$request" $status $body_bytes_sent $request_time "$http_referer" "$http_user_agent" $upstream_addr $upstream_status $upstream_response_time "$upstream_scheme://$upstream_host$upstream_uri"',
+      '$remote_addr - $remote_user [$time_local] $http_host "$request" $status $body_bytes_sent $request_time "$http_referer" "$http_user_agent" $upstream_addr $upstream_status $upstream_response_time "$upstream_scheme://$upstream_host$upstream_uri" "$ai_token_usage" "$ai_ttfb"',
       -- luacheck: pop
       access_log_format_escape = "default",
       keepalive_timeout = "60s",
@@ -271,6 +271,9 @@ local _M = {
   },
   stream_plugins = { "ip-restriction", "limit-conn", "mqtt-proxy", "syslog" },
   plugin_attr = {
+    ["ai-proxy"] = {
+      enable_access_log = true,
+    },
     ["log-rotate"] = {
       timeout = 10000,
       interval = 3600,

diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua
@@ -588,7 +588,6 @@ http {
         set $upstream_scheme             'http';
         set $upstream_host               $http_host;
         set $upstream_uri                '';
-
         location /apisix/admin {
             {%if allow_admin then%}
                 {% for _, allow_ip in ipairs(allow_admin) do %}
@@ -747,7 +746,10 @@ http {
             set $dubbo_service_version       '';
             set $dubbo_method                '';
             {% end %}
-
+            # always declare the AI log variables: access_log_format and the
+            # ai-proxy driver use them whether or not enable_access_log is set
+            set $ai_token_usage             '';
+            set $ai_ttfb                   '';
             access_by_lua_block {
                 apisix.http_access_phase()
             }

diff --git a/apisix/cli/ops.lua b/apisix/cli/ops.lua
@@ -558,7 +558,10 @@ Please modify "admin_key" in conf/config.yaml .
     if enabled_plugins["zipkin"] and yaml_conf.plugin_attr["zipkin"] then
         zipkin_set_ngx_var = yaml_conf.plugin_attr["zipkin"].set_ngx_var
     end
-
+    -- plugin_attr["ai-proxy"] is nil when the user config omits it, so
+    -- fall back to an empty table instead of indexing nil
+    local ai_proxy_attr = yaml_conf.plugin_attr["ai-proxy"] or {}
+    local enable_ai_proxy_log = ai_proxy_attr.enable_access_log and true or nil
     -- Using template.render
     local sys_conf = {
         lua_path = env.pkg_path_org,
@@ -580,7 +583,8 @@ Please modify "admin_key" in conf/config.yaml .
         prometheus_server_addr = prometheus_server_addr,
         proxy_mirror_timeouts = proxy_mirror_timeouts,
         opentelemetry_set_ngx_var = opentelemetry_set_ngx_var,
-        zipkin_set_ngx_var = zipkin_set_ngx_var
+        zipkin_set_ngx_var = zipkin_set_ngx_var,
+        enable_ai_proxy_log = enable_ai_proxy_log,
     }
 
     if not yaml_conf.apisix then

diff --git a/apisix/plugins/ai-drivers/openai-base.lua b/apisix/plugins/ai-drivers/openai-base.lua
@@ -30,7 +30,9 @@ local ngx_re = require("ngx.re")
 
 local ngx_print = ngx.print
 local ngx_flush = ngx.flush
-
+local ngx_var = ngx.var
+local ngx_now = ngx.now
+local ngx_req = ngx.req
 local pairs = pairs
 local type  = type
 local ipairs = ipairs
@@ -148,6 +150,7 @@ function _M.read_response(ctx, res)
     local content_type = res.headers["Content-Type"]
     core.response.set_header("Content-Type", content_type)
 
+    local time_to_first_byte
     if core.string.find(content_type, "text/event-stream") then
         while true do
             local chunk, err = body_reader() -- will read chunk by chunk
@@ -158,7 +161,9 @@ function _M.read_response(ctx, res)
             if not chunk then
                 break
             end
-
+            if not time_to_first_byte then
+                time_to_first_byte = ngx_now() - ngx_req.start_time()
+            end
             ngx_print(chunk)
             ngx_flush(true)
 
@@ -202,14 +207,18 @@ function _M.read_response(ctx, res)
 
             ::CONTINUE::
         end
+        ngx_var.ai_token_usage = "ai_token_usage=" .. core.json.encode(ctx.ai_token_usage)
+        ngx_var.ai_ttfb = "ai_time_to_first_byte(in seconds)=" .. (time_to_first_byte or "-")
         return
     end
-
     local raw_res_body, err = res:read_body()
     if not raw_res_body then
         core.log.error("failed to read response body: ", err)
         return 500
     end
+    if not time_to_first_byte then
+        time_to_first_byte = ngx_now() - ngx_req.start_time()
+    end
     local res_body, err = core.json.decode(raw_res_body)
     if err then
         core.log.warn("invalid response body from ai service: ", raw_res_body, " err: ", err,
@@ -220,6 +229,8 @@ function _M.read_response(ctx, res)
             completion_tokens = res_body.usage and res_body.usage.completion_tokens or 0,
             total_tokens = res_body.usage and res_body.usage.total_tokens or 0,
         }
+        ngx_var.ai_token_usage = "ai_token_usage=" .. core.json.encode(ctx.ai_token_usage)
+        ngx_var.ai_ttfb = "ai_time_to_first_byte(in seconds)=" .. time_to_first_byte
     end
     return res.status, raw_res_body
 end