diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
index 56af978c2c70..9a3f1ef304e8 100644
--- a/apisix/cli/config.lua
+++ b/apisix/cli/config.lua
@@ -125,7 +125,7 @@ local _M = {
       access_log_buffer = 16384,
       -- luacheck: push max code line length 300
       access_log_format =
-        '$remote_addr - $remote_user [$time_local] $http_host "$request" $status $body_bytes_sent $request_time "$http_referer" "$http_user_agent" $upstream_addr $upstream_status $upstream_response_time "$upstream_scheme://$upstream_host$upstream_uri"',
+        '$remote_addr - $remote_user [$time_local] $http_host "$request" $status $body_bytes_sent $request_time "$http_referer" "$http_user_agent" $upstream_addr $upstream_status $upstream_response_time "$upstream_scheme://$upstream_host$upstream_uri" "$ai_token_usage" "$ai_ttfb"',
       -- luacheck: pop
       access_log_format_escape = "default",
       keepalive_timeout = "60s",
@@ -271,6 +271,9 @@ local _M = {
   },
   stream_plugins = { "ip-restriction", "limit-conn", "mqtt-proxy", "syslog" },
   plugin_attr = {
+    ["ai-proxy"] = {
+      enable_access_log = true,
+    },
     ["log-rotate"] = {
       timeout = 10000,
       interval = 3600,
diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua
index 4b7ff4102bc1..694ece511be3 100644
--- a/apisix/cli/ngx_tpl.lua
+++ b/apisix/cli/ngx_tpl.lua
@@ -747,7 +747,13 @@ http {
             set $dubbo_service_version       '';
             set $dubbo_method                '';
             {% end %}
 
+            {% if enable_ai_proxy_log then %}
+            # written by the ai-proxy driver once the LLM response has been read
+            set $ai_token_usage              '';
+            set $ai_ttfb                     '';
+            {% end %}
+
             access_by_lua_block {
                 apisix.http_access_phase()
             }
diff --git a/apisix/cli/ops.lua b/apisix/cli/ops.lua
index b73fa6701740..ff4acc471092 100644
--- a/apisix/cli/ops.lua
+++ b/apisix/cli/ops.lua
@@ -558,7 +558,15 @@ Please modify "admin_key" in conf/config.yaml .
     if enabled_plugins["zipkin"] and yaml_conf.plugin_attr["zipkin"] then
         zipkin_set_ngx_var = yaml_conf.plugin_attr["zipkin"].set_ngx_var
     end
 
+    -- plugin_attr["ai-proxy"] may be absent from the merged config, so guard the
+    -- lookup instead of indexing nil (same pattern as the zipkin block above)
+    local enable_ai_proxy_log
+    local ai_proxy_attr = yaml_conf.plugin_attr and yaml_conf.plugin_attr["ai-proxy"]
+    if ai_proxy_attr and ai_proxy_attr.enable_access_log then
+        enable_ai_proxy_log = true
+    end
+
     -- Using template.render
     local sys_conf = {
         lua_path = env.pkg_path_org,
@@ -580,7 +588,8 @@ Please modify "admin_key" in conf/config.yaml .
         prometheus_server_addr = prometheus_server_addr,
         proxy_mirror_timeouts = proxy_mirror_timeouts,
         opentelemetry_set_ngx_var = opentelemetry_set_ngx_var,
-        zipkin_set_ngx_var = zipkin_set_ngx_var
+        zipkin_set_ngx_var = zipkin_set_ngx_var,
+        enable_ai_proxy_log = enable_ai_proxy_log,
     }
 
     if not yaml_conf.apisix then
diff --git a/apisix/plugins/ai-drivers/openai-base.lua b/apisix/plugins/ai-drivers/openai-base.lua
index 4f0b38afe3f8..0b1b0fd394d0 100644
--- a/apisix/plugins/ai-drivers/openai-base.lua
+++ b/apisix/plugins/ai-drivers/openai-base.lua
@@ -30,7 +30,10 @@ local ngx_re = require("ngx.re")
 
 local ngx_print = ngx.print
 local ngx_flush = ngx.flush
+local ngx_var = ngx.var
+local ngx_now = ngx.now
+local ngx_req = ngx.req
 
 local pairs = pairs
 local type = type
 local ipairs = ipairs
@@ -148,6 +151,8 @@ function _M.read_response(ctx, res)
     local content_type = res.headers["Content-Type"]
     core.response.set_header("Content-Type", content_type)
 
+    -- seconds between the start of the client request and the first upstream data
+    local time_to_first_byte
     if core.string.find(content_type, "text/event-stream") then
         while true do
             local chunk, err = body_reader() -- will read chunk by chunk
@@ -158,7 +163,12 @@ function _M.read_response(ctx, res)
             if not chunk then
                 break
             end
 
+            -- only the first chunk of the stream defines the time to first byte
+            if not time_to_first_byte then
+                time_to_first_byte = ngx_now() - ngx_req.start_time()
+            end
+
             ngx_print(chunk)
             ngx_flush(true)
 
@@ -202,14 +212,30 @@ function _M.read_response(ctx, res)
 
             ::CONTINUE::
         end
+
+        -- $ai_token_usage / $ai_ttfb are only declared in nginx.conf when
+        -- plugin_attr["ai-proxy"].enable_access_log is on. Undefined variables read
+        -- as nil and raise on write, so skip the update when they are absent.
+        if ngx_var.ai_token_usage ~= nil then
+            ngx_var.ai_token_usage = "ai_token_usage=" .. core.json.encode(ctx.ai_token_usage)
+            -- the stream may end before any chunk arrives; never concatenate nil
+            if time_to_first_byte then
+                ngx_var.ai_ttfb = "ai_time_to_first_byte(in seconds)=" .. time_to_first_byte
+            end
+        end
         return
     end
 
     local raw_res_body, err = res:read_body()
     if not raw_res_body then
         core.log.error("failed to read response body: ", err)
         return 500
     end
+
+    -- the body is read in one piece here, so the first byte is only observed once
+    -- the full response has arrived
+    time_to_first_byte = ngx_now() - ngx_req.start_time()
+
     local res_body, err = core.json.decode(raw_res_body)
     if err then
         core.log.warn("invalid response body from ai service: ", raw_res_body, " err: ", err,
@@ -220,6 +246,13 @@ function _M.read_response(ctx, res)
             completion_tokens = res_body.usage and res_body.usage.completion_tokens or 0,
             total_tokens = res_body.usage and res_body.usage.total_tokens or 0,
         }
+
+        -- mirror the streaming branch: only touch the log variables when
+        -- nginx.conf actually declares them
+        if ngx_var.ai_token_usage ~= nil then
+            ngx_var.ai_token_usage = "ai_token_usage=" .. core.json.encode(ctx.ai_token_usage)
+            ngx_var.ai_ttfb = "ai_time_to_first_byte(in seconds)=" .. time_to_first_byte
+        end
     end
     return res.status, raw_res_body
 end