Skip to content

feat: add latency and token_usage info in ai-proxy access log #12042

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion apisix/cli/config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ local _M = {
access_log_buffer = 16384,
-- luacheck: push max code line length 300
access_log_format =
'$remote_addr - $remote_user [$time_local] $http_host "$request" $status $body_bytes_sent $request_time "$http_referer" "$http_user_agent" $upstream_addr $upstream_status $upstream_response_time "$upstream_scheme://$upstream_host$upstream_uri"',
'$remote_addr - $remote_user [$time_local] $http_host "$request" $status $body_bytes_sent $request_time "$http_referer" "$http_user_agent" $upstream_addr $upstream_status $upstream_response_time "$upstream_scheme://$upstream_host$upstream_uri" "$ai_token_usage" "$ai_ttfb"',
-- luacheck: pop
access_log_format_escape = "default",
keepalive_timeout = "60s",
Expand Down Expand Up @@ -271,6 +271,9 @@ local _M = {
},
stream_plugins = { "ip-restriction", "limit-conn", "mqtt-proxy", "syslog" },
plugin_attr = {
["ai-proxy"] = {
enable_access_log = true,
},
["log-rotate"] = {
timeout = 10000,
interval = 3600,
Expand Down
6 changes: 4 additions & 2 deletions apisix/cli/ngx_tpl.lua
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,6 @@ http {
set $upstream_scheme 'http';
set $upstream_host $http_host;
set $upstream_uri '';

location /apisix/admin {
{%if allow_admin then%}
{% for _, allow_ip in ipairs(allow_admin) do %}
Expand Down Expand Up @@ -747,7 +746,10 @@ http {
set $dubbo_service_version '';
set $dubbo_method '';
{% end %}

{% if enable_ai_proxy_log then%}
set $ai_token_usage '';
set $ai_ttfb '';
{% end %}
access_by_lua_block {
apisix.http_access_phase()
}
Expand Down
8 changes: 6 additions & 2 deletions apisix/cli/ops.lua
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,10 @@ Please modify "admin_key" in conf/config.yaml .
if enabled_plugins["zipkin"] and yaml_conf.plugin_attr["zipkin"] then
zipkin_set_ngx_var = yaml_conf.plugin_attr["zipkin"].set_ngx_var
end

-- Decide whether the generated nginx.conf should declare the ai-proxy
-- access-log variables. Guard the "ai-proxy" entry the same way the zipkin
-- entry is guarded: indexing a missing plugin_attr entry would raise
-- "attempt to index a nil value" and abort config generation.
local enable_ai_proxy_log
if yaml_conf.plugin_attr["ai-proxy"]
   and yaml_conf.plugin_attr["ai-proxy"].enable_access_log then
    enable_ai_proxy_log = true
end
-- Using template.render
local sys_conf = {
lua_path = env.pkg_path_org,
Expand All @@ -580,7 +583,8 @@ Please modify "admin_key" in conf/config.yaml .
prometheus_server_addr = prometheus_server_addr,
proxy_mirror_timeouts = proxy_mirror_timeouts,
opentelemetry_set_ngx_var = opentelemetry_set_ngx_var,
zipkin_set_ngx_var = zipkin_set_ngx_var
zipkin_set_ngx_var = zipkin_set_ngx_var,
enable_ai_proxy_log = enable_ai_proxy_log,
}

if not yaml_conf.apisix then
Expand Down
17 changes: 14 additions & 3 deletions apisix/plugins/ai-drivers/openai-base.lua
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ local ngx_re = require("ngx.re")

local ngx_print = ngx.print
local ngx_flush = ngx.flush

local ngx_var = ngx.var
local ngx_now = ngx.now
local ngx_req = ngx.req
local pairs = pairs
local type = type
local ipairs = ipairs
Expand Down Expand Up @@ -148,6 +150,7 @@ function _M.read_response(ctx, res)
local content_type = res.headers["Content-Type"]
core.response.set_header("Content-Type", content_type)

local time_to_first_byte
if core.string.find(content_type, "text/event-stream") then
while true do
local chunk, err = body_reader() -- will read chunk by chunk
Expand All @@ -158,7 +161,9 @@ function _M.read_response(ctx, res)
if not chunk then
break
end

if not time_to_first_byte then
time_to_first_byte = ngx_now() - ngx_req.start_time()
end
ngx_print(chunk)
ngx_flush(true)

Expand Down Expand Up @@ -202,14 +207,18 @@ function _M.read_response(ctx, res)

::CONTINUE::
end
-- Publish the streamed response's metrics through the nginx variables
-- referenced by the access log format.
ngx_var.ai_token_usage = "ai_token_usage=" .. core.json.encode(ctx.ai_token_usage)
-- time_to_first_byte is still nil when the read loop ended before any chunk
-- was received; concatenating nil would raise, so only set the variable when
-- a value was actually measured. The label uses the same
-- "ai_time_to_first_byte(in seconds)=" prefix as the non-streaming path of
-- this function so both kinds of response log identically.
if time_to_first_byte then
    ngx_var.ai_ttfb = "ai_time_to_first_byte(in seconds)=" .. time_to_first_byte
end
return
end

local raw_res_body, err = res:read_body()
if not raw_res_body then
core.log.error("failed to read response body: ", err)
return 500
end
if not time_to_first_byte then
time_to_first_byte = ngx_now() - ngx_req.start_time()
end
local res_body, err = core.json.decode(raw_res_body)
if err then
core.log.warn("invalid response body from ai service: ", raw_res_body, " err: ", err,
Expand All @@ -220,6 +229,8 @@ function _M.read_response(ctx, res)
completion_tokens = res_body.usage and res_body.usage.completion_tokens or 0,
total_tokens = res_body.usage and res_body.usage.total_tokens or 0,
}
ngx_var.ai_token_usage = "ai_token_usage=" .. core.json.encode(ctx.ai_token_usage)
ngx_var.ai_ttfb = "ai_time_to_first_byte(in seconds)=" .. time_to_first_byte
end
return res.status, raw_res_body
end
Expand Down
Loading