proxy_server.py
import os

import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse

from llama_cpp_runner.main import LlamaCpp

app = FastAPI(title="LlamaCpp Proxy")
# Read configuration from environment variables
models_dir = os.environ.get("MODELS_DIR", "/models")
cache_dir = os.environ.get("CACHE_DIR", "/cache")
verbose = os.environ.get("VERBOSE", "true").lower() == "true"
timeout = int(os.environ.get("TIMEOUT_MINUTES", "30"))

print(f"Models directory: {models_dir}")
print(f"Cache directory: {cache_dir}")

# Create the LlamaCpp instance
llama_runner = LlamaCpp(
    models_dir=models_dir,
    cache_dir=cache_dir,
    verbose=verbose,
    timeout_minutes=timeout,
)
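# Configuration is taken entirely from the environment. A possible way to run
# this file directly (the paths and values below are illustrative only, not
# defaults of the llama_cpp_runner package):
#
#   MODELS_DIR=/srv/models CACHE_DIR=/tmp/llama-cache \
#   VERBOSE=false TIMEOUT_MINUTES=15 python proxy_server.py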
@app.get("/")
def read_root():
"""Get server status and list of available models."""
return {"status": "running", "models": llama_runner.list_models()}
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
"""Forward chat completion requests to the LlamaCpp server."""
try:
body = await request.json()
if "model" not in body:
return JSONResponse(
status_code=400,
content={"error": "Model not specified in request"}
)
try:
result = llama_runner.chat_completion(body)
# Handle streaming responses
if body.get("stream", False):
async def generate():
for line in result:
if line:
yield f"data: {line}\n\n"
yield "data: [DONE]\n\n"
return StreamingResponse(generate(), media_type="text/event-stream")
else:
return result
except Exception as e:
return JSONResponse(
status_code=500,
content={"error": str(e)}
)
except Exception as e:
return JSONResponse(
status_code=400,
content={"error": f"Invalid request: {str(e)}"}
)
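# Example request to the endpoint above (a sketch, assuming an OpenAI-style
# payload; "example-model.gguf" is a placeholder and should match a file that
# actually exists under MODELS_DIR):
#
#   curl http://localhost:3636/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{
#           "model": "example-model.gguf",
#           "messages": [{"role": "user", "content": "Hello!"}],
#           "stream": false
#         }'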
@app.get("/models")
def list_models():
"""List all available models."""
return {"models": llama_runner.list_models()}


if __name__ == "__main__":
    print("Starting LlamaCpp Proxy Server on port 3636")
    models = llama_runner.list_models()
    print(f"Available models: {models}")
    if not models:
        print("WARNING: No models found in the models directory.")
    uvicorn.run(app, host="0.0.0.0", port=3636)
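# Quick smoke test once the server is running (assumes the default port 3636
# used above; adjust host/port if you change them):
#
#   curl http://localhost:3636/            # server status + available models
#   curl http://localhost:3636/models      # model list only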