
Commit 1e8e1d5

feat(wren-ai-service): user guide and misleading streaming(ai-env-changed) (#1015)
1 parent: 2c49905 · commit: 1e8e1d5

23 files changed: +540 / -135 lines

Diff for: deployment/kustomizations/base/cm.yaml

+6 -1

@@ -154,6 +154,8 @@ data:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_indexing
@@ -171,6 +173,8 @@ data:
    llm: litellm_llm.default
  - name: chart_adjustment
    llm: litellm_llm.default
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: sql_question_generation
    llm: litellm_llm.default
  - name: sql_generation_reasoning
@@ -191,9 +195,10 @@ data:
    document_store: qdrant
  - name: project_meta_indexing
    document_store: qdrant
-
---
settings:
+  doc_endpoint: https://docs.getwren.ai
+  is_oss: true
  engine_timeout: 30
  column_indexing_batch_size: 50
  table_retrieval_size: 10

Diff for: docker/config.example.yaml

+6

@@ -106,6 +106,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_indexing
@@ -123,6 +125,8 @@ pipes:
    llm: litellm_llm.default
  - name: chart_adjustment
    llm: litellm_llm.default
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: sql_question_generation
    llm: litellm_llm.default
  - name: sql_generation_reasoning
@@ -146,6 +150,8 @@ pipes:

---
settings:
+  doc_endpoint: https://docs.getwren.ai
+  is_oss: true
  engine_timeout: 30
  column_indexing_batch_size: 50
  table_retrieval_size: 10
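
Note that the same additions must land in whichever config the deployment actually loads: the ConfigMap above for Kubernetes, or a local config.yaml for Docker. Below is a minimal sanity check in Python, assuming a local config.yaml in the working directory (the path and check logic are illustrative, not part of this commit):

import yaml

# config.yaml is multi-document YAML: provider/pipeline documents followed by a `settings` document.
with open("config.yaml") as f:
    documents = [doc for doc in yaml.safe_load_all(f) if isinstance(doc, dict)]

pipe_names = {pipe["name"] for doc in documents for pipe in doc.get("pipes", [])}
settings = next((doc["settings"] for doc in documents if "settings" in doc), {})

for required_pipe in ("misleading_assistance", "user_guide_assistance"):
    print(required_pipe, "OK" if required_pipe in pipe_names else "MISSING")
for required_key in ("doc_endpoint", "is_oss"):
    print(required_key, settings.get(required_key, "MISSING"))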

Diff for: wren-ai-service/docs/config_examples/config.anthropic.yaml

+4

@@ -101,6 +101,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_indexing
@@ -114,6 +116,8 @@ pipes:
    llm: litellm_llm.default
  - name: sql_executor
    engine: wren_ui
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: chart_generation
    llm: litellm_llm.default
  - name: chart_adjustment

Diff for: wren-ai-service/docs/config_examples/config.azure.yaml

+4

@@ -104,6 +104,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_preparation
@@ -118,6 +120,8 @@ pipes:
    llm: litellm_llm.default
  - name: sql_executor
    engine: wren_ui
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: chart_generation
    llm: litellm_llm.default
  - name: chart_adjustment

Diff for: wren-ai-service/docs/config_examples/config.deepseek.yaml

+4

@@ -129,6 +129,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.deepseek/deepseek-chat
  - name: sql_pairs_indexing
@@ -142,6 +144,8 @@ pipes:
    llm: litellm_llm.default
  - name: sql_executor
    engine: wren_ui
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: sql_question_generation
    llm: litellm_llm.default
  - name: sql_generation_reasoning

Diff for: wren-ai-service/docs/config_examples/config.google_ai_studio.yaml

+4

@@ -115,6 +115,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_indexing
@@ -128,6 +130,8 @@ pipes:
    llm: litellm_llm.default
  - name: sql_executor
    engine: wren_ui
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: sql_question_generation
    llm: litellm_llm.default
  - name: sql_generation_reasoning

Diff for: wren-ai-service/docs/config_examples/config.groq.yaml

+4

@@ -110,6 +110,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_indexing
@@ -123,6 +125,8 @@ pipes:
    llm: litellm_llm.default
  - name: sql_executor
    engine: wren_ui
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: sql_question_generation
    llm: litellm_llm.default
  - name: sql_generation_reasoning

Diff for: wren-ai-service/docs/config_examples/config.lm_studio.yaml

+4

@@ -109,6 +109,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_indexing
@@ -122,6 +124,8 @@ pipes:
    llm: litellm_llm.default
  - name: sql_executor
    engine: wren_ui
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: sql_question_generation
    llm: litellm_llm.default
  - name: sql_generation_reasoning

Diff for: wren-ai-service/docs/config_examples/config.ollama.yaml

+4

@@ -107,6 +107,8 @@ pipes:
    llm: litellm_llm.default
    embedder: litellm_embedder.default
    document_store: qdrant
+  - name: misleading_assistance
+    llm: litellm_llm.default
  - name: data_assistance
    llm: litellm_llm.default
  - name: sql_pairs_indexing
@@ -120,6 +122,8 @@ pipes:
    llm: litellm_llm.default
  - name: sql_executor
    engine: wren_ui
+  - name: user_guide_assistance
+    llm: litellm_llm.default
  - name: sql_question_generation
    llm: litellm_llm.default
  - name: sql_generation_reasoning

Diff for: wren-ai-service/src/config.py

+4

@@ -54,6 +54,10 @@ class Settings(BaseSettings):
        """,
    )

+    # user guide config
+    is_oss: bool = Field(default=True)
+    doc_endpoint: str = Field(default="https://docs.getwren.ai")
+
    # langfuse config
    # in order to use langfuse, we also need to set the LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY in the .env or .env.dev file
    langfuse_host: str = Field(default="https://cloud.langfuse.com")
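
These two fields sit on the service's Settings model next to the existing langfuse options, with the defaults shown in the diff. A standalone sketch of that shape follows (the BaseSettings import path and any environment-variable override behavior depend on the project's pydantic setup, so treat those details as assumptions):

from pydantic import Field
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    # user guide config (defaults mirror the diff above)
    is_oss: bool = Field(default=True)
    doc_endpoint: str = Field(default="https://docs.getwren.ai")


settings = Settings()
print(settings.doc_endpoint, settings.is_oss)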

Diff for: wren-ai-service/src/globals.py

+13

@@ -7,6 +7,7 @@
from src.core.pipeline import PipelineComponent
from src.core.provider import EmbedderProvider, LLMProvider
from src.pipelines import generation, indexing, retrieval
+from src.utils import fetch_wren_ai_docs
from src.web.v1 import services

logger = logging.getLogger("wren-ai-service")
@@ -44,6 +45,10 @@ def create_service_container(
        "maxsize": settings.query_cache_maxsize,
        "ttl": settings.query_cache_ttl,
    }
+    wren_ai_docs = fetch_wren_ai_docs(settings.doc_endpoint, settings.is_oss)
+    if not wren_ai_docs:
+        logger.warning("Failed to fetch Wren AI docs or response was empty.")
+
    return ServiceContainer(
        semantics_description=services.SemanticsDescription(
            pipelines={
@@ -82,10 +87,18 @@
            pipelines={
                "intent_classification": generation.IntentClassification(
                    **pipe_components["intent_classification"],
+                    wren_ai_docs=wren_ai_docs,
+                ),
+                "misleading_assistance": generation.MisleadingAssistance(
+                    **pipe_components["misleading_assistance"],
                ),
                "data_assistance": generation.DataAssistance(
                    **pipe_components["data_assistance"]
                ),
+                "user_guide_assistance": generation.UserGuideAssistance(
+                    **pipe_components["user_guide_assistance"],
+                    wren_ai_docs=wren_ai_docs,
+                ),
                "retrieval": retrieval.Retrieval(
                    **pipe_components["db_schema_retrieval"],
                    table_retrieval_size=settings.table_retrieval_size,
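
fetch_wren_ai_docs itself lives in src/utils.py and is not part of this diff; judging from how its result is consumed, it returns a list of {"path", "content"} dicts, and an empty list when the fetch fails. A stubbed sketch of the guard above, with an invented stand-in for the fetcher:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("wren-ai-service")


def fetch_wren_ai_docs_stub(doc_endpoint: str, is_oss: bool) -> list[dict]:
    # Stand-in for src.utils.fetch_wren_ai_docs; returns an empty list to simulate a failed fetch.
    return []


wren_ai_docs = fetch_wren_ai_docs_stub("https://docs.getwren.ai", True)
if not wren_ai_docs:
    logger.warning("Failed to fetch Wren AI docs or response was empty.")

# Either way, the (possibly empty) list is handed to IntentClassification and
# UserGuideAssistance via the wren_ai_docs keyword, as shown in the diff above.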

Diff for: wren-ai-service/src/pipelines/generation/__init__.py

+4

@@ -4,6 +4,7 @@
from .followup_sql_generation import FollowUpSQLGeneration
from .followup_sql_generation_reasoning import FollowUpSQLGenerationReasoning
from .intent_classification import IntentClassification
+from .misleading_assistance import MisleadingAssistance
from .question_recommendation import QuestionRecommendation
from .relationship_recommendation import RelationshipRecommendation
from .semantics_description import SemanticsDescription
@@ -16,6 +17,7 @@
from .sql_question import SQLQuestion
from .sql_regeneration import SQLRegeneration
from .sql_summary import SQLSummary
+from .user_guide_assistance import UserGuideAssistance

__all__ = [
    "ChartGeneration",
@@ -33,7 +35,9 @@
    "SQLGeneration",
    "SQLGenerationReasoning",
    "SQLSummary",
+    "UserGuideAssistance",
    "SQLQuestion",
    "SQLRegeneration",
    "FollowUpSQLGenerationReasoning",
+    "MisleadingAssistance",
]
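
With these re-exports in place, callers such as globals.py can pick up the new pipelines from the package namespace. A trivial import check, meant to be run from the wren-ai-service source tree:

# Confirms the generation package now exposes the two pipelines added in this commit.
from src.pipelines.generation import MisleadingAssistance, UserGuideAssistance

print(MisleadingAssistance.__name__, UserGuideAssistance.__name__)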

Diff for: wren-ai-service/src/pipelines/generation/intent_classification.py

+42 -18

@@ -25,7 +25,7 @@
### TASK ###
You are a great detective, who is great at intent classification.
First, rephrase the user's question to make it more specific, clear and relevant to the database schema before making the intent classification.
-Second, you need to use rephrased user's question to classify user's intent based on given database schema to one of three conditions: MISLEADING_QUERY, TEXT_TO_SQL, GENERAL.
+Second, you need to use rephrased user's question to classify user's intent based on given database schema to one of four conditions: MISLEADING_QUERY, TEXT_TO_SQL, GENERAL, USER_GUIDE.
Also you should provide reasoning for the classification clearly and concisely within 20 words.

### INSTRUCTIONS ###
@@ -56,6 +56,31 @@
    - "What is the total sales for last quarter?"
    - "Show me all customers who purchased product X."
    - "List the top 10 products by revenue."
+- GENERAL
+  - When to Use:
+    - Use this category if the user is seeking general information about the database schema.
+    - If the rephrasedd user's question is related to the previous question, but considering them together cannot be answered by generating an SQL query using that schema.
+  - Characteristics:
+    - The question is about understanding the dataset or its capabilities.
+    - The user may need guidance on how to proceed or what questions to ask.
+  - Instructions:
+    - MUST explicitly add phrases from the rephrasedd user's question that are not explicitly related to the database schema in the reasoning output. Choose the most relevant phrases that cause the rephrasedd user's question to be GENERAL.
+  - Examples:
+    - "What is the dataset about?"
+    - "Tell me more about the database."
+    - "How can I analyze customer behavior with this data?"
+- USER_GUIDE
+  - When to Use:
+    - If the user's question is about Wren AI's features, capabilities, or how to use Wren AI.
+    - If the user's question is related to the content in the user guide.
+  - Characteristics:
+    - The question is about Wren AI's features, capabilities, or how to use Wren AI.
+  - Examples:
+    - "What can Wren AI do?"
+    - "How can I reset project?"
+    - "How can I delete project?"
+    - "How can I connect to other databases?"
+    - "How to draw a chart?"
- MISLEADING_QUERY
  - When to Use:
    - If the rephrasedd user's question is irrelevant to the given database schema and cannot be answered using SQL with that schema.
@@ -71,28 +96,14 @@
    - "How are you?"
    - "What's the weather like today?"
    - "Tell me a joke."
-- GENERAL
-  - When to Use:
-    - Use this category if the user is seeking general information about the database schema.
-    - If the rephrasedd user's question is related to the previous question, but considering them together cannot be answered by generating an SQL query using that schema.
-  - Characteristics:
-    - The question is about understanding the dataset or its capabilities.
-    - The user may need guidance on how to proceed or what questions to ask.
-  - Instructions:
-    - MUST explicitly add phrases from the rephrasedd user's question that are not explicitly related to the database schema in the reasoning output. Choose the most relevant phrases that cause the rephrasedd user's question to be GENERAL.
-  - Examples:
-    - "What is the dataset about?"
-    - "Tell me more about the database."
-    - "What can Wren AI do?"
-    - "How can I analyze customer behavior with this data?"
-
+
### OUTPUT FORMAT ###
Please provide your response as a JSON object, structured as follows:

{
    "rephrased_question": "<REPHRASED_USER_QUESTION_IN_STRING_FORMAT>",
    "reasoning": "<CHAIN_OF_THOUGHT_REASONING_BASED_ON_REPHRASED_USER_QUESTION_IN_STRING_FORMAT>",
-    "results": "MISLEADING_QUERY" | "TEXT_TO_SQL" | "GENERAL"
+    "results": "MISLEADING_QUERY" | "TEXT_TO_SQL" | "GENERAL" | "USER_GUIDE"
}
"""

@@ -127,6 +138,11 @@
{% endfor %}
{% endif %}

+### USER GUIDE ###
+{% for doc in docs %}
+- {{doc.path}}: {{doc.content}}
+{% endfor %}
+
### QUESTION ###
User's question: {{query}}
Current Time: {{ current_time }}
@@ -245,6 +261,7 @@ def construct_db_schemas(dbschema_retrieval: list[Document]) -> list[str]:
@observe(capture_input=False)
def prompt(
    query: str,
+    wren_ai_docs: list[dict],
    construct_db_schemas: list[str],
    prompt_builder: PromptBuilder,
    histories: Optional[list[AskHistory]] = None,
@@ -263,6 +280,7 @@ def prompt(
        configuration=configuration,
        ),
        current_time=configuration.show_current_time(),
+        docs=wren_ai_docs,
    )


@@ -294,7 +312,7 @@ def post_process(classify_intent: dict, construct_db_schemas: list[str]) -> dict


class IntentClassificationResult(BaseModel):
-    results: Literal["MISLEADING_QUERY", "TEXT_TO_SQL", "GENERAL"]
+    results: Literal["MISLEADING_QUERY", "TEXT_TO_SQL", "GENERAL", "USER_GUIDE"]
    rephrased_question: str
    reasoning: str

@@ -316,6 +334,7 @@ def __init__(
        llm_provider: LLMProvider,
        embedder_provider: EmbedderProvider,
        document_store_provider: DocumentStoreProvider,
+        wren_ai_docs: list[dict],
        table_retrieval_size: Optional[int] = 50,
        table_column_retrieval_size: Optional[int] = 100,
        **kwargs,
@@ -339,6 +358,10 @@ def __init__(
            ),
        }

+        self._configs = {
+            "wren_ai_docs": wren_ai_docs,
+        }
+
        super().__init__(
            AsyncDriver({}, sys.modules[__name__], result_builder=base.DictResult())
        )
@@ -364,6 +387,7 @@ async def run(
                "instructions": instructions or [],
                "configuration": configuration,
                **self._components,
+                **self._configs,
            },
        )
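
Taken together, this file's changes add a fourth intent label and splice the fetched docs into the prompt. A small self-contained sketch of both pieces (the extended result schema and the USER GUIDE template block), using invented sample docs:

from typing import Literal

from jinja2 import Template
from pydantic import BaseModel


class IntentClassificationResult(BaseModel):
    # Same schema as in the diff, now accepting the USER_GUIDE label.
    results: Literal["MISLEADING_QUERY", "TEXT_TO_SQL", "GENERAL", "USER_GUIDE"]
    rephrased_question: str
    reasoning: str


# The USER GUIDE section of the prompt template, rendered with sample docs.
user_guide_block = Template(
    "### USER GUIDE ###\n{% for doc in docs %}- {{doc.path}}: {{doc.content}}\n{% endfor %}"
)
sample_docs = [{"path": "guide/reset-project", "content": "How to reset a project in Wren AI ..."}]
print(user_guide_block.render(docs=sample_docs))

result = IntentClassificationResult(
    results="USER_GUIDE",
    rephrased_question="How can I reset my project in Wren AI?",
    reasoning="Question is about Wren AI product usage covered by the user guide.",
)
print(result.results)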
