Skip to content

Commit 7111f73

Browse files
authored
Merge pull request #476 from dipatidar/llama3.1-deploymnet-guide
modified the payload to accommodate current date in prompt
2 parents cb8b585 + 268a901 commit 7111f73

File tree

2 files changed

+28
-16
lines changed

2 files changed

+28
-16
lines changed

LLM/deploy-llama3.1.md

+13-8
Original file line numberDiff line numberDiff line change
@@ -227,22 +227,26 @@ This format has to be exactly reproduced for effective use.
227227
import requests
228228
import ads
229229
from string import Template
230+
from datetime import datetime
230231

231232
ads.set_auth("resource_principal")
233+
endpoint = f"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict"
232234

233-
prompt_template= Template("""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
235+
current_date = datetime.now().strftime("%d %B %Y")
236+
237+
prompt_template = Template(f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
234238
235239
Cutting Knowledge Date: December 2023
236-
Today Date: 24 Jul 2024
240+
Today Date: {current_date}
237241
238242
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
239243
240244
$prompt<|eot_id|><|start_header_id|>assistant<|end_header_id|>""")
241245

242-
prompt = t.substitute(prompt= "What amateur radio bands are best to use when there are solar flares?")
246+
prompt = prompt_template.substitute(prompt= "What amateur radio bands are best to use when there are solar flares?")
243247

244248
requests.post(
245-
"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict",
249+
endpoint,
246250
json={
247251
"model": "llama3.1",
248252
"prompt": prompt,
@@ -253,7 +257,6 @@ requests.post(
253257
auth=ads.common.auth.default_signer()["signer"],
254258
headers={},
255259
).json()
256-
257260
```
258261
#### Output:
259262

@@ -304,19 +307,21 @@ Keep in mind that the impact of solar flares on amateur radio communications can
304307
import ads
305308
from langchain_community.llms import OCIModelDeploymentVLLM
306309
from string import Template
310+
from datetime import datetime
307311

308312
ads.set_auth("resource_principal")
313+
current_date = datetime.now().strftime("%d %B %Y")
309314

310315
llm = OCIModelDeploymentVLLM(
311-
endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict",
316+
endpoint=f"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict",
312317
model="llama3.1",
313318
)
314319

315320
llm.invoke(
316-
input=Template("""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
321+
input=Template(f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
317322

318323
Cutting Knowledge Date: December 2023
319-
Today Date: 24 Jul 2024
324+
Today Date: {current_date}
320325

321326
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
322327

LLM/llama3.1-8B-deployment-vLLM-container.md

+15-8
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ infrastructure = (
185185

186186
```python
187187
env_var = {
188-
'MODEL_DEPLOY_PREDICT_ENDPOINT': '/v1/chat/completions',
188+
'MODEL_DEPLOY_PREDICT_ENDPOINT': '/v1/completions',
189189
'MODEL_DEPLOY_ENABLE_STREAMING': 'true',
190190
}
191191

@@ -243,22 +243,26 @@ This format has to be exactly reproduced for effective use. More details about p
243243
import requests
244244
import ads
245245
from string import Template
246+
from datetime import datetime
246247

247248
ads.set_auth("resource_principal")
249+
endpoint = f"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict"
248250

249-
prompt_template= Template("""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
251+
current_date = datetime.now().strftime("%d %B %Y")
252+
253+
prompt_template = Template(f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
250254
251255
Cutting Knowledge Date: December 2023
252-
Today Date: 29 Jul 2024
256+
Today Date: {current_date}
253257
254258
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
255259
256260
$prompt<|eot_id|><|start_header_id|>assistant<|end_header_id|>""")
257261

258-
prompt = t.substitute(prompt= "What amateur radio bands are best to use when there are solar flares?")
262+
prompt = prompt_template.substitute(prompt= "What amateur radio bands are best to use when there are solar flares?")
259263

260264
requests.post(
261-
"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict",
265+
endpoint,
262266
json={
263267
"model": "llama3.1",
264268
"prompt": prompt,
@@ -334,19 +338,22 @@ Remember, it's always better to err on the side of caution and choose lower freq
334338
import ads
335339
from langchain_community.llms import OCIModelDeploymentVLLM
336340
from string import Template
341+
from datetime import datetime
342+
337343

338344
ads.set_auth("resource_principal")
345+
current_date = datetime.now().strftime("%d %B %Y")
339346

340347
llm = OCIModelDeploymentVLLM(
341-
endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict",
348+
endpoint=f"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{deployment.model_deployment_id}/predict",
342349
model="llama3.1",
343350
)
344351

345352
llm.invoke(
346-
input=Template("""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
353+
input=Template(f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
347354

348355
Cutting Knowledge Date: December 2023
349-
Today Date: 29 Jul 2024
356+
Today Date: {current_date}
350357

351358
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
352359

0 commit comments

Comments
 (0)