from utils import llama, llama_chat
= """
prompt Help me write a birthday card for my dear friend Andrew.
Here are details about my friend:
He likes long walks on the beach and reading in the bookstore.
His hobbies include reading research papers and speaking at conferences.
His favorite color is light blue.
He likes pandas.
"""
= llama(prompt)
response print(response)
= """
prompt_2 Oh, he also likes teaching. Can you rewrite it to include that?
"""
= llama(prompt_2)
response_2 print(response_2)
Construct Multi-Turn Prompts
Llama - Multi-Prompt Input
Follow Up Question
Remember that the model, as set up here, does not remember your previous prompts.
The second prompt above will not yield the expected outcome: the model has no clue what "he" refers to, because it does not retain your previous prompts.
So how can we carry on a multi-prompt interaction with the model, like we would with a chatbot?
Look at the example below.
We have to provide the prior prompts and responses as part of the context of each new turn in the conversation.
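One way to do that by hand is to wrap each turn in the Llama 2 chat tags yourself and send the whole conversation as a single prompt, passing add_inst=False since the tags are already included. Here is a minimal sketch using the birthday-card variables from the example above; the tag layout mirrors the get_prompt_chat() helper shown at the end of this page.

prompt_chat = f"""
<s>[INST] {prompt} [/INST]
{response}
</s><s>[INST] {prompt_2} [/INST]
"""
# add_inst=False because the [INST] tags are already part of the prompt.
response_2 = llama(prompt_chat, add_inst=False)
print(response_2)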
Chat Helper Function
Instead of typing the [INST] and <s>/</s> tags manually, we can use the llama_chat() helper function, which inserts them for us. The utils.py file included at the end of this page shows the code for these functions.
from utils import llama_chat
= """
prompt_1 What are fun activities I can do this weekend?
"""
= llama(prompt_1)
response_1
= """
prompt_2 Which of these would be good for my health?
"""
= [prompt_1,prompt_2]
prompts = [response_1]
responses
# Pass prompts and responses to llama_chat function.
#Set verbose=true to view the code passed
= llama_chat(prompts,responses,verbose=True)
response_2
print(response_2)
Here is a sample follow-up turn you can adapt:
# Replace prompt_3 with your own question!
prompt_3 = "Which of these activities would be fun with friends?"
prompts = [prompt_1, prompt_2, prompt_3]
responses = [response_1, response_2]

response_3 = llama_chat(prompts, responses, verbose=True)
print(response_3)
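One practical note before looking at the helper code: utils.py builds the request URL and headers at import time from environment variables, so your Together API key must be available before from utils import llama, llama_chat runs. A minimal sketch, assuming you keep credentials in a .env file that load_dotenv() picks up (the values below are placeholders):

# .env (placed next to your notebook)
TOGETHER_API_KEY=your_api_key_here
# Optional; utils.py falls back to https://api.together.xyz when this is unset
DLAI_TOGETHER_API_BASE=https://api.together.xyz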
utils.py
Here is the utils.py file that we use to define the llama() and llama_chat() functions.
import os
import json
import time
import warnings
import requests
from dotenv import load_dotenv, find_dotenv

# Initialize global variables
_ = load_dotenv(find_dotenv())
# warnings.filterwarnings('ignore')

url = f"{os.getenv('DLAI_TOGETHER_API_BASE', 'https://api.together.xyz')}/inference"
headers = {
    "Authorization": f"Bearer {os.getenv('TOGETHER_API_KEY')}",
    "Content-Type": "application/json"
}
def llama(prompt,
          add_inst=True,
          model="togethercomputer/llama-2-7b-chat",
          temperature=0.0,
          max_tokens=1024,
          verbose=False,
          url=url,
          headers=headers,
          base=2,        # base wait time in seconds between retries
          max_tries=3):

    if add_inst:
        prompt = f"[INST]{prompt}[/INST]"

    if verbose:
        print(f"Prompt:\n{prompt}\n")
        print(f"model: {model}")

    data = {
        "model": model,
        "prompt": prompt,
        "temperature": temperature,
        "max_tokens": max_tokens
    }

    # Allow multiple attempts to call the API in case of downtime.
    # Return the raw response object after max_tries failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]

    for num_tries in range(max_tries):
        try:
            response = requests.post(url, headers=headers, json=data)
            return response.json()['output']['choices'][0]['text']
        except Exception as e:
            if response.status_code != 500:
                return response.json()

            print(f"error message: {e}")
            print(f"response object: {response}")
            print(f"num_tries {num_tries}")
            print(f"Waiting {wait_seconds[num_tries]} seconds before automatically trying again.")
            time.sleep(wait_seconds[num_tries])

    print(f"Tried {max_tries} times to make API call to get a valid response object")
    print("Returning provided response")
    return response
def llama_chat(prompts,
               responses,
               model="togethercomputer/llama-2-7b-chat",
               temperature=0.0,
               max_tokens=1024,
               verbose=False,
               url=url,
               headers=headers,
               base=2,
               max_tries=3):

    # Combine all prior prompts and responses into one chat-formatted prompt.
    prompt = get_prompt_chat(prompts, responses)

    # Allow multiple attempts to call the API in case of downtime.
    # Return the raw response object after max_tries failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]

    for num_tries in range(max_tries):
        try:
            response = llama(prompt=prompt,
                             add_inst=False,
                             model=model,
                             temperature=temperature,
                             max_tokens=max_tokens,
                             verbose=verbose,
                             url=url,
                             headers=headers)
            return response
        except Exception as e:
            if response.status_code != 500:
                return response.json()

            print(f"error message: {e}")
            print(f"response object: {response}")
            print(f"num_tries {num_tries}")
            print(f"Waiting {wait_seconds[num_tries]} seconds before automatically trying again.")
            time.sleep(wait_seconds[num_tries])

    print(f"Tried {max_tries} times to make API call to get a valid response object")
    print("Returning provided response")
    return response
def get_prompt_chat(prompts, responses):
    # Start the conversation with the first user prompt.
    prompt_chat = f"<s>[INST] {prompts[0]} [/INST]"
    # Append each prior response followed by the next user prompt.
    for n, response in enumerate(responses):
        prompt = prompts[n + 1]
        prompt_chat += f"\n{response}\n </s><s>[INST] \n{ prompt }\n [/INST]"
    return prompt_chat
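For reference, calling get_prompt_chat() directly on a short conversation shows how the turns are stitched together (the strings below are illustrative, not real model output):

print(get_prompt_chat(
    ["What are fun activities I can do this weekend?",
     "Which of these would be good for my health?"],
    ["You could go hiking, visit a museum, or try a new recipe."]
))
# <s>[INST] What are fun activities I can do this weekend? [/INST]
# You could go hiking, visit a museum, or try a new recipe.
#  </s><s>[INST] 
# Which of these would be good for my health?
#  [/INST]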