Python Code

Code Llama

Here is a typical list of the Code Llama models a cloud service provider might offer (these are hosted by together.ai):

  • togethercomputer/CodeLlama-7b
  • togethercomputer/CodeLlama-13b
  • togethercomputer/CodeLlama-34b
  • togethercomputer/CodeLlama-7b-Python
  • togethercomputer/CodeLlama-13b-Python
  • togethercomputer/CodeLlama-34b-Python
  • togethercomputer/CodeLlama-7b-Instruct
  • togethercomputer/CodeLlama-13b-Instruct
  • togethercomputer/CodeLlama-34b-Instruct
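
The model name is passed straight through to the API, so switching variants is a one-argument change. For example, to target the Python-specialized 13B model with the code_llama helper from Utils.py below (assuming prompt holds your prompt string):

response = code_llama(prompt, model="togethercomputer/CodeLlama-13b-Python")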

Utils.py is provided again at the end of this page.

Solve Math Problem

from utils import llama, code_llama

temp_min = [42, 52, 47, 47, 53, 48, 47, 53, 55, 56, 57, 50, 48, 45]
temp_max = [55, 57, 59, 59, 58, 62, 65, 65, 64, 63, 60, 60, 62, 62]

prompt = f"""
Below is the 14-day temperature forecast in degrees Fahrenheit:
14-day low temperatures: {temp_min}
14-day high temperatures: {temp_max}
Which day has the lowest temperature?
"""


# Using the base 7B model
response = llama(prompt)
print(response)

Let’s ask it to write code to solve the problem

Code for Function

prompt_2 = f"""
Write Python code that can calculate
the minimum of the list temp_min
and the maximum of the list temp_max
"""
response_2 = code_llama(prompt_2)
print(response_2)

def get_min_max(temp_min, temp_max):
    return min(temp_min), max(temp_max)

Use the function on the lists above

temp_min = [42, 52, 47, 47, 53, 48, 47, 53, 55, 56, 57, 50, 48, 45]
temp_max = [55, 57, 59, 59, 58, 62, 65, 65, 64, 63, 60, 60, 62, 62]

results = get_min_max(temp_min, temp_max)
print(results)
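
Since min(temp_min) is 42 and max(temp_max) is 65, the expected output is:

(42, 65)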

Code for Fibonacci Calculation

Write a prompt that asks the model to write code that calculates the n-th Fibonacci number

prompt = """
Provide a function that calculates the n-th fibonacci number.
"""

response = code_llama(prompt, verbose=True)
print(response)

def fibonacci(n):
    if n <= 1:
        return n
    else:
        return fibonacci(n-1) + fibonacci(n-2)
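
A quick sanity check of the generated function (counting from fibonacci(0) == 0):

print(fibonacci(10))  # 55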

Code Filling

Use Code Llama to fill in partially completed code

  • Notice the [INST]...[/INST] tags inserted into the prompt by the function
prompt = """
def star_rating(n):
    '''
    This function returns a rating given the number n,
    where n is an integer from 1 to 5.
    '''

    if n == 1:
        rating="poor"
    <FILL>
    elif n == 5:
        rating="excellent"

    return rating
"""

response = code_llama(prompt,
                      verbose=True)
                      
print(response)
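
The model's completion varies from run to run; a plausible fill for the <FILL> section (the exact rating words are illustrative) might look like:

    elif n == 2:
        rating="fair"
    elif n == 3:
        rating="average"
    elif n == 4:
        rating="good"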

Improve Code Efficiency

Make Code More Efficient

  • Let’s ask the model to critique the initial code it generated
code = """
def fibonacci(n):
    if n <= 1:
        return n
    else:
        return fibonacci(n-1) + fibonacci(n-2)
"""

prompt_1 = f"""
For the following code: {code}
Is this implementation efficient?
Please explain.
"""
response_1 = code_llama(prompt_1, verbose=True)

print(response_1)



# ....A more efficient implementation of the Fibonacci sequence would be to use a loop instead of recursion, like this:

def fibonacci(n):
    a, b = 0, 1
    for i in range(n):
        a, b = b, a + b
    return a

# This implementation has a time complexity of O(n), which means that the time it takes to compute the nth Fibonacci number grows linearly with the size of the input
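
Once both versions exist under distinct names (the loop version is called fibonacci_fast in the timing tests below), a quick check confirms they agree:

# Sanity check: recursive and iterative versions should return the same values
assert all(fibonacci(i) == fibonacci_fast(i) for i in range(20))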

Code to Compare Functions

  • Let’s run both functions and monitor the time it takes for each
  • Let’s ask the model to provide code to compare the two
prompt = f"""
Provide sample code that calculates the runtime of a Python function call
"""

response = code_llama(prompt, verbose=True)
print(response)

Prompt:
[INST]
Provide sample code that calculates the runtime of a Python function call
[/INST]

model: togethercomputer/CodeLlama-7b-Instruct

import time

def my_function():
    # do something
    pass

start_time = time.time()
my_function()
end_time = time.time()
print("Runtime:", end_time - start_time)

Run Time Test 1

  • Let’s first run the test on the first function
  • As a reminder, here is the first function
import time

def fibonacci(n):
    if n <= 1:
        return n
    else:
        return fibonacci(n-1) + fibonacci(n-2)

n = 40

start_time = time.time()
fibonacci(n)
end_time = time.time()
print(f"Recursive fibonacci({n})")
print(f"Runtime in seconds: {end_time - start_time}")

Recursive fibonacci(40)
Runtime in seconds: 19.5236082

Run Time Test 2

  • Let’s run the so-called more efficient non-recursive function
  • Here is that function again
import time

def fibonacci_fast(n):
    a, b = 0, 1
    for i in range(n):
        a, b = b, a + b
    return a

n = 40

start_time = time.time()
fibonacci_fast(n)
end_time = time.time()
print(f"Iterative fibonacci_fast({n})")
print(f"Runtime in seconds: {end_time - start_time}")

Iterative fibonacci_fast(40)
Runtime in seconds: 9.89437e-05

Considerably faster: it completes in a fraction of a second.
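
Dividing the two measured runtimes makes the gap concrete (using the timings reported above):

recursive_time = 19.5236082     # from Run Time Test 1
iterative_time = 9.89437e-05    # from Run Time Test 2
print(f"Speedup: {recursive_time / iterative_time:,.0f}x")  # roughly 197,000x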

Input Larger Text

Recall that we ran into input-size limits when we used the non-code Llama models; we can work around them by using Code Llama.

  • Code Llama models can handle much larger input text than the Llama Chat models - more than 20,000 characters.
  • The size of the input text is known as the context window
  • Let’s first rerun the same example using the Llama 2 7B Chat model, which returns an error message because the input is too long
with open("TheVelveteenRabbit.txt", 'r', encoding='utf-8') as file:
    text = file.read()

prompt=f"""
Give me a summary of the following text in 50 words:\n\n 
{text}
"""

# Ask the 7B model to respond
response = llama(prompt)
print(response)
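
A quick check of the input size helps explain the failure:

# The story is far larger than what the chat model can accept
print(f"Input text length: {len(text)} characters")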

Run It with Code Llama

  • This time we receive an output
from utils import llama
with open("TheVelveteenRabbit.txt", 'r', encoding='utf-8') as file:
    text = file.read()

prompt=f"""
Give me a summary of the following text in 50 words:\n\n 
{text}
"""
response = code_llama(prompt)
print(response)

Utils.py


import os
from dotenv import load_dotenv, find_dotenv
import warnings
import requests
import json
import time

# Initialize global variables
_ = load_dotenv(find_dotenv())
# warnings.filterwarnings('ignore')
url = f"{os.getenv('DLAI_TOGETHER_API_BASE', 'https://api.together.xyz')}/inference"
headers = {
        "Authorization": f"Bearer {os.getenv('TOGETHER_API_KEY')}",
        "Content-Type": "application/json"
    }
# max_tokens sets the maximum number of tokens in the response,
# but the sum of input prompt tokens and response tokens
# cannot exceed 4097.
def code_llama(prompt, 
          model="togethercomputer/CodeLlama-7b-Instruct", 
          temperature=0.0, 
          max_tokens=1024,
          verbose=False,
          url=url,
          headers=headers,
          base=2,
          max_tries=3):

    if model.endswith("Instruct"):
        prompt = f"[INST]{prompt}[/INST]"

    if verbose:
        print(f"Prompt:\n{prompt}\n")
        print(f"model: {model}")

    data = {
            "model": model,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens
        }

    # Allow multiple attempts to call the API in case of downtime.
    # Return provided response to user after 3 failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]

    response = None
    for num_tries in range(max_tries):
        try:
            response = requests.post(url, headers=headers, json=data)
            return response.json()['output']['choices'][0]['text']
        except Exception as e:
            # Guard: if the request itself failed, response may still be None
            if response is not None and response.status_code != 500:
                return response.json()

            print(f"error message: {e}")
            print(f"response object: {response}")
            print(f"num_tries {num_tries}")
            print(f"Waiting {wait_seconds[num_tries]} seconds before automatically trying again.")
            time.sleep(wait_seconds[num_tries])
 
    print(f"Tried {max_tries} times to make API call to get a valid response object")
    print("Returning provided response")
    return response


# 20 is the minimum number of new tokens,
# which allows for the largest number of
# tokens in the input prompt: 4097 - 20 = 4077.
# max_tokens limits the number of output tokens;
# the sum of input prompt tokens and max_tokens (response)
# can't exceed 4097.
def llama(prompt, 
          add_inst=True, 
          model="togethercomputer/llama-2-7b-chat", 
          temperature=0.0, 
          max_tokens=1024,
          verbose=False,
          url=url,
          headers=headers,
          base=2, # number of seconds to wait
          max_tries=3):
    
    if add_inst:
        prompt = f"[INST]{prompt}[/INST]"

    if verbose:
        print(f"Prompt:\n{prompt}\n")
        print(f"model: {model}")

    data = {
            "model": model,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens
        }

    # Allow multiple attempts to call the API in case of downtime.
    # Return provided response to user after 3 failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]

    response = None
    for num_tries in range(max_tries):
        try:
            response = requests.post(url, headers=headers, json=data)
            return response.json()['output']['choices'][0]['text']
        except Exception as e:
            # Guard: if the request itself failed, response may still be None
            if response is not None and response.status_code != 500:
                return response.json()

            print(f"error message: {e}")
            print(f"response object: {response}")
            print(f"num_tries {num_tries}")
            print(f"Waiting {wait_seconds[num_tries]} seconds before automatically trying again.")
            time.sleep(wait_seconds[num_tries])
 
    print(f"Tried {max_tries} times to make API call to get a valid response object")
    print("Returning provided response")
    return response


def llama_chat(prompts, 
               responses,
               model="togethercomputer/llama-2-7b-chat", 
               temperature=0.0, 
               max_tokens=1024,
               verbose=False,
               url=url,
               headers=headers,
               base=2,
               max_tries=3
              ):

    prompt = get_prompt_chat(prompts,responses)

    # Allow multiple attempts to call the API in case of downtime.
    # Return provided response to user after 3 failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]

    response = None
    for num_tries in range(max_tries):
        try:
            response = llama(prompt=prompt,
                             add_inst=False,
                             model=model,
                             temperature=temperature,
                             max_tokens=max_tokens,
                             verbose=verbose,
                             url=url,
                             headers=headers
                            )
            return response
        except Exception as e:
            # llama() returns text rather than a requests.Response,
            # so on failure just log the error and retry
            print(f"error message: {e}")
            print(f"response object: {response}")
            print(f"num_tries {num_tries}")
            print(f"Waiting {wait_seconds[num_tries]} seconds before automatically trying again.")
            time.sleep(wait_seconds[num_tries])
 
    print(f"Tried {max_tries} times to make API call to get a valid response object")
    print("Returning provided response")
    return response


def get_prompt_chat(prompts, responses):
    prompt_chat = f"<s>[INST] {prompts[0]} [/INST]"
    for n, response in enumerate(responses):
        prompt = prompts[n + 1]
        prompt_chat += f"\n{response}\n </s><s>[INST] \n{prompt}\n [/INST]"

    return prompt_chat
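
A minimal usage sketch of llama_chat (the conversation below is hypothetical): prompts must contain one more entry than responses, and get_prompt_chat stitches the turns into a single [INST]-tagged prompt so the model sees the full history:

# Hypothetical multi-turn conversation
prompts = [
    "What are the most popular Python web frameworks?",
    "Which one would you recommend for a beginner?",
]
responses = [
    "The most popular Python web frameworks are Django, Flask, and FastAPI.",
]

print(llama_chat(prompts, responses))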