Merge pull request #126 from narquette/week4_doc_string
Week 4 Exercise - Add Document Strings For Existing Python File
This commit is contained in:
29
week4/community-contributions/doc_string_exercise/README.md
Normal file
29
week4/community-contributions/doc_string_exercise/README.md
Normal file
@@ -0,0 +1,29 @@
|
||||
# Script Overview
|
||||
|
||||
The documentation will show you how to run the python script generate_doc_string.py. It is designed to take input
|
||||
from an existing python file and create a new one with a suffix ('_claude' or '_gpt'). If you do not specify an llm
|
||||
model, it will default to claude.
|
||||
|
||||
# How to run
|
||||
|
||||
```powershell
|
||||
conda activate llms
|
||||
cd <script_location>
|
||||
python generate_doc_string.py -fp <full_file_path> -llm <name_of_model>
|
||||
```
|
||||
|
||||
# Show Help Instructions
|
||||
|
||||
```shell
|
||||
python generate_doc_string.py --help
|
||||
```
|
||||
|
||||
# Error Checking
|
||||
|
||||
1) File Path Existence
|
||||
|
||||
If the file path doesn't exist, the script will stop running and print out an error.
|
||||
|
||||
2) LLM Model Choice
|
||||
|
||||
If you choose something other than 'gpt' or 'claude', it will show an assertion error.
|
||||
@@ -0,0 +1,19 @@
|
||||
|
||||
def calculate(iterations, param1, param2):
    """
    Accumulate an alternating series of reciprocal terms.

    Starting from 1.0, every step ``i`` in ``1..iterations`` first subtracts
    ``1 / (i * param1 - param2)`` and then adds ``1 / (i * param1 + param2)``.

    :param iterations: Number of steps to run; the loop is skipped when below 1.
    :param param1: Factor applied to the step index.
    :param param2: Offset subtracted from, then added to, the scaled index.
    :return: The accumulated total.
    :raises ZeroDivisionError: If either denominator evaluates to zero.
    """
    total = 1.0
    for step in range(1, iterations + 1):
        scaled = step * param1
        # Subtract before adding: the order is kept so rounding stays identical.
        total -= 1 / (scaled - param2)
        total += 1 / (scaled + param2)
    return total
|
||||
|
||||
|
||||
def calculate_2(iterations, param1, param2):
    """
    Compute the same alternating reciprocal series as a running sum.

    The result starts at 1.0. For each index from 1 up to and including
    ``iterations`` the reciprocal of ``index * param1 - param2`` is subtracted
    and the reciprocal of ``index * param1 + param2`` is added.

    :param iterations: How many indices to process.
    :param param1: Multiplier for the index.
    :param param2: Offset applied on either side of the scaled index.
    :return: The final running sum.
    :raises ZeroDivisionError: If a denominator is zero.
    """
    running = 1.0
    for index in range(1, iterations + 1):
        lower = index * param1 - param2
        running -= 1 / lower
        upper = index * param1 + param2
        running += 1 / upper
    return running
|
||||
@@ -0,0 +1,85 @@
|
||||
from argparse import ArgumentParser
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from openai import OpenAI
|
||||
import anthropic
|
||||
from utils import add_doc_string, Model, get_system_message
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main(argv=None):
    """
    Command-line entry point: add doc strings to a Python file using an LLM.

    Parses the run-time arguments, validates them, loads API keys from the
    environment (via ``.env``), builds the client for the selected model and
    hands everything to ``add_doc_string``.

    :param argv: Argument list to parse. ``None`` (the default) lets argparse
        read the process command line, which is the original behaviour.
    :type argv: list[str] | None
    :return: None
    :raises SystemExit: Raised by argparse when ``--file_path`` is missing or
        the arguments cannot be parsed.
    :raises AssertionError: If the file path does not exist, or the model is
        not 'gpt' or 'claude'.
    """
    # get run time arguments
    parser = ArgumentParser(
        prog='Generate Doc String for an existing functions',
        description='Run Doc String for a given file and model',
    )
    parser.add_argument(
        '-fp',
        '--file_path',
        help='Enter the file path to the script that will be updated with doc strings',
        # Required: with the old default of None, Path(None) crashed with an
        # opaque TypeError before any validation message could be shown.
        required=True,
    )
    parser.add_argument(
        '-llm',
        '--llm_model',
        help='Choose the LLM model that will create the doc strings',
        default='claude',
    )

    args = parser.parse_args(argv)
    file_path = Path(args.file_path)
    llm_model = args.llm_model

    # Validation raises explicitly instead of using `assert`, so the checks
    # still run under `python -O`. The exception type is kept as
    # AssertionError to match the documented behaviour.
    if not file_path.exists():
        raise AssertionError(
            f"File Path {str(file_path.as_posix())} doesn't exist. Please try again."
        )

    # check for valid llm values
    if llm_model not in ('gpt', 'claude'):
        raise AssertionError(
            f"Invalid model chosen '{llm_model}'. "
            f"Please choose a valid model ('gpt' or 'claude')"
        )

    # load keys and environment variables
    load_dotenv()
    os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
    os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')
    os.environ['HF_TOKEN'] = os.getenv('HF_INF_TOKEN', 'your-key-if-not-using-env')

    # get system messages
    system_message = get_system_message()

    # Build only the client that was selected; the other provider's client
    # is never used in this run.
    if llm_model == 'gpt':
        model_args = {
            'client': OpenAI(),
            'model': Model.OPENAI_MODEL.value,
        }
    else:
        model_args = {
            'client': anthropic.Anthropic(),
            'model': Model.CLAUDE_MODEL.value,
        }

    # add standard arguments shared by both providers
    model_args.update(
        {
            'file_path': file_path,
            'system_message': system_message,
        }
    )

    # generate the doc strings with the selected model
    print(f"\nSTARTED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")
    add_doc_string(**model_args)
    print(f"\nFINISHED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")
|
||||
|
||||
|
||||
# Run the command-line tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
147
week4/community-contributions/doc_string_exercise/utils.py
Normal file
147
week4/community-contributions/doc_string_exercise/utils.py
Normal file
@@ -0,0 +1,147 @@
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class Model(Enum):
    """
    Model identifiers for the supported LLM providers.

    Each member's ``value`` is the model name string handed to the
    corresponding provider client.
    """

    # Model name used with the OpenAI client.
    OPENAI_MODEL = "gpt-4o"

    # Model name used with the Anthropic client.
    CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
|
||||
|
||||
|
||||
def get_system_message() -> str:
    """
    Build the system prompt used when asking a model to write doc strings.

    The prompt is assembled from three fixed sentences joined without any
    extra separator (the first two already end in a space).

    :return: The complete system prompt.
    :rtype: str
    """
    sentences = (
        "You are an assistant that creates doc strings in reStructure Text format for an existing python function. ",
        "Respond only with an updated python function; use comments sparingly and do not provide any explanation other than occasional comments. ",
        "Be sure to include typing annotation for each function argument or key word argument and return object types.",
    )
    return "".join(sentences)
|
||||
|
||||
|
||||
def user_prompt_for(python: str) -> str:
    """
    Generate the user prompt asking for a doc-stringed rewrite of some code.

    The prompt consists of three instruction sentences, a blank line, and
    then the supplied source code verbatim.

    :param python: The Python code to be rewritten.
    :type python: str
    :return: The instruction text followed by the Python code.
    :rtype: str
    """
    instructions = (
        # Trailing space added: the first sentence previously ran straight
        # into the second ("style.Respond").
        "Rewrite this Python function with doc strings in the reStructuredText style. "
        "Respond only with python code; do not explain your work other than a few comments. "
        "Be sure to write a description of the function purpose with typing for each argument and return\n\n"
    )
    return instructions + python
|
||||
|
||||
|
||||
def messages_for(python: str, system_message: str) -> list:
    """
    Assemble the two-message conversation sent to a chat model.

    :param python: The Python code to be processed; it is wrapped into the
        user prompt by ``user_prompt_for``.
    :type python: str
    :param system_message: The system message for the AI assistant.
    :type system_message: str
    :return: A list with a system message followed by a user message, each a
        dictionary with ``role`` and ``content`` keys.
    :rtype: list
    """
    system_entry = {"role": "system", "content": system_message}
    user_entry = {"role": "user", "content": user_prompt_for(python)}
    return [system_entry, user_entry]
|
||||
|
||||
|
||||
def write_output(output: str, file_suffix: str, file_path: Path) -> None:
    """
    Write the model's reply next to the input file as a new Python file.

    Markdown code fences are removed from the reply first, so the result is
    plain Python source. The output file sits in the same directory as
    ``file_path`` and is named ``<stem><file_suffix>.py``.

    :param output: The processed Python code with docstrings, possibly
        wrapped in Markdown code fences.
    :type output: str
    :param file_suffix: The suffix to be added to the output file name; a
        falsy value adds nothing.
    :type file_suffix: str
    :param file_path: The path of the input file.
    :type file_path: Path
    :return: None
    """
    # Strip the opening "```python" fence before the bare "```" fence;
    # otherwise the first replace would leave a stray "python" behind.
    code = output.replace("```python", "").replace("```", "")
    out_file = file_path.with_name(f"{file_path.stem}{file_suffix or ''}.py")
    # The input is read as UTF-8, so write the result the same way instead
    # of relying on the platform default encoding.
    out_file.write_text(code, encoding='utf-8')
|
||||
|
||||
|
||||
def add_doc_string(client: object, system_message: str, file_path: Path, model: str) -> None:
    """
    Dispatch doc-string generation to the provider matching ``model``.

    A model name containing ``'gpt'`` is handled by ``add_doc_string_gpt``;
    every other name is handled by ``add_doc_string_claude``.

    :param client: The AI client object.
    :type client: object
    :param system_message: The system message for the AI assistant.
    :type system_message: str
    :param file_path: The path of the input Python file.
    :type file_path: Path
    :param model: The AI model to be used.
    :type model: str
    :return: None
    """
    handler = add_doc_string_gpt if 'gpt' in model else add_doc_string_claude
    handler(
        client=client,
        system_message=system_message,
        file_path=file_path,
        model=model,
    )
|
||||
|
||||
|
||||
def add_doc_string_gpt(client: object, system_message: str, file_path: Path, model: str = 'gpt-4o') -> None:
    """
    Generate doc strings for a Python file with a GPT model.

    The file is read as UTF-8 and sent as a streamed chat completion. Each
    streamed fragment is echoed to stdout as it arrives, and the full reply
    is written to a sibling file with the ``_gpt`` suffix.

    :param client: The OpenAI client object.
    :type client: object
    :param system_message: The system message for the AI assistant.
    :type system_message: str
    :param file_path: The path of the input Python file.
    :type file_path: Path
    :param model: The GPT model to be used, defaults to 'gpt-4o'.
    :type model: str
    :return: None
    """
    source = file_path.read_text(encoding='utf-8')
    stream = client.chat.completions.create(
        model=model,
        messages=messages_for(source, system_message),
        stream=True,
    )
    pieces = []
    for chunk in stream:
        # A chunk's delta content may be None; treat that as empty text.
        piece = chunk.choices[0].delta.content or ""
        pieces.append(piece)
        print(piece, end='', flush=True)
    write_output("".join(pieces), file_suffix='_gpt', file_path=file_path)
|
||||
|
||||
|
||||
def add_doc_string_claude(client: object, system_message: str, file_path: Path,
                          model: str = 'claude-3-5-sonnet-20240620', max_tokens: int = 2000) -> None:
    """
    Add docstrings to a Python file using Claude model.

    The file is read as UTF-8 and sent as a streamed message request. Each
    streamed text fragment is echoed to stdout as it arrives, and the full
    reply is written to a sibling file with the ``_claude`` suffix.

    :param client: The Anthropic client object.
    :type client: object
    :param system_message: The system message for the AI assistant.
    :type system_message: str
    :param file_path: The path of the input Python file.
    :type file_path: Path
    :param model: The Claude model to be used, defaults to 'claude-3-5-sonnet-20240620'.
    :type model: str
    :param max_tokens: Upper bound on the tokens the model may generate,
        defaults to 2000 (the previously hard-coded value). Raise it for
        larger input files so the rewritten code is not cut short.
    :type max_tokens: int
    :return: None
    """
    code_text = file_path.read_text(encoding='utf-8')
    pieces = []
    with client.messages.stream(
        model=model,
        max_tokens=max_tokens,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(code_text)}],
    ) as stream:
        for text in stream.text_stream:
            pieces.append(text)
            print(text, end="", flush=True)
    write_output("".join(pieces), file_suffix='_claude', file_path=file_path)
|
||||
Reference in New Issue
Block a user