Merge pull request #126 from narquette/week4_doc_string
Week 4 Exercise - Add Document Strings For Existing Python File
This commit is contained in:
29
week4/community-contributions/doc_string_exercise/README.md
Normal file
29
week4/community-contributions/doc_string_exercise/README.md
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# Script Overview
|
||||||
|
|
||||||
|
This documentation shows you how to run the Python script generate_doc_string.py. It is designed to take input
|
||||||
|
from an existing python file and create a new one with a suffix ('claude' or 'gpt'). If you do not specify an LLM
|
||||||
|
model, it will default to claude.
|
||||||
|
|
||||||
|
# How to run
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
conda activate llms
|
||||||
|
cd <script_location>
|
||||||
|
python generate_doc_string -fp <full_file_path> -llm <name_of_model>
|
||||||
|
```
|
||||||
|
|
||||||
|
# Show Help Instructions
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python generate_doc_string --help
|
||||||
|
```
|
||||||
|
|
||||||
|
# Error Checking
|
||||||
|
|
||||||
|
1) File Path Existence
|
||||||
|
|
||||||
|
If the file path doesn't exist, the script will stop running and print out an error.
|
||||||
|
|
||||||
|
2) LLM Model Choice
|
||||||
|
|
||||||
|
If you choose something other than 'gpt' or 'claude', it will show an assertion error.
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
def calculate(iterations, param1, param2):
    """
    Accumulate an alternating series of reciprocals.

    Starting from 1.0, each iteration i (1..iterations) first subtracts
    1/(i*param1 - param2) and then adds 1/(i*param1 + param2).

    :param iterations: number of loop iterations to run
    :param param1: multiplier applied to the loop index
    :param param2: offset subtracted from / added to the scaled index
    :return: the accumulated result as a float
    """
    total = 1.0
    for step in range(1, iterations + 1):
        denom = step * param1 - param2
        total -= 1 / denom
        denom = step * param1 + param2
        total += 1 / denom
    return total
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_2(iterations, param1, param2):
    """
    Accumulate an alternating series of reciprocals.

    Identical computation to ``calculate``: starting from 1.0, each
    iteration i (1..iterations) subtracts 1/(i*param1 - param2) and
    then adds 1/(i*param1 + param2).

    :param iterations: number of loop iterations to run
    :param param1: multiplier applied to the loop index
    :param param2: offset subtracted from / added to the scaled index
    :return: the accumulated result as a float
    """
    acc = 1.0
    for idx in range(1, iterations + 1):
        term = idx * param1 - param2
        acc -= 1 / term
        term = idx * param1 + param2
        acc += 1 / term
    return acc
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
from argparse import ArgumentParser
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from openai import OpenAI
|
||||||
|
import anthropic
|
||||||
|
from utils import add_doc_string, Model, get_system_message
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Command-line entry point: add docstrings to a python file using an LLM.

    Parses the target file path and model choice from the command line,
    validates them, loads API keys from the environment, then delegates to
    add_doc_string() with the selected client/model configuration.

    :return: None
    :raises FileNotFoundError: if the supplied file path does not exist
    """
    # get run time arguments
    parser = ArgumentParser(
        prog='Generate Doc String for an existing functions',
        description='Run Doc String for a given file and model',
    )
    parser.add_argument(
        '-fp',
        '--file_path',
        help='Enter the file path to the script that will be updated with doc strings',
        default=None
    )
    parser.add_argument(
        '-llm',
        '--llm_model',
        # argparse validates the choice up front, replacing the old assert
        choices=['gpt', 'claude'],
        help='Choose the LLM model that will create the doc strings',
        default='claude'
    )

    # get run time arguments
    args = parser.parse_args()

    # fail with a clear usage error instead of Path(None) raising TypeError
    if args.file_path is None:
        parser.error('a file path is required (-fp/--file_path)')

    file_path = Path(args.file_path)
    llm_model = args.llm_model

    # check for file path (real exception: asserts are stripped under -O)
    if not file_path.exists():
        raise FileNotFoundError(f"File Path {file_path.as_posix()} doesn't exist. Please try again.")

    # load keys and environment variables
    load_dotenv()
    os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
    os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')
    os.environ['HF_TOKEN'] = os.getenv('HF_INF_TOKEN', 'your-key-if-not-using-env')

    # get system messages
    system_message = get_system_message()

    # map each supported model name to its client and concrete model id
    model_info = {
        'gpt': {
            'client': OpenAI(),
            'model': Model.OPENAI_MODEL.value,
        },
        'claude': {
            'client': anthropic.Anthropic(),
            'model': Model.CLAUDE_MODEL.value
        }
    }

    # add standard arguments shared by both back ends
    model_info[llm_model].update(
        {
            'file_path': file_path,
            'system_message': system_message
        }
    )

    # generate the doc-stringed copy of the file with the chosen model
    print(f"\nSTARTED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")
    add_doc_string(**model_info[llm_model])
    print(f"\nFINISHED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")


if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
147
week4/community-contributions/doc_string_exercise/utils.py
Normal file
147
week4/community-contributions/doc_string_exercise/utils.py
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
from enum import Enum
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class Model(Enum):
    """
    Supported LLM back ends, mapped to their concrete model identifiers.
    """
    # default OpenAI chat model
    OPENAI_MODEL = "gpt-4o"
    # default Anthropic model
    CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
|
||||||
|
|
||||||
|
|
||||||
|
def get_system_message() -> str:
    """
    Build the system prompt for the docstring-writing assistant.

    :return: instructions telling the model to rewrite a python function
        with reST-style docstrings and typing annotations, and nothing else.
    :rtype: str
    """
    parts = (
        "You are an assistant that creates doc strings in reStructure Text format for an existing python function. ",
        "Respond only with an updated python function; use comments sparingly and do not provide any explanation other than occasional comments. ",
        "Be sure to include typing annotation for each function argument or key word argument and return object types.",
    )
    return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def user_prompt_for(python: str) -> str:
    """
    Build the user prompt asking the model to add docstrings to *python*.

    :param python: source code of the function(s) to be rewritten
    :type python: str
    :return: the full prompt text — instructions followed by the code
    :rtype: str
    """
    instructions = (
        "Rewrite this Python function with doc strings in the reStructuredText style."
        "Respond only with python code; do not explain your work other than a few comments. "
        "Be sure to write a description of the function purpose with typing for each argument and return\n\n"
    )
    return instructions + python
|
||||||
|
|
||||||
|
|
||||||
|
def messages_for(python: str, system_message: str) -> list:
    """
    Assemble the chat message list for an OpenAI-style completion call.

    :param python: source code to be rewritten with docstrings
    :type python: str
    :param system_message: system prompt for the assistant
    :type system_message: str
    :return: two messages — the system prompt and the user request
    :rtype: list
    """
    system_entry = {"role": "system", "content": system_message}
    user_entry = {"role": "user", "content": user_prompt_for(python)}
    return [system_entry, user_entry]
|
||||||
|
|
||||||
|
|
||||||
|
def write_output(output: str, file_suffix: str, file_path: Path) -> None:
    """
    Write the processed output to a file next to the input file.

    The model reply is often wrapped in markdown code fences; these are
    stripped before writing so the output file is valid python.

    :param output: The processed Python code with docstrings.
    :type output: str
    :param file_suffix: The suffix to be added to the output file name
        (empty/None means the input file name is reused).
    :type file_suffix: str
    :param file_path: The path of the input file.
    :type file_path: Path
    :return: None
    """
    # strip markdown code fences (the original replace("", "") was a no-op)
    code = output.replace("```python", "").replace("```", "")
    out_file = file_path.with_name(f"{file_path.stem}{file_suffix if file_suffix else ''}.py")
    out_file.write_text(code)
|
||||||
|
|
||||||
|
|
||||||
|
def add_doc_string(client: object, system_message: str, file_path: Path, model: str) -> None:
    """
    Dispatch docstring generation to the GPT or Claude implementation.

    :param client: the AI client object (OpenAI or Anthropic instance)
    :type client: object
    :param system_message: system prompt for the assistant
    :type system_message: str
    :param file_path: path of the input python file
    :type file_path: Path
    :param model: model identifier; any name containing 'gpt' routes to the
        OpenAI path, everything else to Claude
    :type model: str
    :return: None
    """
    # pick the handler by model name; claude is the fallback, matching the CLI default
    handler = add_doc_string_gpt if 'gpt' in model else add_doc_string_claude
    handler(client=client, system_message=system_message, file_path=file_path, model=model)
|
||||||
|
|
||||||
|
|
||||||
|
def add_doc_string_gpt(client: object, system_message: str, file_path: Path, model: str = 'gpt-4o') -> None:
    """
    Stream a docstring-annotated rewrite of *file_path* from a GPT model.

    Reads the file, streams the model's reply to stdout as it arrives, then
    writes the complete reply next to the input with a '_gpt' suffix.

    :param client: the OpenAI client object
    :type client: object
    :param system_message: system prompt for the assistant
    :type system_message: str
    :param file_path: path of the input python file
    :type file_path: Path
    :param model: the GPT model to use, defaults to 'gpt-4o'
    :type model: str
    :return: None
    """
    source = file_path.read_text(encoding='utf-8')
    stream = client.chat.completions.create(
        model=model,
        messages=messages_for(source, system_message),
        stream=True,
    )
    pieces = []
    for chunk in stream:
        fragment = chunk.choices[0].delta.content or ""
        pieces.append(fragment)
        # echo progress to the console as the reply streams in
        print(fragment, end='', flush=True)
    write_output("".join(pieces), file_suffix='_gpt', file_path=file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def add_doc_string_claude(client: object, system_message: str, file_path: Path, model: str = 'claude-3-5-sonnet-20240620') -> None:
    """
    Stream a docstring-annotated rewrite of *file_path* from a Claude model.

    Reads the file, streams the model's reply to stdout as it arrives, then
    writes the complete reply next to the input with a '_claude' suffix.

    :param client: the Anthropic client object
    :type client: object
    :param system_message: system prompt for the assistant
    :type system_message: str
    :param file_path: path of the input python file
    :type file_path: Path
    :param model: the Claude model to use, defaults to 'claude-3-5-sonnet-20240620'
    :type model: str
    :return: None
    """
    source = file_path.read_text(encoding='utf-8')
    stream_manager = client.messages.stream(
        model=model,
        max_tokens=2000,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(source)}],
    )
    pieces = []
    with stream_manager as stream:
        for text in stream.text_stream:
            pieces.append(text)
            # echo progress to the console as the reply streams in
            print(text, end="", flush=True)
    write_output("".join(pieces), file_suffix='_claude', file_path=file_path)
|
||||||
Reference in New Issue
Block a user