Merge pull request #126 from narquette/week4_doc_string
Week 4 Exercise - Add Document Strings For Existing Python File
This commit is contained in:
29
week4/community-contributions/doc_string_exercise/README.md
Normal file
29
week4/community-contributions/doc_string_exercise/README.md
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# Script Overview
|
||||||
|
|
||||||
|
This documentation shows you how to run the Python script generate_doc_string.py. It is designed to take input
|
||||||
|
from an existing python file and create a new one with a suffix ('claude' or 'gpt'). If you do not specify an LLM
|
||||||
|
model, it will default to claude.
|
||||||
|
|
||||||
|
# How to run
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
conda activate llms
|
||||||
|
cd <script_location>
|
||||||
|
python generate_doc_string -fp <full_file_path> -llm <name_of_model>
|
||||||
|
```
|
||||||
|
|
||||||
|
# Show Help Instructions
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python generate_doc_string --help
|
||||||
|
```
|
||||||
|
|
||||||
|
# Error Checking
|
||||||
|
|
||||||
|
1) File Path Existence
|
||||||
|
|
||||||
|
If the file path doesn't exist, the script will stop running and print out an error.
|
||||||
|
|
||||||
|
2) LLM Model Choice
|
||||||
|
|
||||||
|
If you choose something other than 'gpt' or 'claude', it will show an assertion error.
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
def calculate(iterations, param1, param2):
    """
    Accumulate an alternating series of reciprocals.

    Starting from 1.0, each iteration i (1..iterations) first subtracts
    1/(i*param1 - param2) and then adds 1/(i*param1 + param2).

    :param iterations: number of loop iterations to run
    :param param1: multiplier applied to the loop index
    :param param2: offset subtracted from / added to the scaled index
    :return: the accumulated result as a float
    """
    total = 1.0
    for step in range(1, iterations + 1):
        denom = step * param1 - param2
        total -= 1 / denom
        denom = step * param1 + param2
        total += 1 / denom
    return total
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_2(iterations, param1, param2):
    """
    Accumulate an alternating series of reciprocals.

    Identical computation to ``calculate``: starting from 1.0, each
    iteration i (1..iterations) subtracts 1/(i*param1 - param2) and
    then adds 1/(i*param1 + param2).

    :param iterations: number of loop iterations to run
    :param param1: multiplier applied to the loop index
    :param param2: offset subtracted from / added to the scaled index
    :return: the accumulated result as a float
    """
    acc = 1.0
    for idx in range(1, iterations + 1):
        term = idx * param1 - param2
        acc -= 1 / term
        term = idx * param1 + param2
        acc += 1 / term
    return acc
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
from argparse import ArgumentParser
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from openai import OpenAI
|
||||||
|
import anthropic
|
||||||
|
from utils import add_doc_string, Model, get_system_message
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Command-line entry point: add docstrings to a python file using an LLM.

    Parses the target file path and model choice from the command line,
    validates them, loads API keys from the environment, then delegates to
    add_doc_string() with the selected client/model configuration.

    :return: None
    :raises FileNotFoundError: if the supplied file path does not exist
    """
    # get run time arguments
    parser = ArgumentParser(
        prog='Generate Doc String for an existing functions',
        description='Run Doc String for a given file and model',
    )
    parser.add_argument(
        '-fp',
        '--file_path',
        help='Enter the file path to the script that will be updated with doc strings',
        default=None
    )
    parser.add_argument(
        '-llm',
        '--llm_model',
        # argparse validates the choice up front, replacing the old assert
        choices=['gpt', 'claude'],
        help='Choose the LLM model that will create the doc strings',
        default='claude'
    )

    # get run time arguments
    args = parser.parse_args()

    # fail with a clear usage error instead of Path(None) raising TypeError
    if args.file_path is None:
        parser.error('a file path is required (-fp/--file_path)')

    file_path = Path(args.file_path)
    llm_model = args.llm_model

    # check for file path (real exception: asserts are stripped under -O)
    if not file_path.exists():
        raise FileNotFoundError(f"File Path {file_path.as_posix()} doesn't exist. Please try again.")

    # load keys and environment variables
    load_dotenv()
    os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
    os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')
    os.environ['HF_TOKEN'] = os.getenv('HF_INF_TOKEN', 'your-key-if-not-using-env')

    # get system messages
    system_message = get_system_message()

    # map each supported model name to its client and concrete model id
    model_info = {
        'gpt': {
            'client': OpenAI(),
            'model': Model.OPENAI_MODEL.value,
        },
        'claude': {
            'client': anthropic.Anthropic(),
            'model': Model.CLAUDE_MODEL.value
        }
    }

    # add standard arguments shared by both back ends
    model_info[llm_model].update(
        {
            'file_path': file_path,
            'system_message': system_message
        }
    )

    # generate the doc-stringed copy of the file with the chosen model
    print(f"\nSTARTED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")
    add_doc_string(**model_info[llm_model])
    print(f"\nFINISHED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")


if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
147
week4/community-contributions/doc_string_exercise/utils.py
Normal file
147
week4/community-contributions/doc_string_exercise/utils.py
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
from enum import Enum
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class Model(Enum):
    """
    Supported LLM back ends, mapped to their concrete model identifiers.
    """
    # default OpenAI chat model
    OPENAI_MODEL = "gpt-4o"
    # default Anthropic model
    CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
|
||||||
|
|
||||||
|
|
||||||
|
def get_system_message() -> str:
    """
    Build the system prompt for the docstring-writing assistant.

    :return: instructions telling the model to rewrite a python function
        with reST-style docstrings and typing annotations, and nothing else.
    :rtype: str
    """
    parts = (
        "You are an assistant that creates doc strings in reStructure Text format for an existing python function. ",
        "Respond only with an updated python function; use comments sparingly and do not provide any explanation other than occasional comments. ",
        "Be sure to include typing annotation for each function argument or key word argument and return object types.",
    )
    return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def user_prompt_for(python: str) -> str:
    """
    Build the user prompt asking the model to add docstrings to *python*.

    :param python: source code of the function(s) to be rewritten
    :type python: str
    :return: the full prompt text — instructions followed by the code
    :rtype: str
    """
    instructions = (
        "Rewrite this Python function with doc strings in the reStructuredText style."
        "Respond only with python code; do not explain your work other than a few comments. "
        "Be sure to write a description of the function purpose with typing for each argument and return\n\n"
    )
    return instructions + python
|
||||||
|
|
||||||
|
|
||||||
|
def messages_for(python: str, system_message: str) -> list:
    """
    Assemble the chat message list for an OpenAI-style completion call.

    :param python: source code to be rewritten with docstrings
    :type python: str
    :param system_message: system prompt for the assistant
    :type system_message: str
    :return: two messages — the system prompt and the user request
    :rtype: list
    """
    system_entry = {"role": "system", "content": system_message}
    user_entry = {"role": "user", "content": user_prompt_for(python)}
    return [system_entry, user_entry]
|
||||||
|
|
||||||
|
|
||||||
|
def write_output(output: str, file_suffix: str, file_path: Path) -> None:
    """
    Write the processed output to a file next to the input file.

    The model reply is often wrapped in markdown code fences; these are
    stripped before writing so the output file is valid python.

    :param output: The processed Python code with docstrings.
    :type output: str
    :param file_suffix: The suffix to be added to the output file name
        (empty/None means the input file name is reused).
    :type file_suffix: str
    :param file_path: The path of the input file.
    :type file_path: Path
    :return: None
    """
    # strip markdown code fences (the original replace("", "") was a no-op)
    code = output.replace("```python", "").replace("```", "")
    out_file = file_path.with_name(f"{file_path.stem}{file_suffix if file_suffix else ''}.py")
    out_file.write_text(code)
|
||||||
|
|
||||||
|
|
||||||
|
def add_doc_string(client: object, system_message: str, file_path: Path, model: str) -> None:
    """
    Dispatch docstring generation to the GPT or Claude implementation.

    :param client: the AI client object (OpenAI or Anthropic instance)
    :type client: object
    :param system_message: system prompt for the assistant
    :type system_message: str
    :param file_path: path of the input python file
    :type file_path: Path
    :param model: model identifier; any name containing 'gpt' routes to the
        OpenAI path, everything else to Claude
    :type model: str
    :return: None
    """
    # pick the handler by model name; claude is the fallback, matching the CLI default
    handler = add_doc_string_gpt if 'gpt' in model else add_doc_string_claude
    handler(client=client, system_message=system_message, file_path=file_path, model=model)
|
||||||
|
|
||||||
|
|
||||||
|
def add_doc_string_gpt(client: object, system_message: str, file_path: Path, model: str = 'gpt-4o') -> None:
    """
    Stream a docstring-annotated rewrite of *file_path* from a GPT model.

    Reads the file, streams the model's reply to stdout as it arrives, then
    writes the complete reply next to the input with a '_gpt' suffix.

    :param client: the OpenAI client object
    :type client: object
    :param system_message: system prompt for the assistant
    :type system_message: str
    :param file_path: path of the input python file
    :type file_path: Path
    :param model: the GPT model to use, defaults to 'gpt-4o'
    :type model: str
    :return: None
    """
    source = file_path.read_text(encoding='utf-8')
    stream = client.chat.completions.create(
        model=model,
        messages=messages_for(source, system_message),
        stream=True,
    )
    pieces = []
    for chunk in stream:
        fragment = chunk.choices[0].delta.content or ""
        pieces.append(fragment)
        # echo progress to the console as the reply streams in
        print(fragment, end='', flush=True)
    write_output("".join(pieces), file_suffix='_gpt', file_path=file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def add_doc_string_claude(client: object, system_message: str, file_path: Path, model: str = 'claude-3-5-sonnet-20240620') -> None:
    """
    Stream a docstring-annotated rewrite of *file_path* from a Claude model.

    Reads the file, streams the model's reply to stdout as it arrives, then
    writes the complete reply next to the input with a '_claude' suffix.

    :param client: the Anthropic client object
    :type client: object
    :param system_message: system prompt for the assistant
    :type system_message: str
    :param file_path: path of the input python file
    :type file_path: Path
    :param model: the Claude model to use, defaults to 'claude-3-5-sonnet-20240620'
    :type model: str
    :return: None
    """
    source = file_path.read_text(encoding='utf-8')
    stream_manager = client.messages.stream(
        model=model,
        max_tokens=2000,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(source)}],
    )
    pieces = []
    with stream_manager as stream:
        for text in stream.text_stream:
            pieces.append(text)
            # echo progress to the console as the reply streams in
            print(text, end="", flush=True)
    write_output("".join(pieces), file_suffix='_claude', file_path=file_path)
|
||||||
Reference in New Issue
Block a user