Merge pull request #126 from narquette/week4_doc_string

Week 4 Exercise - Add Document Strings For Existing Python File
2025-01-29 17:08:52 -05:00
parent f7096730ba b582e41ecf
commit d76b462a5b
4 changed files with 280 additions and 0 deletions
--- a/week4/community-contributions/doc_string_exercise/README.md
+++ b/week4/community-contributions/doc_string_exercise/README.md
@@ -0,0 +1,29 @@
+# Script Overview
+
+The documentation will show you how to run the python script generate_doc_string.py. It is designed to take input
+from an existing python file and create a new one with a suffix ('claude' or 'gpt'). If you do not specify and llm 
+model, it will default to claude.
+
+# How to run
+
+```powershell
+conda activate llms
+cd <script_location>
+python generate_doc_string -fp <full_file_path> -llm <name_of_model>
+```
+
+# Show Help Instructions
+
+```shell
+python generate_doc_string --help
+```
+
+# Error Checking
+
+1) File Path Existence
+
+If the file path doesn't exist, the script will stop running and print out an error.
+
+2) LLM Model Choice
+
+If you choose something other than 'gpt' or 'claude', it will show and assertion error.
--- a/week4/community-contributions/doc_string_exercise/data/original_file.py
+++ b/week4/community-contributions/doc_string_exercise/data/original_file.py
@@ -0,0 +1,19 @@
+
+def calculate(iterations, param1, param2):
+    result = 1.0
+    for i in range(1, iterations+1):
+        j = i * param1 - param2
+        result -= (1/j)
+        j = i * param1 + param2
+        result += (1/j)
+    return result
+
+
+def calculate_2(iterations, param1, param2):
+    result = 1.0
+    for i in range(1, iterations+1):
+        j = i * param1 - param2
+        result -= (1/j)
+        j = i * param1 + param2
+        result += (1/j)
+    return result
--- a/week4/community-contributions/doc_string_exercise/generate_doc_string.py
+++ b/week4/community-contributions/doc_string_exercise/generate_doc_string.py
@@ -0,0 +1,85 @@
+from argparse import ArgumentParser
+import os
+from dotenv import load_dotenv
+from openai import OpenAI
+import anthropic
+from utils import add_doc_string, Model, get_system_message
+from pathlib import Path
+
+
+def main():
+
+    # get run time arguments
+    parser = ArgumentParser(
+        prog='Generate Doc String for an existing functions',
+        description='Run Doc String for a given file and model',
+    )
+    parser.add_argument(
+        '-fp',
+        '--file_path',
+        help='Enter the file path to the script that will be updated with doc strings',
+        default=None
+    )
+    parser.add_argument(
+        '-llm',
+        '--llm_model',
+        help='Choose the LLM model that will create the doc strings',
+        default='claude'
+    )
+
+    # get run time arguments
+    args = parser.parse_args()
+    file_path = Path(args.file_path)
+    llm_model = args.llm_model
+
+    # check for file path
+    assert file_path.exists(), f"File Path {str(file_path.as_posix())} doesn't exist. Please try again."
+
+    # check for value llm values
+    assert llm_model in ['gpt', 'claude'], (f"Invalid model chosen '{llm_model}'. "
+                                            f"Please choose a valid model ('gpt' or 'claude')")
+
+    # load keys and environment variables
+    load_dotenv()
+    os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
+    os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')
+    os.environ['HF_TOKEN'] = os.getenv('HF_INF_TOKEN', 'your-key-if-not-using-env')
+
+    # get system messages
+    system_message = get_system_message()
+
+    # get model info
+    model_info = {
+        'gpt': {
+            'client': OpenAI(),
+            'model': Model.OPENAI_MODEL.value,
+        },
+        'claude': {
+            'client': anthropic.Anthropic(),
+            'model': Model.CLAUDE_MODEL.value
+        }
+    }
+
+    # add standard argumens
+    model_info[llm_model].update(
+        {
+            'file_path': file_path,
+            'system_message': system_message
+        }
+    )
+
+    # convert python code to c++ code using open ai
+    print(f"\nSTARTED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")
+    add_doc_string(**model_info[llm_model])
+    print(f"\nFINISHED | Doc Strings Using {llm_model.upper()} for file {str(file_path)}\n\n")
+
+
+if __name__ == '__main__':
+
+    main()
+
+
+
+
+
+
--- a/week4/community-contributions/doc_string_exercise/utils.py
+++ b/week4/community-contributions/doc_string_exercise/utils.py
@@ -0,0 +1,147 @@
+from enum import Enum
+from pathlib import Path
+
+
+class Model(Enum):
+    """
+    Enumeration of supported AI models.
+    """
+    OPENAI_MODEL = "gpt-4o"
+    CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
+
+
+def get_system_message() -> str:
+    """
+    Generate a system message for AI assistants creating docstrings.
+
+    :return: A string containing instructions for the AI assistant.
+    :rtype: str
+    """
+    system_message = "You are an assistant that creates doc strings in reStructure Text format for an existing python function. "
+    system_message += "Respond only with an updated python function; use comments sparingly and do not provide any explanation other than occasional comments. "
+    system_message += "Be sure to include typing annotation for each function argument or key word argument and return object types."
+
+    return system_message
+
+
+def user_prompt_for(python: str) -> str:
+    """
+    Generate a user prompt for rewriting Python functions with docstrings.
+
+    :param python: The Python code to be rewritten.
+    :type python: str
+    :return: A string containing the user prompt and the Python code.
+    :rtype: str
+    """
+    user_prompt = "Rewrite this Python function with doc strings in the reStructuredText style."
+    user_prompt += "Respond only with python code; do not explain your work other than a few comments. "
+    user_prompt += "Be sure to write a description of the function purpose with typing for each argument and return\n\n"
+    user_prompt += python
+    return user_prompt
+
+
+def messages_for(python: str, system_message: str) -> list:
+    """
+    Create a list of messages for the AI model.
+
+    :param python: The Python code to be processed.
+    :type python: str
+    :param system_message: The system message for the AI assistant.
+    :type system_message: str
+    :return: A list of dictionaries containing role and content for each message.
+    :rtype: list
+    """
+    return [
+        {"role": "system", "content": system_message},
+        {"role": "user", "content": user_prompt_for(python)}
+    ]
+
+
+def write_output(output: str, file_suffix: str, file_path: Path) -> None:
+    """
+    Write the processed output to a file.
+
+    :param output: The processed Python code with docstrings.
+    :type output: str
+    :param file_suffix: The suffix to be added to the output file name.
+    :type file_suffix: str
+    :param file_path: The path of the input file.
+    :type file_path: Path
+    :return: None
+    """
+    code = output.replace("", "").replace("", "")
+    out_file = file_path.with_name(f"{file_path.stem}{file_suffix if file_suffix else ''}.py")
+    out_file.write_text(code)
+
+
+def add_doc_string(client: object, system_message: str, file_path: Path, model: str) -> None:
+    """
+    Add docstrings to a Python file using the specified AI model.
+
+    :param client: The AI client object.
+    :type client: object
+    :param system_message: The system message for the AI assistant.
+    :type system_message: str
+    :param file_path: The path of the input Python file.
+    :type file_path: Path
+    :param model: The AI model to be used.
+    :type model: str
+    :return: None
+    """
+    if 'gpt' in model:
+        add_doc_string_gpt(client=client, system_message=system_message, file_path=file_path, model=model)
+    else:
+        add_doc_string_claude(client=client, system_message=system_message, file_path=file_path, model=model)
+
+
+def add_doc_string_gpt(client: object, system_message: str, file_path: Path, model: str = 'gpt-4o') -> None:
+    """
+    Add docstrings to a Python file using GPT model.
+
+    :param client: The OpenAI client object.
+    :type client: object
+    :param system_message: The system message for the AI assistant.
+    :type system_message: str
+    :param file_path: The path of the input Python file.
+    :type file_path: Path
+    :param model: The GPT model to be used, defaults to 'gpt-4o'.
+    :type model: str
+    :return: None
+    """
+    code_text = file_path.read_text(encoding='utf-8')
+    stream = client.chat.completions.create(model=model, messages=messages_for(code_text, system_message), stream=True)
+    reply = ""
+    for chunk in stream:
+        fragment = chunk.choices[0].delta.content or ""
+        reply += fragment
+        print(fragment, end='', flush=True)
+    write_output(reply, file_suffix='_gpt', file_path=file_path)
+
+
+def add_doc_string_claude(client: object, system_message: str, file_path: Path, model: str = 'claude-3-5-sonnet-20240620') -> None:
+    """
+    Add docstrings to a Python file using Claude model.
+
+    :param client: The Anthropic client object.
+    :type client: object
+    :param system_message: The system message for the AI assistant.
+    :type system_message: str
+    :param file_path: The path of the input Python file.
+    :type file_path: Path
+    :param model: The Claude model to be used, defaults to 'claude-3-5-sonnet-20240620'.
+    :type model: str
+    :return: None
+    """
+    code_text = file_path.read_text(encoding='utf-8')
+    result = client.messages.stream(
+        model=model,
+        max_tokens=2000,
+        system=system_message,
+        messages=[{"role": "user", "content": user_prompt_for(code_text)}],
+    )
+    reply = ""
+    with result as stream:
+        for text in stream.text_stream:
+            reply += text
+            print(text, end="", flush=True)
+    write_output(reply, file_suffix='_claude', file_path=file_path)