{ "cells": [ { "cell_type": "markdown", "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9", "metadata": {}, "source": [ "# Python Code Documentation Assistant\n", "\n", "The requirement: use a Frontier model to add docstrings and comments to your Python code\n" ] }, { "cell_type": "markdown", "id": "d4634170-c444-4326-9e68-5f87c63fa0e0", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "1f72dfaf-9f20-4d81-b082-018eda152c9f", "metadata": {}, "outputs": [], "source": [ "!pip install -U -q \"google-genai\"" ] }, { "cell_type": "code", "execution_count": null, "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3", "metadata": {}, "outputs": [], "source": [ "import os\n", "import io\n", "import sys\n", "from dotenv import load_dotenv\n", "from openai import OpenAI\n", "from google import genai\n", "from google.genai import types\n", "import anthropic\n", "from IPython.display import Markdown, display, update_display\n", "import gradio as gr\n", "import subprocess" ] }, { "cell_type": "markdown", "id": "f91e8b32-4c98-4210-a1e1-bfe0b1fddab7", "metadata": {}, "source": [ "## Environment" ] }, { "cell_type": "code", "execution_count": null, "id": "4f672e1c-87e9-4865-b760-370fa605e614", "metadata": {}, "outputs": [], "source": [ "load_dotenv(override=True)\n", "openai_api_key = os.getenv('OPENAI_API_KEY')\n", "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", "google_api_key = os.getenv('GOOGLE_API_KEY')\n", "\n", "if openai_api_key:\n", " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", "else:\n", " print(\"OpenAI API Key not set\")\n", " \n", "if anthropic_api_key:\n", " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n", "else:\n", " print(\"Anthropic API Key not set\")\n", "\n", "if google_api_key:\n", " print(f\"Google API Key exists and begins {google_api_key[:4]}\")\n", "else:\n", " print(\"Google API Key not set\")" ] }, { "cell_type": "code", "execution_count": null, "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da", "metadata": {}, "outputs": [], "source": [ "openai = OpenAI()\n", "claude = anthropic.Anthropic()\n", "gemini = genai.Client()\n", "\n", "OPENAI_MODEL = \"o4-mini\"\n", "CLAUDE_MODEL = \"claude-3-7-sonnet-latest\"\n", "GEMINI_MODEL = \"gemini-2.5-flash\"" ] }, { "cell_type": "markdown", "id": "88a18c58-40d5-4592-8dd3-d7c7b0d951aa", "metadata": {}, "source": [ "## Prompts" ] }, { "cell_type": "code", "execution_count": null, "id": "6896636f-923e-4a2c-9d6c-fac07828a201", "metadata": {}, "outputs": [], "source": [ "system_message = \"\"\"\n", "You are an assistant that documents Python code. \n", "Your task: \n", "- Add concise, clear, and informative docstrings to functions, classes, and modules. \n", "- Add inline comments only where they improve readability or clarify intent. \n", "- Do not modify the code logic or structure. \n", "- Respond with Python code only. \n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": null, "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb", "metadata": {}, "outputs": [], "source": [ "def user_prompt_for(python):\n", " user_prompt = \"Add docstrings and comments to the following Python code:\\n\"\n", " user_prompt += python\n", " return user_prompt" ] }, { "cell_type": "code", "execution_count": null, "id": "c6190659-f54c-4951-bef4-4960f8e51cc4", "metadata": {}, "outputs": [], "source": [ "def messages_for(python):\n", " return [\n", " {\"role\": \"system\", \"content\": system_message},\n", " {\"role\": \"user\", \"content\": user_prompt_for(python)}\n", " ]" ] }, { "cell_type": "markdown", "id": "624e5066-bcf6-490d-a790-608d2bb34184", "metadata": {}, "source": [ "## Helper functions" ] }, { "cell_type": "code", "execution_count": null, "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b", "metadata": {}, "outputs": [], "source": [ "def write_output(python, filename_suffix):\n", " filename = f\"annotated_{filename_suffix}.py\"\n", " code = python.replace(\"```python\",\"\").replace(\"```\",\"\")\n", " with open(filename, \"w\") as f:\n", " f.write(code)\n", " print(f\"\\nWritten code to {filename}\")\n", " return filename" ] }, { "cell_type": "code", "execution_count": null, "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9", "metadata": {}, "outputs": [], "source": [ "def annotate_with_gpt(python, task_name): \n", " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", " reply = \"\"\n", " for chunk in stream:\n", " fragment = chunk.choices[0].delta.content or \"\"\n", " reply += fragment\n", " print(fragment, end='', flush=True)\n", " return write_output(reply, f\"{task_name}_gpt\")" ] }, { "cell_type": "code", "execution_count": null, "id": "7cd84ad8-d55c-4fe0-9eeb-1895c95c4a9d", "metadata": {}, "outputs": [], "source": [ "def annotate_with_claude(python, task_name):\n", " result = claude.messages.stream(\n", " model=CLAUDE_MODEL,\n", " max_tokens=2000,\n", " system=system_message,\n", " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", " )\n", " reply = \"\"\n", " with result as stream:\n", " for text in stream.text_stream:\n", " reply += text\n", " print(text, end=\"\", flush=True)\n", " return write_output(reply, f\"{task_name}_claude\")" ] }, { "cell_type": "code", "execution_count": null, "id": "e8a35102-1c95-469b-8855-e85f4c9bdbdf", "metadata": {}, "outputs": [], "source": [ "def annotate_with_gemini(python, task_name):\n", " reply = gemini.models.generate_content(\n", " model=GEMINI_MODEL,\n", " contents=user_prompt_for(python),\n", " config=types.GenerateContentConfig(\n", " system_instruction=system_message,\n", " )\n", " )\n", "\n", " print(reply.text)\n", " return write_output(reply.text, f\"{task_name}_gemini\")" ] }, { "cell_type": "markdown", "id": "028dcfdd-2d52-4e11-a79e-2214a97cb26d", "metadata": {}, "source": [ "# Run the Annotator" ] }, { "cell_type": "markdown", "id": "7462d9f9-6215-4fb0-9471-1d0141d33205", "metadata": {}, "source": [ "## Pi example" ] }, { "cell_type": "code", "execution_count": null, "id": "a1cbb778-fa57-43de-b04b-ed523f396c38", "metadata": {}, "outputs": [], "source": [ "pi = \"\"\"\n", "import time\n", "\n", "def calculate(iterations, param1, param2):\n", " result = 1.0\n", " for i in range(1, iterations+1):\n", " j = i * param1 - param2\n", " result -= (1/j)\n", " j = i * param1 + param2\n", " result += (1/j)\n", " return result\n", "\n", "start_time = time.time()\n", "result = calculate(100_000_000, 4, 1) * 4\n", "end_time = time.time()\n", "\n", "print(f\"Result: {result:.12f}\")\n", "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": null, "id": "105db6f9-343c-491d-8e44-3a5328b81719", "metadata": {}, "outputs": [], "source": [ "gpt_pi = annotate_with_gpt(pi, \"pi))" ] }, { "cell_type": "code", "execution_count": null, "id": "415819d0-fc95-4f78-a6ae-5c7d6781c6a7", "metadata": {}, "outputs": [], "source": [ "# check if the script works\n", "\n", "exec(open(gpt_pi).read())" ] }, { "cell_type": "code", "execution_count": null, "id": "983a11fe-e24d-4c65-8269-9802c5ef3ae6", "metadata": { "scrolled": true }, "outputs": [], "source": [ "claude_pi = annotate_with_claude(pi, \"pi\")" ] }, { "cell_type": "code", "execution_count": null, "id": "52f5b710-0dea-4884-8ed7-a94059d88281", "metadata": {}, "outputs": [], "source": [ "exec(open(claude_pi).read())" ] }, { "cell_type": "code", "execution_count": null, "id": "01f331f2-caac-48f6-9a03-8a228ee521bc", "metadata": {}, "outputs": [], "source": [ "gemini_pi = annotate_with_gemini(pi, \"pi\")" ] }, { "cell_type": "code", "execution_count": null, "id": "23529942-53fa-46ad-a5db-1f3096dd6607", "metadata": {}, "outputs": [], "source": [ "exec(open(gemini_pi).read())" ] }, { "cell_type": "markdown", "id": "7d1eaeca-61be-4d0a-a525-dd09f52aaa0f", "metadata": {}, "source": [ "## Hard example" ] }, { "cell_type": "code", "execution_count": null, "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0", "metadata": {}, "outputs": [], "source": [ "python_hard = \"\"\"# Be careful to support large number sizes\n", "\n", "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", " value = seed\n", " while True:\n", " value = (a * value + c) % m\n", " yield value\n", " \n", "def max_subarray_sum(n, seed, min_val, max_val):\n", " lcg_gen = lcg(seed)\n", " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", " max_sum = float('-inf')\n", " for i in range(n):\n", " current_sum = 0\n", " for j in range(i, n):\n", " current_sum += random_numbers[j]\n", " if current_sum > max_sum:\n", " max_sum = current_sum\n", " return max_sum\n", "\n", "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", " total_sum = 0\n", " lcg_gen = lcg(initial_seed)\n", " for _ in range(20):\n", " seed = next(lcg_gen)\n", " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", " return total_sum\n", "\n", "# Parameters\n", "n = 10000 # Number of random numbers\n", "initial_seed = 42 # Initial seed for the LCG\n", "min_val = -10 # Minimum value of random numbers\n", "max_val = 10 # Maximum value of random numbers\n", "\n", "# Timing the function\n", "import time\n", "start_time = time.time()\n", "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", "end_time = time.time()\n", "\n", "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": null, "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899", "metadata": {}, "outputs": [], "source": [ "exec(python_hard)" ] }, { "cell_type": "code", "execution_count": null, "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8", "metadata": {}, "outputs": [], "source": [ "gpt_hard = annotate_with_gpt(python_hard, \"hard\")" ] }, { "cell_type": "code", "execution_count": null, "id": "80a15259-3d51-47b8-953c-6271fbd4b6fb", "metadata": {}, "outputs": [], "source": [ "exec(open(gpt_hard).read())" ] }, { "cell_type": "code", "execution_count": null, "id": "e9305446-1d0c-4b51-866a-b8c1e299bf5c", "metadata": {}, "outputs": [], "source": [ "gemini_hard = annotate_with_gemini(python_hard, \"hard\")" ] }, { "cell_type": "code", "execution_count": null, "id": "ad6eecc8-0517-43d8-bd21-5bbdedae7a10", "metadata": {}, "outputs": [], "source": [ "exec(open(gemini_hard).read())" ] }, { "cell_type": "code", "execution_count": null, "id": "2ee75e72-9ecb-4edd-a74a-4d3a83c1eb79", "metadata": { "scrolled": true }, "outputs": [], "source": [ "claude_hard = annotate_with_claude(python_hard, \"hard\")" ] }, { "cell_type": "code", "execution_count": null, "id": "47af1516-455f-4d1c-8a1c-2da5a38c0ba5", "metadata": {}, "outputs": [], "source": [ "exec(open(claude_hard).read())" ] }, { "cell_type": "markdown", "id": "ff02ce09-0544-49a5-944d-a57b25bf9b72", "metadata": {}, "source": [ "# Streaming" ] }, { "cell_type": "code", "execution_count": null, "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0", "metadata": {}, "outputs": [], "source": [ "def stream_gpt(python): \n", " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", " reply = \"\"\n", " for chunk in stream:\n", " fragment = chunk.choices[0].delta.content or \"\"\n", " reply += fragment\n", " yield reply.replace('```python\\n','').replace('```','')" ] }, { "cell_type": "code", "execution_count": null, "id": "8669f56b-8314-4582-a167-78842caea131", "metadata": {}, "outputs": [], "source": [ "def stream_claude(python):\n", " result = claude.messages.stream(\n", " model=CLAUDE_MODEL,\n", " max_tokens=2000,\n", " system=system_message,\n", " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", " )\n", " reply = \"\"\n", " with result as stream:\n", " for text in stream.text_stream:\n", " reply += text\n", " yield reply.replace('```python\\n','').replace('```','')" ] }, { "cell_type": "code", "execution_count": null, "id": "d48d44df-c082-4ed1-b3ea-fc2a880591c2", "metadata": {}, "outputs": [], "source": [ "def stream_gemini(python):\n", " stream = gemini.models.generate_content_stream(\n", " model=GEMINI_MODEL,\n", " contents=user_prompt_for(python),\n", " config=types.GenerateContentConfig(\n", " system_instruction=system_message,\n", " ),\n", " )\n", " reply = \"\"\n", " for chunk in stream:\n", " reply += chunk.text\n", " yield reply.replace('```python\\n','').replace('```','')" ] }, { "cell_type": "code", "execution_count": null, "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d", "metadata": {}, "outputs": [], "source": [ "def annotate(python, model):\n", " if model == \"GPT\":\n", " result = stream_gpt(python)\n", " elif model == \"Claude\":\n", " result = stream_claude(python)\n", " elif model == \"Gemini\":\n", " result = stream_gemini(python)\n", " else:\n", " raise ValueError(\"Unknown model\")\n", " for stream_so_far in result:\n", " yield stream_so_far " ] }, { "cell_type": "code", "execution_count": null, "id": "19bf2bff-a822-4009-a539-f003b1651383", "metadata": {}, "outputs": [], "source": [ "def execute_python(code):\n", " try:\n", " output = io.StringIO()\n", " sys.stdout = output\n", " exec(code)\n", " finally:\n", " sys.stdout = sys.__stdout__\n", " return output.getvalue()" ] }, { "cell_type": "code", "execution_count": null, "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442", "metadata": {}, "outputs": [], "source": [ "css = \"\"\"\n", ".python {background-color: #306998;}\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": null, "id": "76167ea9-d0a1-4bc6-8d73-633d3b8c8df6", "metadata": {}, "outputs": [], "source": [ "import gradio as gr\n", "\n", "# Parameters\n", "LINES = 25\n", "LINE_HEIGHT = 20 # px, typical CodeMirror line height\n", "PADDING = 10 # px, top + bottom padding\n", "\n", "CODE_HEIGHT = LINES * LINE_HEIGHT + PADDING\n", "\n", "\n", "with gr.Blocks(\n", " theme=gr.themes.Soft(),\n", " css=f\"\"\"\n", "#code_input .cm-editor, #annotated_code .cm-editor {{\n", " height: {CODE_HEIGHT}px !important;\n", " overflow-y: auto !important;\n", "}}\n", "\"\"\"\n", ") as demo_v2:\n", " gr.Markdown(\"## 🐍 Annotate Python Code with Docstrings and Comments\")\n", "\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " gr.Markdown(\"### Python code:\")\n", " code_input = gr.Code(\n", " language=\"python\", \n", " value=python_hard,\n", " lines=25,\n", " elem_id=\"code_input\"\n", " )\n", " \n", " with gr.Column(scale=1):\n", " gr.Markdown(\"### Annotated code:\")\n", " annotated_output = gr.Code(\n", " language=\"python\",\n", " lines=25,\n", " elem_id=\"annotated_code\"\n", " )\n", "\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " model_dropdown = gr.Dropdown(\n", " choices=[\"Gemini\", \"GPT-4\", \"Claude\"],\n", " value=\"Gemini\",\n", " label=\"Select model\"\n", " )\n", " with gr.Column(scale=1):\n", " annotate_btn = gr.Button(\"✨ Annotate code\", variant=\"primary\")\n", " run_btn = gr.Button(\"▶️ Run Python\", variant=\"secondary\")\n", "\n", " with gr.Row():\n", " with gr.Column():\n", " gr.Markdown(\"### Python result:\")\n", " result_output = gr.Textbox(\n", " lines=5, \n", " label=\"Output\",\n", " interactive=False\n", " )\n", " \n", " annotate_btn.click(\n", " annotate,\n", " inputs=[code_input, model_dropdown],\n", " outputs=[annotated_output]\n", " )\n", " run_btn.click(execute_python, inputs=[code_input], outputs=[result_output])\n", "\n", " \n", "demo_v2.launch(inbrowser=True)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ea42883b-fdba-46ed-97be-f42e3cb41f11", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }