{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9",
   "metadata": {},
   "source": [
    "# Python Code Documentation Assistant\n",
    "\n",
    "The requirement: use a Frontier model to add docstrings and comments to your Python code\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d4634170-c444-4326-9e68-5f87c63fa0e0",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f72dfaf-9f20-4d81-b082-018eda152c9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install -U -q \"google-genai\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import io\n",
    "import sys\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "from google import genai\n",
    "from google.genai import types\n",
    "import anthropic\n",
    "from IPython.display import Markdown, display, update_display\n",
    "import gradio as gr\n",
    "import subprocess"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f91e8b32-4c98-4210-a1e1-bfe0b1fddab7",
   "metadata": {},
   "source": [
    "## Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f672e1c-87e9-4865-b760-370fa605e614",
   "metadata": {},
   "outputs": [],
   "source": [
    "load_dotenv(override=True)\n",
    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
    "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
    "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
    "\n",
    "if openai_api_key:\n",
    "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
    "else:\n",
    "    print(\"OpenAI API Key not set\")\n",
    "    \n",
    "if anthropic_api_key:\n",
    "    print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
    "else:\n",
    "    print(\"Anthropic API Key not set\")\n",
    "\n",
    "if google_api_key:\n",
    "    print(f\"Google API Key exists and begins {google_api_key[:4]}\")\n",
    "else:\n",
    "    print(\"Google API Key not set\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da",
   "metadata": {},
   "outputs": [],
   "source": [
    "openai = OpenAI()\n",
    "claude = anthropic.Anthropic()\n",
    "gemini = genai.Client()\n",
    "\n",
    "OPENAI_MODEL = \"o4-mini\"\n",
    "CLAUDE_MODEL = \"claude-3-7-sonnet-latest\"\n",
    "GEMINI_MODEL = \"gemini-2.5-flash\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "88a18c58-40d5-4592-8dd3-d7c7b0d951aa",
   "metadata": {},
   "source": [
    "## Prompts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6896636f-923e-4a2c-9d6c-fac07828a201",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_message = \"\"\"\n",
    "You are an assistant that documents Python code.  \n",
    "Your task:  \n",
    "- Add concise, clear, and informative docstrings to functions, classes, and modules.  \n",
    "- Add inline comments only where they improve readability or clarify intent.  \n",
    "- Do not modify the code logic or structure.  \n",
    "- Respond with Python code only.  \n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def user_prompt_for(python):\n",
    "    user_prompt = \"Add docstrings and comments to the following Python code:\\n\"\n",
    "    user_prompt += python\n",
    "    return user_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6190659-f54c-4951-bef4-4960f8e51cc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def messages_for(python):\n",
    "    return [\n",
    "        {\"role\": \"system\", \"content\": system_message},\n",
    "        {\"role\": \"user\", \"content\": user_prompt_for(python)}\n",
    "    ]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "624e5066-bcf6-490d-a790-608d2bb34184",
   "metadata": {},
   "source": [
    "## Helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def write_output(python, filename_suffix):\n",
    "    filename = f\"annotated_{filename_suffix}.py\"\n",
    "    code = python.replace(\"```python\",\"\").replace(\"```\",\"\")\n",
    "    with open(filename, \"w\") as f:\n",
    "        f.write(code)\n",
    "    print(f\"\\nWritten code to {filename}\")\n",
    "    return filename"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def annotate_with_gpt(python, task_name):    \n",
    "    stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n",
    "    reply = \"\"\n",
    "    for chunk in stream:\n",
    "        fragment = chunk.choices[0].delta.content or \"\"\n",
    "        reply += fragment\n",
    "        print(fragment, end='', flush=True)\n",
    "    return write_output(reply, f\"{task_name}_gpt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7cd84ad8-d55c-4fe0-9eeb-1895c95c4a9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def annotate_with_claude(python, task_name):\n",
    "    result = claude.messages.stream(\n",
    "        model=CLAUDE_MODEL,\n",
    "        max_tokens=2000,\n",
    "        system=system_message,\n",
    "        messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
    "    )\n",
    "    reply = \"\"\n",
    "    with result as stream:\n",
    "        for text in stream.text_stream:\n",
    "            reply += text\n",
    "            print(text, end=\"\", flush=True)\n",
    "    return write_output(reply, f\"{task_name}_claude\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8a35102-1c95-469b-8855-e85f4c9bdbdf",
   "metadata": {},
   "outputs": [],
   "source": [
    "def annotate_with_gemini(python, task_name):\n",
    "    reply = gemini.models.generate_content(\n",
    "        model=GEMINI_MODEL,\n",
    "        contents=user_prompt_for(python),\n",
    "        config=types.GenerateContentConfig(\n",
    "            system_instruction=system_message,\n",
    "        )\n",
    "    )\n",
    "\n",
    "    print(reply.text)\n",
    "    return write_output(reply.text, f\"{task_name}_gemini\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "028dcfdd-2d52-4e11-a79e-2214a97cb26d",
   "metadata": {},
   "source": [
    "# Run the Annotator"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7462d9f9-6215-4fb0-9471-1d0141d33205",
   "metadata": {},
   "source": [
    "## Pi example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1cbb778-fa57-43de-b04b-ed523f396c38",
   "metadata": {},
   "outputs": [],
   "source": [
    "pi = \"\"\"\n",
    "import time\n",
    "\n",
    "def calculate(iterations, param1, param2):\n",
    "    result = 1.0\n",
    "    for i in range(1, iterations+1):\n",
    "        j = i * param1 - param2\n",
    "        result -= (1/j)\n",
    "        j = i * param1 + param2\n",
    "        result += (1/j)\n",
    "    return result\n",
    "\n",
    "start_time = time.time()\n",
    "result = calculate(100_000_000, 4, 1) * 4\n",
    "end_time = time.time()\n",
    "\n",
    "print(f\"Result: {result:.12f}\")\n",
    "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "105db6f9-343c-491d-8e44-3a5328b81719",
   "metadata": {},
   "outputs": [],
   "source": [
    "gpt_pi = annotate_with_gpt(pi, \"pi))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "415819d0-fc95-4f78-a6ae-5c7d6781c6a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# check if the script works\n",
    "\n",
    "exec(open(gpt_pi).read())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "983a11fe-e24d-4c65-8269-9802c5ef3ae6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "claude_pi = annotate_with_claude(pi, \"pi\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52f5b710-0dea-4884-8ed7-a94059d88281",
   "metadata": {},
   "outputs": [],
   "source": [
    "exec(open(claude_pi).read())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01f331f2-caac-48f6-9a03-8a228ee521bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "gemini_pi = annotate_with_gemini(pi, \"pi\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23529942-53fa-46ad-a5db-1f3096dd6607",
   "metadata": {},
   "outputs": [],
   "source": [
    "exec(open(gemini_pi).read())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d1eaeca-61be-4d0a-a525-dd09f52aaa0f",
   "metadata": {},
   "source": [
    "## Hard example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0",
   "metadata": {},
   "outputs": [],
   "source": [
    "python_hard = \"\"\"# Be careful to support large number sizes\n",
    "\n",
    "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n",
    "    value = seed\n",
    "    while True:\n",
    "        value = (a * value + c) % m\n",
    "        yield value\n",
    "        \n",
    "def max_subarray_sum(n, seed, min_val, max_val):\n",
    "    lcg_gen = lcg(seed)\n",
    "    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n",
    "    max_sum = float('-inf')\n",
    "    for i in range(n):\n",
    "        current_sum = 0\n",
    "        for j in range(i, n):\n",
    "            current_sum += random_numbers[j]\n",
    "            if current_sum > max_sum:\n",
    "                max_sum = current_sum\n",
    "    return max_sum\n",
    "\n",
    "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n",
    "    total_sum = 0\n",
    "    lcg_gen = lcg(initial_seed)\n",
    "    for _ in range(20):\n",
    "        seed = next(lcg_gen)\n",
    "        total_sum += max_subarray_sum(n, seed, min_val, max_val)\n",
    "    return total_sum\n",
    "\n",
    "# Parameters\n",
    "n = 10000         # Number of random numbers\n",
    "initial_seed = 42 # Initial seed for the LCG\n",
    "min_val = -10     # Minimum value of random numbers\n",
    "max_val = 10      # Maximum value of random numbers\n",
    "\n",
    "# Timing the function\n",
    "import time\n",
    "start_time = time.time()\n",
    "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n",
    "end_time = time.time()\n",
    "\n",
    "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n",
    "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899",
   "metadata": {},
   "outputs": [],
   "source": [
    "exec(python_hard)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8",
   "metadata": {},
   "outputs": [],
   "source": [
    "gpt_hard = annotate_with_gpt(python_hard, \"hard\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80a15259-3d51-47b8-953c-6271fbd4b6fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "exec(open(gpt_hard).read())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9305446-1d0c-4b51-866a-b8c1e299bf5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "gemini_hard = annotate_with_gemini(python_hard, \"hard\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad6eecc8-0517-43d8-bd21-5bbdedae7a10",
   "metadata": {},
   "outputs": [],
   "source": [
    "exec(open(gemini_hard).read())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ee75e72-9ecb-4edd-a74a-4d3a83c1eb79",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "claude_hard = annotate_with_claude(python_hard, \"hard\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47af1516-455f-4d1c-8a1c-2da5a38c0ba5",
   "metadata": {},
   "outputs": [],
   "source": [
    "exec(open(claude_hard).read())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ff02ce09-0544-49a5-944d-a57b25bf9b72",
   "metadata": {},
   "source": [
    "# Streaming"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def stream_gpt(python):    \n",
    "    stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n",
    "    reply = \"\"\n",
    "    for chunk in stream:\n",
    "        fragment = chunk.choices[0].delta.content or \"\"\n",
    "        reply += fragment\n",
    "        yield reply.replace('```python\\n','').replace('```','')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8669f56b-8314-4582-a167-78842caea131",
   "metadata": {},
   "outputs": [],
   "source": [
    "def stream_claude(python):\n",
    "    result = claude.messages.stream(\n",
    "        model=CLAUDE_MODEL,\n",
    "        max_tokens=2000,\n",
    "        system=system_message,\n",
    "        messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
    "    )\n",
    "    reply = \"\"\n",
    "    with result as stream:\n",
    "        for text in stream.text_stream:\n",
    "            reply += text\n",
    "            yield reply.replace('```python\\n','').replace('```','')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d48d44df-c082-4ed1-b3ea-fc2a880591c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def stream_gemini(python):\n",
    "    stream = gemini.models.generate_content_stream(\n",
    "        model=GEMINI_MODEL,\n",
    "        contents=user_prompt_for(python),\n",
    "        config=types.GenerateContentConfig(\n",
    "            system_instruction=system_message,\n",
    "        ),\n",
    "    )\n",
    "    reply = \"\"\n",
    "    for chunk in stream:\n",
    "        reply += chunk.text\n",
    "        yield reply.replace('```python\\n','').replace('```','')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def annotate(python, model):\n",
    "    if model == \"GPT\":\n",
    "        result = stream_gpt(python)\n",
    "    elif model == \"Claude\":\n",
    "        result = stream_claude(python)\n",
    "    elif model == \"Gemini\":\n",
    "        result = stream_gemini(python)\n",
    "    else:\n",
    "        raise ValueError(\"Unknown model\")\n",
    "    for stream_so_far in result:\n",
    "        yield stream_so_far        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19bf2bff-a822-4009-a539-f003b1651383",
   "metadata": {},
   "outputs": [],
   "source": [
    "def execute_python(code):\n",
    "    try:\n",
    "        output = io.StringIO()\n",
    "        sys.stdout = output\n",
    "        exec(code)\n",
    "    finally:\n",
    "        sys.stdout = sys.__stdout__\n",
    "    return output.getvalue()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442",
   "metadata": {},
   "outputs": [],
   "source": [
    "css = \"\"\"\n",
    ".python {background-color: #306998;}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76167ea9-d0a1-4bc6-8d73-633d3b8c8df6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import gradio as gr\n",
    "\n",
    "# Parameters\n",
    "LINES = 25\n",
    "LINE_HEIGHT = 20  # px, typical CodeMirror line height\n",
    "PADDING = 10      # px, top + bottom padding\n",
    "\n",
    "CODE_HEIGHT = LINES * LINE_HEIGHT + PADDING\n",
    "\n",
    "\n",
    "with gr.Blocks(\n",
    "    theme=gr.themes.Soft(),\n",
    "    css=f\"\"\"\n",
    "#code_input .cm-editor, #annotated_code .cm-editor {{\n",
    "    height: {CODE_HEIGHT}px !important;\n",
    "    overflow-y: auto !important;\n",
    "}}\n",
    "\"\"\"\n",
    ") as demo_v2:\n",
    "    gr.Markdown(\"## 🐍 Annotate Python Code with Docstrings and Comments\")\n",
    "\n",
    "    with gr.Row():\n",
    "        with gr.Column(scale=1):\n",
    "            gr.Markdown(\"### Python code:\")\n",
    "            code_input = gr.Code(\n",
    "                language=\"python\", \n",
    "                value=python_hard,\n",
    "                lines=25,\n",
    "                elem_id=\"code_input\"\n",
    "            )\n",
    "        \n",
    "        with gr.Column(scale=1):\n",
    "            gr.Markdown(\"### Annotated code:\")\n",
    "            annotated_output = gr.Code(\n",
    "                language=\"python\",\n",
    "                lines=25,\n",
    "                elem_id=\"annotated_code\"\n",
    "            )\n",
    "\n",
    "    with gr.Row():\n",
    "        with gr.Column(scale=1):\n",
    "            model_dropdown = gr.Dropdown(\n",
    "                choices=[\"Gemini\", \"GPT-4\", \"Claude\"],\n",
    "                value=\"Gemini\",\n",
    "                label=\"Select model\"\n",
    "            )\n",
    "        with gr.Column(scale=1):\n",
    "            annotate_btn = gr.Button(\"✨ Annotate code\", variant=\"primary\")\n",
    "            run_btn = gr.Button(\"▶️ Run Python\", variant=\"secondary\")\n",
    "\n",
    "    with gr.Row():\n",
    "        with gr.Column():\n",
    "            gr.Markdown(\"### Python result:\")\n",
    "            result_output = gr.Textbox(\n",
    "                lines=5, \n",
    "                label=\"Output\",\n",
    "                interactive=False\n",
    "            )\n",
    "    \n",
    "    annotate_btn.click(\n",
    "        annotate,\n",
    "        inputs=[code_input, model_dropdown],\n",
    "        outputs=[annotated_output]\n",
    "    )\n",
    "    run_btn.click(execute_python, inputs=[code_input], outputs=[result_output])\n",
    "\n",
    "    \n",
    "demo_v2.launch(inbrowser=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea42883b-fdba-46ed-97be-f42e3cb41f11",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}