Merge pull request #309 from Adriana394/week-exercises
Create unit_testing_commets_code_generator.ipynb
@@ -0,0 +1,413 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ba410c21-be08-430f-8592-07aeefca27d1",
"metadata": {},
"source": [
"# Code Generator for Unit Tests and Comments/Docstrings"
]
},
{
"cell_type": "markdown",
"id": "0fe5e62b-78b5-476d-a3b1-77918d085c44",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "2b529e40-4902-4a1b-9208-a938af156be1",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from openai import OpenAI\n",
"import anthropic\n",
"\n",
"from huggingface_hub import login\n",
"from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM\n",
"\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4cd288ab-9332-4ce5-86b6-f81d2fff96a7",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv()\n",
"\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"anthropic_api_key = os.getenv('CLAUDE_API_KEY')\n",
"hf_token = os.getenv('HF_TOKEN')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1a192ae5-2be7-46a3-9376-d33e514e184e",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"claude = anthropic.Anthropic(api_key = anthropic_api_key)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7d6efe88-d90c-40f9-9df8-ab5370a31b21",
"metadata": {},
"outputs": [],
"source": [
"OPENAI = 'o3-mini-2025-01-31'\n",
"CLAUDE = 'claude-3-5-sonnet-20240620'\n",
"\n",
"QWEN = 'Qwen/CodeQwen1.5-7B-Chat'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ef0df5ce-c786-44c7-bdbd-600adfe8908e",
"metadata": {},
"outputs": [],
"source": [
"TESTING = 'Unit Tests'\n",
"COMMENTING = 'Docstrings/Comments'"
]
},
{
"cell_type": "markdown",
"id": "f4b2a75a-e713-404d-898a-c87db87fa849",
"metadata": {},
"source": [
"## System and User Prompt for Unit Test and Comments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4fab566a-4093-4ac4-bd77-866e0f307b74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_message_comment = \"\"\" You are an AI programming documentation assisstant. Your task is to generate clear, concise, \n",
|
||||
"and informativ docstrings for the provided code block given by the user. \n",
|
||||
"Analyze the code to understand its functionality and intent. Then produce a detailed documentation that includes:\n",
|
||||
"- a short summary what the code does.\n",
|
||||
"- a short description of the parameters, including their expected types\n",
|
||||
"- a short explanation what the function returns \n",
|
||||
"- if it's a complex code, and only then, some key insights\n",
|
||||
"- if applicable how the function can be used\n",
|
||||
"Ensure your documentation is written in clear gramatically correct english and in standard concentions (e.g PEP 257 for Python). \n",
|
||||
"It should be understandable and maintainable for other developers \"\"\""
|
||||
]
|
||||
},
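{
"cell_type": "markdown",
"id": "added-docstring-example-md",
"metadata": {},
"source": [
"The next two cells are an optional illustration, not part of the generation pipeline: a hypothetical `normalize` function documented in roughly the PEP 257 style that the system prompt above asks the models to produce."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-docstring-example-code",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: a hypothetical function documented in the style requested above\n",
"def normalize(values, target_sum = 1.0):\n",
"    \"\"\"Scale a list of numbers so that they sum to target_sum.\n",
"\n",
"    Parameters:\n",
"        values (list[float]): Numbers to rescale; must not sum to zero.\n",
"        target_sum (float): Desired total after scaling (default 1.0).\n",
"\n",
"    Returns:\n",
"        list[float]: The rescaled numbers.\n",
"\n",
"    Raises:\n",
"        ZeroDivisionError: If the input values sum to zero.\n",
"    \"\"\"\n",
"    total = sum(values)\n",
"    return [v * target_sum / total for v in values]"
]
},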
{
"cell_type": "code",
"execution_count": 7,
"id": "70273c7d-d461-4f59-982a-592443ce1257",
"metadata": {},
"outputs": [],
"source": [
"system_message_tests = \"\"\" You are an AI assistant specialized in creating unit tests. Your task is to generate high-quality\n",
"unit tests for code provided by the user.\n",
"First analyze the code and identify the main functionality, parameters, return values and possible edge cases.\n",
"Create comprehensive unit tests that cover the following aspects:\n",
"- normal use cases with expected inputs and outputs\n",
"- boundary cases and extreme values\n",
"- error handling and exceptions\n",
"- edge cases\n",
"Use the appropriate testing framework for the programming language (e.g., pytest for Python) and explain to the user why you\n",
"chose this specific framework.\n",
"Structure the tests clearly with meaningful test names and add comments to explain the test logic.\n",
"If the code block does not provide enough context, ask for the necessary details.\n",
"Supplement your response with a brief explanation of the testing strategy and suggestions for improving test coverage. \"\"\"\n"
]
},
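{
"cell_type": "markdown",
"id": "added-unit-test-example-md",
"metadata": {},
"source": [
"Also illustrative only: a minimal pytest-style sketch of the kind of tests the prompt above requests, written against the hypothetical `normalize` function from the docstring example."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-unit-test-example-code",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: roughly the shape of output the unit-test prompt asks the models to generate\n",
"import pytest\n",
"\n",
"def test_normalize_sums_to_target():\n",
"    # Normal case: the result should sum to the default target of 1.0\n",
"    assert sum(normalize([2, 2, 4])) == pytest.approx(1.0)\n",
"\n",
"def test_normalize_all_zeros_raises():\n",
"    # Error case: an all-zero input cannot be rescaled\n",
"    with pytest.raises(ZeroDivisionError):\n",
"        normalize([0, 0, 0])"
]
},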
{
"cell_type": "code",
"execution_count": 8,
"id": "48f2dd17-1ad1-4e34-ad76-0e02899f1962",
"metadata": {},
"outputs": [],
"source": [
"def user_prompt_comment(code):\n",
"    user_prompt = f\"\"\"Please add detailed docstrings to the following code: \n",
"    {code} \"\"\"\n",
"    return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "cb8b9962-c716-45d6-b4d1-ced781bb40f0",
"metadata": {},
"outputs": [],
"source": [
"def user_prompt_tests(code):\n",
"    user_prompt = f\"\"\" Please generate unit tests for the following code using the appropriate framework: \n",
"    {code} \"\"\"\n",
"    return user_prompt"
]
},
{
"cell_type": "markdown",
"id": "959d263e-f6ad-4e0e-95d3-bb5f56877d47",
"metadata": {},
"source": [
"## Define Model Functions"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8832b9d7-b17a-40d0-add5-07720d2e8af6",
"metadata": {},
"outputs": [],
"source": [
"def stream_gpt(system_message, user_prompt):\n",
"    # Stream the completion from the OpenAI API, yielding the accumulated reply so far\n",
"    stream = openai.chat.completions.create(\n",
"        model = OPENAI,\n",
"        messages = [\n",
"            {'role': 'system', 'content': system_message},\n",
"            {'role': 'user', 'content': user_prompt}\n",
"        ],\n",
"        stream = True\n",
"    )\n",
"\n",
"    response = \"\"\n",
"    for chunk in stream:\n",
"        response += chunk.choices[0].delta.content or \"\"\n",
"        yield response"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "5ac1d70c-cd4e-4809-bc2f-75a2e82b4e58",
"metadata": {},
"outputs": [],
"source": [
"def stream_claude(system_message, user_prompt):\n",
"    # Stream the reply from the Anthropic API, yielding the accumulated text so far\n",
"    response = claude.messages.stream(\n",
"        model = CLAUDE,\n",
"        max_tokens = 2000,\n",
"        system = system_message,\n",
"        messages = [\n",
"            {'role': 'user', 'content': user_prompt}\n",
"        ],\n",
"        temperature = 0.4\n",
"    )\n",
"    reply = \"\"\n",
"    with response as stream:\n",
"        for text in stream.text_stream:\n",
"            reply += text or \"\"\n",
"            yield reply"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "16702a62-fc9b-45b0-84cd-4f98523dfbd6",
"metadata": {},
"outputs": [],
"source": [
"def stream_qwen(system_message, user_prompt):\n",
"    # Run the model locally: tokenize the chat, generate in a background thread,\n",
"    # and stream decoded tokens back through a TextIteratorStreamer\n",
"    from threading import Thread\n",
"    from transformers import TextIteratorStreamer\n",
"\n",
"    tokenizer = AutoTokenizer.from_pretrained(QWEN)\n",
"    model = AutoModelForCausalLM.from_pretrained(QWEN, device_map = 'auto')\n",
"\n",
"    inputs = tokenizer.apply_chat_template(\n",
"        [\n",
"            {'role': 'system', 'content': system_message},\n",
"            {'role': 'user', 'content': user_prompt}\n",
"        ],\n",
"        add_generation_prompt = True,\n",
"        return_tensors = 'pt'\n",
"    ).to(model.device)\n",
"\n",
"    streamer = TextIteratorStreamer(tokenizer, skip_prompt = True, skip_special_tokens = True)\n",
"    thread = Thread(target = model.generate, kwargs = {\n",
"        'input_ids': inputs,\n",
"        'streamer': streamer,\n",
"        'max_new_tokens': 2000\n",
"    })\n",
"    thread.start()\n",
"\n",
"    reply = \"\"\n",
"    for text in streamer:\n",
"        reply += text\n",
"        yield reply"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f5dbf75f-c935-4412-b641-8afce97552e8",
"metadata": {},
"outputs": [],
"source": [
"def define_prompts(code, operation):\n",
"    if operation == 'Unit Tests':\n",
"        system_message = system_message_tests\n",
"        user_prompt = user_prompt_tests(code)\n",
"    elif operation == 'Docstrings/Comments':\n",
"        system_message = system_message_comment\n",
"        user_prompt = user_prompt_comment(code)\n",
"    else:\n",
"        return 'Unknown operation', ''\n",
"\n",
"    return system_message, user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "88a671f9-0ebc-487b-b116-b1abe4c6f934",
"metadata": {},
"outputs": [],
"source": [
"def create_test_comment(code, model, operation):\n",
"\n",
"    system_message, user_prompt = define_prompts(code, operation)\n",
"\n",
"    if model == 'GPT-o3-mini':\n",
"        gen = stream_gpt(system_message, user_prompt)\n",
"    elif model == 'Claude-3.5-sonnet':\n",
"        gen = stream_claude(system_message, user_prompt)\n",
"    elif model == 'CodeQwen':\n",
"        gen = stream_qwen(system_message, user_prompt)\n",
"    else:\n",
"        return 'Unknown Model'\n",
"\n",
"    # Drain the stream and return the final accumulated text\n",
"    result = ''\n",
"    for text in gen:\n",
"        result = text\n",
"    return result"
]
},
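{
"cell_type": "markdown",
"id": "added-direct-call-example-md",
"metadata": {},
"source": [
"Optional sanity check, assuming the OPENAI_API_KEY loaded in Setup is valid: call `create_test_comment` directly on a tiny example snippet, without going through the UI."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-direct-call-example-code",
"metadata": {},
"outputs": [],
"source": [
"# Quick direct call, bypassing Gradio; prints generated docstrings for a one-line function\n",
"sample_code = 'def add(a, b): return a + b'\n",
"print(create_test_comment(sample_code, 'GPT-o3-mini', 'Docstrings/Comments'))"
]
},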
{
"cell_type": "markdown",
"id": "1c7eea7a-fc30-4afd-b470-f4f83a288981",
"metadata": {},
"source": [
"## Creating a Simple Gradio UI"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "3d3d014b-bfc8-4ffd-941b-1fb3c9c9a80f",
"metadata": {},
"outputs": [],
"source": [
"def create_ui():\n",
"\n",
"    with gr.Blocks(title = 'Code Generator') as ui:\n",
"        gr.Markdown('# Code Generator for Unit Testing and Docstrings')\n",
"\n",
"        with gr.Row():\n",
"            with gr.Column(min_width = 500):\n",
"                code = gr.Textbox(label = 'Enter your Code',\n",
"                                  placeholder = 'Code...', lines = 20\n",
"                                 )\n",
"                model = gr.Dropdown(['GPT-o3-mini', 'Claude-3.5-sonnet', 'CodeQwen'],\n",
"                                    label = 'Choose your Model',\n",
"                                    value = 'GPT-o3-mini'\n",
"                                   )\n",
"                operation = gr.Dropdown(['Unit Tests', 'Docstrings/Comments'],\n",
"                                        label = 'Choose operation',\n",
"                                        value = 'Unit Tests'\n",
"                                       )\n",
"                generate_button = gr.Button('Generate')\n",
"\n",
"            with gr.Column():\n",
"                output = gr.Textbox(label = 'Generated Output',\n",
"                                    lines = 20\n",
"                                   )\n",
"\n",
"        generate_button.click(fn = create_test_comment, inputs = [code, model, operation],\n",
"                              outputs = output\n",
"                             )\n",
"    return ui"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "89be90c2-55ed-41e5-8123-e4f8ab965281",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Running on local URL: http://127.0.0.1:7860\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ui = create_ui()\n",
"ui.launch(inbrowser = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac4d6d48-4e52-477e-abf9-156eb1e4d561",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}