433 lines
14 KiB
Plaintext
433 lines
14 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Requirements\n",
|
|
"\n",
|
|
"1. Install pytest and pytest-cov library\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pipenv install pytest pytest-cov"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# imports\n",
|
|
"import re\n",
|
|
"import os\n",
|
|
"import sys\n",
|
|
"import textwrap\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from openai import OpenAI\n",
|
|
"import anthropic\n",
|
|
"import gradio as gr\n",
|
|
"from pathlib import Path\n",
|
|
"import subprocess\n",
|
|
"from IPython.display import Markdown"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Initialization\n",
|
|
"\n",
|
|
"load_dotenv()\n",
|
|
"\n",
|
|
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
|
"os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
|
|
"if openai_api_key:\n",
|
|
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
|
"else:\n",
|
|
" print(\"OpenAI API Key not set\")\n",
|
|
" \n",
|
|
"OPENAI_MODEL = \"gpt-4o-mini\"\n",
|
|
"CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\"\n",
|
|
"openai = OpenAI()\n",
|
|
"claude = anthropic.Anthropic()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
|
|
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
|
|
"OLLAMA_MODEL = \"llama3.2\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Code execution"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"def extract_code(text):\n",
|
|
" # Regular expression to find text between ``python and ``\n",
|
|
" match = re.search(r\"```python(.*?)```\", text, re.DOTALL)\n",
|
|
"\n",
|
|
" if match:\n",
|
|
" code = match.group(0).strip() # Extract and strip extra spaces\n",
|
|
" else:\n",
|
|
" code = \"\"\n",
|
|
" print(\"No matching substring found.\")\n",
|
|
"\n",
|
|
" return code.replace(\"```python\\n\", \"\").replace(\"```\", \"\")\n",
|
|
"\n",
|
|
"\n",
|
|
"def execute_coverage_report(python_interpreter=sys.executable):\n",
|
|
" if not python_interpreter:\n",
|
|
" raise EnvironmentError(\"Python interpreter not found in the specified virtual environment.\")\n",
|
|
" # test_code_path = Path(\"tests\")\n",
|
|
" # command = [\"pytest\", \"-cov\",\"--capture=no\"]\n",
|
|
" command = [\"coverage\", \"run\", \"-m\", \"pytest\"]\n",
|
|
" # command =[\"pytest\", \"--cov=your_package\", \"--cov-report=term-missing\"]\n",
|
|
"\n",
|
|
" try:\n",
|
|
" result = subprocess.run(command, check=True, capture_output=True, text=True)\n",
|
|
" print(\"Tests ran successfully!\")\n",
|
|
" print(result.stdout)\n",
|
|
" return result.stdout\n",
|
|
" except subprocess.CalledProcessError as e:\n",
|
|
" print(\"Some tests failed!\")\n",
|
|
" print(\"Output:\\n\", e.stdout)\n",
|
|
" print(\"Errors:\\n\", e.stderr)\n",
|
|
" # Extracting failed test information\n",
|
|
" failed_tests = []\n",
|
|
" for line in e.stdout.splitlines():\n",
|
|
" if \"FAILED\" in line and \"::\" in line:\n",
|
|
" failed_tests.append(line.strip())\n",
|
|
" if failed_tests:\n",
|
|
" print(\"Failed Tests:\")\n",
|
|
" for test in failed_tests:\n",
|
|
" print(test)\n",
|
|
" return failed_tests\n",
|
|
"\n",
|
|
"def save_unit_tests(code):\n",
|
|
"\n",
|
|
" match = re.search(r\"def\\s+(\\w+)\\(\", code, re.DOTALL)\n",
|
|
"\n",
|
|
" if match:\n",
|
|
" function_name = match.group(1).strip() # Extract and strip extra spaces\n",
|
|
" else:\n",
|
|
" function_name = \"\"\n",
|
|
" print(\"No matching substring found.\")\n",
|
|
"\n",
|
|
" test_code_path = Path(\"tests\")\n",
|
|
" (test_code_path / f\"test_{function_name}.py\").write_text(extract_code(code))\n",
|
|
" Path(\"tests\", \"test_code.py\").unlink()\n",
|
|
" \n",
|
|
"\n",
|
|
"def execute_tests_in_venv(code_to_test, tests, python_interpreter=sys.executable):\n",
|
|
" \"\"\"\n",
|
|
" Execute the given Python code string within the specified virtual environment.\n",
|
|
" \n",
|
|
" Args:\n",
|
|
" - code_str: str, the Python code to execute.\n",
|
|
" - venv_dir: str, the directory path to the virtual environment created by pipenv.\n",
|
|
" \"\"\"\n",
|
|
" \n",
|
|
" if not python_interpreter:\n",
|
|
" raise EnvironmentError(\"Python interpreter not found in the specified virtual environment.\")\n",
|
|
"\n",
|
|
" # Prepare the command to execute the code\n",
|
|
" code_str = textwrap.dedent(code_to_test) + \"\\n\" + extract_code(tests)\n",
|
|
" test_code_path = Path(\"tests\")\n",
|
|
" test_code_path.mkdir(parents=True, exist_ok=True)\n",
|
|
" (test_code_path / f\"test_code.py\").write_text(code_str)\n",
|
|
" command = [\"pytest\", str(test_code_path)]\n",
|
|
"\n",
|
|
" try:\n",
|
|
" result = subprocess.run(command, check=True, capture_output=True, text=True)\n",
|
|
" print(\"Tests ran successfully!\")\n",
|
|
" print(result.stderr)\n",
|
|
" return result.stdout\n",
|
|
" except subprocess.CalledProcessError as e:\n",
|
|
" print(\"Some tests failed!\")\n",
|
|
" print(\"Output:\\n\", e.stdout)\n",
|
|
" print(\"Errors:\\n\", e.stderr)\n",
|
|
" # Extracting failed test information\n",
|
|
" failed_tests = []\n",
|
|
" for line in e.stdout.splitlines():\n",
|
|
" if \"FAILED\" in line and \"::\" in line:\n",
|
|
" failed_tests.append(line.strip())\n",
|
|
" if failed_tests:\n",
|
|
" print(\"Failed Tests:\")\n",
|
|
" for test in failed_tests:\n",
|
|
" print(test)\n",
|
|
" return e.stderr\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Prompts and calls to the models"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"system_message = \"\"\"You are a helpful assistant which helps developers to write unit test cases for their code.\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_user_prompt(code):\n",
|
|
"\n",
|
|
" user_prompt = \"Write for a python code the unit test cases.\"\n",
|
|
" user_prompt += \"Return unit tests cases using pytest library, do not create any custom imports; do not explain your work other than a few comments.\"\n",
|
|
" user_prompt += \"Do not insert the function to be tested in the output before the tests. Validate both the case where the function is executed successfully and where it is expected to fail.\"\n",
|
|
" user_prompt += code\n",
|
|
"\n",
|
|
" return user_prompt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def stream_gpt(code):\n",
|
|
"\n",
|
|
" user_prompt = get_user_prompt(code)\n",
|
|
" stream = openai.chat.completions.create(\n",
|
|
" model=OPENAI_MODEL,\n",
|
|
" messages=[\n",
|
|
" {\"role\": \"system\", \"content\": system_message},\n",
|
|
" {\n",
|
|
" \"role\": \"user\",\n",
|
|
" \"content\": user_prompt,\n",
|
|
" },\n",
|
|
" ],\n",
|
|
" stream=True,\n",
|
|
" )\n",
|
|
"\n",
|
|
" response = \"\"\n",
|
|
" for chunk in stream:\n",
|
|
" response += chunk.choices[0].delta.content or \"\"\n",
|
|
" yield response\n",
|
|
" \n",
|
|
" return response\n",
|
|
"\n",
|
|
"def stream_ollama(code):\n",
|
|
"\n",
|
|
" user_prompt = get_user_prompt(code)\n",
|
|
" ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
|
" stream = ollama_via_openai.chat.completions.create(\n",
|
|
" model=OLLAMA_MODEL,\n",
|
|
" messages=[\n",
|
|
" {\"role\": \"system\", \"content\": system_message},\n",
|
|
" {\n",
|
|
" \"role\": \"user\",\n",
|
|
" \"content\": user_prompt,\n",
|
|
" },\n",
|
|
" ],\n",
|
|
" stream=True,\n",
|
|
" )\n",
|
|
"\n",
|
|
" response = \"\"\n",
|
|
" for chunk in stream:\n",
|
|
" response += chunk.choices[0].delta.content or \"\"\n",
|
|
" yield response\n",
|
|
" \n",
|
|
" return response\n",
|
|
"\n",
|
|
"\n",
|
|
"def stream_claude(code):\n",
|
|
" user_prompt = get_user_prompt(code)\n",
|
|
" result = claude.messages.stream(\n",
|
|
" model=CLAUDE_MODEL,\n",
|
|
" max_tokens=2000,\n",
|
|
" system=system_message,\n",
|
|
" messages=[\n",
|
|
" {\n",
|
|
" \"role\": \"user\",\n",
|
|
" \"content\": user_prompt,\n",
|
|
" }\n",
|
|
" ],\n",
|
|
" )\n",
|
|
" reply = \"\"\n",
|
|
" with result as stream:\n",
|
|
" for text in stream.text_stream:\n",
|
|
" reply += text\n",
|
|
" yield reply\n",
|
|
" print(text, end=\"\", flush=True)\n",
|
|
" return reply"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Code examples to test the inteface"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"function_to_test = \"\"\"\n",
|
|
" def lengthOfLongestSubstring(s):\n",
|
|
" max_length = 0\n",
|
|
" substring = \"\"\n",
|
|
" start_idx = 0\n",
|
|
" while start_idx < len(s):\n",
|
|
" string = s[start_idx:]\n",
|
|
" for i, x in enumerate(string):\n",
|
|
" substring += x\n",
|
|
" if len(substring) == len(set((list(substring)))):\n",
|
|
" \n",
|
|
" if len(set((list(substring)))) > max_length:\n",
|
|
" \n",
|
|
" max_length = len(substring)\n",
|
|
"\n",
|
|
" start_idx += 1\n",
|
|
" substring = \"\"\n",
|
|
" \n",
|
|
" \n",
|
|
" return max_length\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_code = \"\"\"```python\n",
|
|
"import pytest\n",
|
|
"\n",
|
|
"# Unit tests using pytest\n",
|
|
"def test_lengthOfLongestSubstring():\n",
|
|
" assert lengthOfLongestSubstring(\"abcabcbb\") == 3 # Case with repeating characters\n",
|
|
" assert lengthOfLongestSubstring(\"bbbbb\") == 1 # Case with all same characters\n",
|
|
" assert lengthOfLongestSubstring(\"pwwkew\") == 3 # Case with mixed characters\n",
|
|
" assert lengthOfLongestSubstring(\"\") == 0 # Empty string case\n",
|
|
" assert lengthOfLongestSubstring(\"abcdef\") == 6 # All unique characters\n",
|
|
" assert lengthOfLongestSubstring(\"abca\") == 3 # Case with pattern and repeat\n",
|
|
" assert lengthOfLongestSubstring(\"dvdf\") == 3 # Case with repeated characters separated\n",
|
|
" assert lengthOfLongestSubstring(\"a\") == 1 # Case with single character\n",
|
|
" assert lengthOfLongestSubstring(\"au\") == 2 # Case with unique two characters\n",
|
|
"```\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def optimize(code, model):\n",
|
|
" if model == \"GPT\":\n",
|
|
" result = stream_gpt(code)\n",
|
|
" elif model == \"Claude\":\n",
|
|
" result = stream_claude(code)\n",
|
|
" elif model == \"Ollama\":\n",
|
|
" result = stream_ollama(code)\n",
|
|
" else:\n",
|
|
" raise ValueError(\"Unknown model\")\n",
|
|
" for stream_so_far in result:\n",
|
|
" yield stream_so_far\n",
|
|
" return result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Gradio interface"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"with gr.Blocks() as ui:\n",
|
|
" gr.Markdown(\"## Write unit tests for Python code\")\n",
|
|
" with gr.Row():\n",
|
|
" with gr.Column(scale=1, min_width=300):\n",
|
|
" python = gr.Textbox(label=\"Python code:\", value=function_to_test, lines=10)\n",
|
|
" model = gr.Dropdown([\"GPT\", \"Claude\", \"Ollama\"], label=\"Select model\", value=\"GPT\")\n",
|
|
" unit_tests = gr.Button(\"Write unit tests\")\n",
|
|
" with gr.Column(scale=1, min_width=300):\n",
|
|
" unit_tests_out = gr.TextArea(label=\"Unit tests\", value=test_code, elem_classes=[\"python\"])\n",
|
|
" unit_tests_run = gr.Button(\"Run unit tests\")\n",
|
|
" coverage_run = gr.Button(\"Coverage report\")\n",
|
|
" save_test_run = gr.Button(\"Save unit tests\")\n",
|
|
" with gr.Row():\n",
|
|
" \n",
|
|
" python_out = gr.TextArea(label=\"Unit tests result\", elem_classes=[\"python\"])\n",
|
|
" coverage_out = gr.TextArea(label=\"Coverage report\", elem_classes=[\"python\"])\n",
|
|
" \n",
|
|
"\n",
|
|
" unit_tests.click(optimize, inputs=[python, model], outputs=[unit_tests_out])\n",
|
|
" unit_tests_run.click(execute_tests_in_venv, inputs=[python, unit_tests_out], outputs=[python_out])\n",
|
|
" coverage_run.click(execute_coverage_report, outputs=[coverage_out])\n",
|
|
" save_test_run.click(save_unit_tests, inputs=[unit_tests_out])\n",
|
|
"\n",
|
|
"\n",
|
|
"ui.launch(inbrowser=True)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "llm_engineering-yg2xCEUG",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|