381
week3/community-contributions/Week3-Dataset_Generator-DP.ipynb
Normal file
381
week3/community-contributions/Week3-Dataset_Generator-DP.ipynb
Normal file
@@ -0,0 +1,381 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c08309b8-13f0-45bb-a3ea-7b01f05a7346",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"import pandas as pd\n",
|
||||
"import random\n",
|
||||
"import re\n",
|
||||
"import subprocess\n",
|
||||
"import pyarrow as pa\n",
|
||||
"from typing import List\n",
|
||||
"import openai\n",
|
||||
"import anthropic\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import gradio as gr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f5efd903-e683-4e7f-8747-2998e23a0751",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load API keys from the .env file, overriding any values already set in the environment\n",
|
||||
"load_dotenv(override=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce49b86a-53f4-4d4f-a721-0d66d9c1b070",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Schema Definition ---\n",
|
||||
"SCHEMA = [\n",
|
||||
" (\"Team\", \"TEXT\", '\"Toronto Raptors\"'),\n",
|
||||
" (\"NAME\", \"TEXT\", '\"Otto Porter Jr.\"'),\n",
|
||||
" (\"Jersey\", \"TEXT\", '\"10\", or \"NA\" if null'),\n",
|
||||
" (\"POS\", \"TEXT\", 'One of [\"PF\",\"SF\",\"G\",\"C\",\"SG\",\"F\",\"PG\"]'),\n",
|
||||
" (\"AGE\", \"INT\", 'integer age in years, e.g., 22'),\n",
|
||||
" (\"HT\", \"TEXT\", '`6\\' 7\"` or `6\\' 10\"`'),\n",
|
||||
" (\"WT\", \"TEXT\", '\"232 lbs\"'),\n",
|
||||
" (\"COLLEGE\", \"TEXT\", '\"Michigan\", or \"--\" if null'),\n",
|
||||
" (\"SALARY\", \"TEXT\", '\"$9,945,830\", or \"--\" if null')\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "93743e57-c2c5-43e5-8fa1-2e242085db07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Default schema text for the textbox\n",
|
||||
"DEFAULT_SCHEMA_TEXT = \"\\n\".join([f\"{i+1}. {col[0]} ({col[1]}) Example: {col[2]}\" for i, col in enumerate(SCHEMA)])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "87c58595-6fdd-48f5-a253-ccba352cb385",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Available models\n",
|
||||
"MODELS = [\n",
|
||||
" \"gpt-4o\",\n",
|
||||
" \"claude-3-5-haiku-20241022\", \n",
|
||||
" \"ollama:llama3.2:latest\"\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08cd9ce2-8685-46b5-95d0-811b8025696f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Available file formats\n",
|
||||
"FILE_FORMATS = [\".csv\", \".tsv\", \".jsonl\", \".parquet\", \".arrow\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "13d68c7f-6f49-4efa-b075-f1e7db2ab527",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_prompt(n: int, schema_text: str, system_prompt: str) -> str:\n",
|
||||
" prompt = f\"\"\"\n",
|
||||
"{system_prompt}\n",
|
||||
"\n",
|
||||
"Generate {n} rows of realistic basketball player data in JSONL format, each line a JSON object with the following fields:\n",
|
||||
"\n",
|
||||
"{schema_text}\n",
|
||||
"\n",
|
||||
"Do NOT repeat column values from one row to another.\n",
|
||||
"\n",
|
||||
"Only output valid JSONL.\n",
|
||||
"\"\"\"\n",
|
||||
" return prompt.strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cdc68f1e-4fbe-45dc-aa36-ce5f718ef6ca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- LLM Interface ---\n",
|
||||
"def query_model(prompt: str, model: str = \"gpt-4o\") -> List[dict]:\n",
|
||||
" \"\"\"Send the prompt to OpenAI, Claude, or Ollama (chosen by the model-name prefix) and parse the JSONL reply into a list of dicts.\"\"\"\n",
|
||||
" try:\n",
|
||||
" if model.lower().startswith(\"gpt\"):\n",
|
||||
" client = openai.OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
|
||||
" response = client.chat.completions.create(\n",
|
||||
" model=model,\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
|
||||
" temperature=0.7\n",
|
||||
" )\n",
|
||||
" content = response.choices[0].message.content\n",
|
||||
"\n",
|
||||
" elif model.lower().startswith(\"claude\"):\n",
|
||||
" client = anthropic.Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))\n",
|
||||
" response = client.messages.create(\n",
|
||||
" model=model,\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
|
||||
" max_tokens=4000,\n",
|
||||
" temperature=0.7\n",
|
||||
" )\n",
|
||||
" content = response.content[0].text\n",
|
||||
"\n",
|
||||
" elif model.lower().startswith(\"ollama:\"):\n",
|
||||
" ollama_model = model.split(\":\")[1]\n",
|
||||
" result = subprocess.run(\n",
|
||||
" [\"ollama\", \"run\", ollama_model],\n",
|
||||
" input=prompt,\n",
|
||||
" text=True,\n",
|
||||
" capture_output=True\n",
|
||||
" )\n",
|
||||
" if result.returncode != 0:\n",
|
||||
" raise Exception(f\"Ollama error: {result.stderr}\")\n",
|
||||
" content = result.stdout\n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"Unsupported model. Use 'gpt-4.1-mini', 'claude-3-5-haiku-20241022', or 'ollama:llama3.2:latest'\")\n",
|
||||
"\n",
|
||||
" # Parse JSONL output\n",
|
||||
" lines = [line.strip() for line in content.strip().splitlines() if line.strip().startswith(\"{\")]\n",
|
||||
" return [json.loads(line) for line in lines]\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" raise Exception(f\"Model query failed: {str(e)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29e3f5f5-e99c-429c-bea9-69d554c58c9c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Output Formatter ---\n",
|
||||
"def save_dataset(records: List[dict], file_format: str, filename: str):\n",
|
||||
" df = pd.DataFrame(records)\n",
|
||||
" if file_format == \".csv\":\n",
|
||||
" df.to_csv(filename, index=False)\n",
|
||||
" elif file_format == \".tsv\":\n",
|
||||
" df.to_csv(filename, sep=\"\\t\", index=False)\n",
|
||||
" elif file_format == \".jsonl\":\n",
|
||||
" with open(filename, \"w\") as f:\n",
|
||||
" for record in records:\n",
|
||||
" f.write(json.dumps(record) + \"\\n\")\n",
|
||||
" elif file_format == \".parquet\":\n",
|
||||
" df.to_parquet(filename, engine=\"pyarrow\", index=False)\n",
|
||||
" elif file_format == \".arrow\":\n",
|
||||
" table = pa.Table.from_pandas(df)\n",
|
||||
" with pa.OSFile(filename, \"wb\") as sink:\n",
|
||||
" with pa.ipc.new_file(sink, table.schema) as writer:\n",
|
||||
" writer.write(table)\n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"Unsupported file format\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fe258e84-66f4-4fe7-99c0-75b24148e147",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Main Generation Function ---\n",
|
||||
"def generate_dataset(schema_text, system_prompt, model, nr_records, file_format, save_as):\n",
|
||||
" try:\n",
|
||||
" # Validation\n",
|
||||
" if nr_records <= 10:\n",
|
||||
" return \"❌ Error: Nr_records must be greater than 10.\", None\n",
|
||||
" \n",
|
||||
" if file_format not in FILE_FORMATS:\n",
|
||||
" return \"❌ Error: Invalid file format specified.\", None\n",
|
||||
" \n",
|
||||
" if not save_as or save_as.strip() == \"\":\n",
|
||||
" save_as = f\"basketball_dataset{file_format}\"\n",
|
||||
" elif not save_as.endswith(file_format):\n",
|
||||
" save_as = save_as + file_format\n",
|
||||
" \n",
|
||||
" # Generate prompt\n",
|
||||
" prompt = get_prompt(nr_records, schema_text, system_prompt)\n",
|
||||
" \n",
|
||||
" # Query model\n",
|
||||
" records = query_model(prompt, model=model)\n",
|
||||
" \n",
|
||||
" if not records:\n",
|
||||
" return \"❌ Error: No valid records generated from the model.\", None\n",
|
||||
" \n",
|
||||
" # Save dataset\n",
|
||||
" save_dataset(records, file_format, save_as)\n",
|
||||
" \n",
|
||||
" # Create preview\n",
|
||||
" df = pd.DataFrame(records)\n",
|
||||
" preview = df.head(10) # Show first 10 rows\n",
|
||||
" \n",
|
||||
" success_message = f\"✅ Dataset generated successfully!\\n📁 Saved to: {save_as}\\n📊 Generated {len(records)} records\"\n",
|
||||
" \n",
|
||||
" return success_message, preview\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"❌ Error: {str(e)}\", None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c2405a9d-b4cd-43d9-82f6-ff3512b4541f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Gradio Interface ---\n",
|
||||
"def create_interface():\n",
|
||||
" with gr.Blocks(title=\"Dataset Generator\", theme=gr.themes.Soft()) as interface:\n",
|
||||
" gr.Markdown(\"# Dataset Generator\")\n",
|
||||
" gr.Markdown(\"Generate realistic datasets using AI models\")\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column(scale=2):\n",
|
||||
" schema_input = gr.Textbox(\n",
|
||||
" label=\"Schema\",\n",
|
||||
" value=DEFAULT_SCHEMA_TEXT,\n",
|
||||
" lines=15,\n",
|
||||
" placeholder=\"Define your dataset schema here...\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" system_prompt_input = gr.Textbox(\n",
|
||||
" label=\"Prompt\",\n",
|
||||
" value=\"You are a helpful assistant that generates realistic basketball player data.\",\n",
|
||||
" lines=1,\n",
|
||||
" placeholder=\"Enter system prompt for the model...\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" model_dropdown = gr.Dropdown(\n",
|
||||
" label=\"Model\",\n",
|
||||
" choices=MODELS,\n",
|
||||
" value=MODELS[1], # Default to Claude\n",
|
||||
" interactive=True\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" nr_records_input = gr.Number(\n",
|
||||
" label=\"Nr. records\",\n",
|
||||
" value=25,\n",
|
||||
" minimum=11,\n",
|
||||
" maximum=1000,\n",
|
||||
" step=1\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" file_format_dropdown = gr.Dropdown(\n",
|
||||
" label=\"File format\",\n",
|
||||
" choices=FILE_FORMATS,\n",
|
||||
" value=\".csv\",\n",
|
||||
" interactive=True\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" save_as_input = gr.Textbox(\n",
|
||||
" label=\"Save as\",\n",
|
||||
" value=\"basketball_dataset\",\n",
|
||||
" placeholder=\"Enter filename (extension will be added automatically)\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" generate_btn = gr.Button(\"🚀 Generate\", variant=\"primary\", size=\"lg\")\n",
|
||||
" \n",
|
||||
" with gr.Column(scale=1):\n",
|
||||
" output_status = gr.Textbox(\n",
|
||||
" label=\"Status\",\n",
|
||||
" lines=4,\n",
|
||||
" interactive=False\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" output_preview = gr.Dataframe(\n",
|
||||
" label=\"Preview (First 10 rows)\",\n",
|
||||
" interactive=False,\n",
|
||||
" wrap=True\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Connect the generate button\n",
|
||||
" generate_btn.click(\n",
|
||||
" fn=generate_dataset,\n",
|
||||
" inputs=[\n",
|
||||
" schema_input,\n",
|
||||
" system_prompt_input, \n",
|
||||
" model_dropdown,\n",
|
||||
" nr_records_input,\n",
|
||||
" file_format_dropdown,\n",
|
||||
" save_as_input\n",
|
||||
" ],\n",
|
||||
" outputs=[output_status, output_preview]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" gr.Markdown(\"\"\"\n",
|
||||
" ### 📝 Instructions:\n",
|
||||
" 1. **Schema**: Define the structure of your dataset (pre-filled with basketball player schema)\n",
|
||||
" 2. **Prompt**: System prompt to guide the AI model\n",
|
||||
" 3. **Model**: Choose between GPT, Claude, or Ollama models\n",
|
||||
" 4. **Nr. records**: Number of records to generate (minimum 11)\n",
|
||||
" 5. **File format**: Choose output format (.csv, .tsv, .jsonl, .parquet, .arrow)\n",
|
||||
" 6. **Save as**: Filename (extension added automatically)\n",
|
||||
" 7. Click **Generate** to create your dataset\n",
|
||||
" \n",
|
||||
" ### 🔧 Requirements:\n",
|
||||
" - Set up your API keys in `.env` file (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`)\n",
|
||||
" - For Ollama models, ensure Ollama is installed and running locally\n",
|
||||
" \"\"\")\n",
|
||||
" \n",
|
||||
" return interface"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "50fd2b91-2578-4224-b9dd-e28caf6a0a85",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"interface = create_interface()\n",
|
||||
"interface.launch(inbrowser=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
400
week4/community-contributions/Week4-Comments-Generator-DP.ipynb
Normal file
400
week4/community-contributions/Week4-Comments-Generator-DP.ipynb
Normal file
@@ -0,0 +1,400 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3e473bbd-a0c2-43bd-bf99-c749784d00c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gradio as gr\n",
|
||||
"import openai\n",
|
||||
"import anthropic\n",
|
||||
"import google.generativeai as genai\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"from typing import Dict, Any, Optional\n",
|
||||
"import asyncio\n",
|
||||
"from dotenv import load_dotenv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16210512-41f1-4de3-8348-2cd7129e023f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load API keys from the .env file, overriding any values already set in the environment\n",
|
||||
"load_dotenv(override=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6747e275-91eb-4d2b-90b6-805f2bd9b6b7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CodeCommenter:\n",
|
||||
" def __init__(self):\n",
|
||||
" # API clients start unset; setup_clients() creates each one whose API key is available\n",
|
||||
" self.openai_client = None\n",
|
||||
" self.anthropic_client = None\n",
|
||||
" self.gemini_client = None\n",
|
||||
" \n",
|
||||
" # Load API keys from environment variables\n",
|
||||
" self.setup_clients()\n",
|
||||
" \n",
|
||||
" def setup_clients(self):\n",
|
||||
" \"\"\"Initialize API clients with keys from environment variables\"\"\"\n",
|
||||
" try:\n",
|
||||
" # OpenAI\n",
|
||||
" openai_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
" if openai_key:\n",
|
||||
" self.openai_client = openai.OpenAI(api_key=openai_key)\n",
|
||||
" \n",
|
||||
" # Anthropic\n",
|
||||
" anthropic_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
||||
" if anthropic_key:\n",
|
||||
" self.anthropic_client = anthropic.Anthropic(api_key=anthropic_key)\n",
|
||||
" \n",
|
||||
" # Google Gemini\n",
|
||||
" gemini_key = os.getenv('GOOGLE_API_KEY')\n",
|
||||
" if gemini_key:\n",
|
||||
" genai.configure(api_key=gemini_key)\n",
|
||||
" self.gemini_client = genai.GenerativeModel('gemini-2.0-flash-exp')\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Warning: Error setting up API clients: {e}\")\n",
|
||||
" \n",
|
||||
" def create_prompt(self, code: str, language: str) -> str:\n",
|
||||
" \"\"\"Create a prompt for the LLM to add comments and docstrings\"\"\"\n",
|
||||
" return f\"\"\"Please add detailed and helpful comments and docstrings to the following {language} code. \n",
|
||||
" \n",
|
||||
"Guidelines:\n",
|
||||
"1. Add comprehensive docstrings for functions, classes, and modules\n",
|
||||
"2. Add inline comments explaining complex logic\n",
|
||||
"3. Follow the commenting conventions for {language}\n",
|
||||
"4. Maintain the original code structure and functionality\n",
|
||||
"5. Make comments clear and professional\n",
|
||||
"6. Don't change the actual code logic, only add comments\n",
|
||||
"7. Do not add code markdown delimiters like ```python\n",
|
||||
"\n",
|
||||
"Here's the code to comment:\n",
|
||||
"\n",
|
||||
"{code}\n",
|
||||
"\n",
|
||||
"Please return only the commented code without any additional explanation or markdown formatting.\"\"\"\n",
|
||||
"\n",
|
||||
" def call_openai(self, prompt: str, model: str = \"gpt-4o-mini\") -> str:\n",
|
||||
" \"\"\"Make API call to OpenAI\"\"\"\n",
|
||||
" if not self.openai_client:\n",
|
||||
" return \"Error: OpenAI API key not configured. Please set OPENAI_API_KEY environment variable.\"\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" response = self.openai_client.chat.completions.create(\n",
|
||||
" model=model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a helpful coding assistant that adds detailed comments and docstrings to code.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": prompt}\n",
|
||||
" ],\n",
|
||||
" max_tokens=4000,\n",
|
||||
" temperature=0.1\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content.strip()\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error calling OpenAI API: {str(e)}\"\n",
|
||||
" \n",
|
||||
" def call_anthropic(self, prompt: str, model: str = \"claude-3-5-haiku-20241022\") -> str:\n",
|
||||
" \"\"\"Make API call to Anthropic Claude\"\"\"\n",
|
||||
" if not self.anthropic_client:\n",
|
||||
" return \"Error: Anthropic API key not configured. Please set ANTHROPIC_API_KEY environment variable.\"\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" response = self.anthropic_client.messages.create(\n",
|
||||
" model=model,\n",
|
||||
" max_tokens=4000,\n",
|
||||
" temperature=0.1,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"user\", \"content\": prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" return response.content[0].text.strip()\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error calling Anthropic API: {str(e)}\"\n",
|
||||
" \n",
|
||||
" def call_gemini(self, prompt: str) -> str:\n",
|
||||
" \"\"\"Make API call to Google Gemini\"\"\"\n",
|
||||
" if not self.gemini_client:\n",
|
||||
" return \"Error: Google API key not configured. Please set GOOGLE_API_KEY environment variable.\"\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" response = self.gemini_client.generate_content(\n",
|
||||
" prompt,\n",
|
||||
" generation_config=genai.types.GenerationConfig(\n",
|
||||
" max_output_tokens=4000,\n",
|
||||
" temperature=0.1,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" return response.text.strip()\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error calling Gemini API: {str(e)}\"\n",
|
||||
" \n",
|
||||
" def call_ollama(self, prompt: str, model: str = \"llama3.2:latest\") -> str:\n",
|
||||
" \"\"\"Make API call to Ollama (local)\"\"\"\n",
|
||||
" try:\n",
|
||||
" url = \"http://localhost:11434/api/generate\"\n",
|
||||
" data = {\n",
|
||||
" \"model\": model,\n",
|
||||
" \"prompt\": prompt,\n",
|
||||
" \"stream\": False,\n",
|
||||
" \"options\": {\n",
|
||||
" \"temperature\": 0.1,\n",
|
||||
" \"num_predict\": 4000\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" response = requests.post(url, json=data, timeout=60)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" result = response.json()\n",
|
||||
" return result.get('response', '').strip()\n",
|
||||
" else:\n",
|
||||
" return f\"Error calling Ollama API: HTTP {response.status_code}\"\n",
|
||||
" except requests.exceptions.ConnectionError:\n",
|
||||
" return \"Error: Could not connect to Ollama. Make sure Ollama is running locally on port 11434.\"\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error calling Ollama API: {str(e)}\"\n",
|
||||
"\n",
|
||||
" def generate_comments(self, language: str, code: str, llm: str) -> str:\n",
|
||||
" \"\"\"Generate comments for the given code using the specified LLM\"\"\"\n",
|
||||
" if not code.strip():\n",
|
||||
" return \"Error: Please provide code to comment.\"\n",
|
||||
" \n",
|
||||
" prompt = self.create_prompt(code, language)\n",
|
||||
" \n",
|
||||
" # Route to appropriate LLM\n",
|
||||
" if llm == \"gpt-4o-mini\":\n",
|
||||
" return self.call_openai(prompt, \"gpt-4o-mini\")\n",
|
||||
" elif llm == \"claude-3-5-haiku-20241022\":\n",
|
||||
" return self.call_anthropic(prompt, \"claude-3-5-haiku-20241022\")\n",
|
||||
" elif llm == \"gemini-2.0-flash\":\n",
|
||||
" return self.call_gemini(prompt)\n",
|
||||
" elif llm == \"ollama:llama3.2:latest\":\n",
|
||||
" return self.call_ollama(prompt, \"llama3.2:latest\")\n",
|
||||
" else:\n",
|
||||
" return f\"Error: Unsupported LLM: {llm}\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "813f0911-d53f-4887-9341-656712e32d8f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_gradio_interface():\n",
|
||||
" \"\"\"Create and configure the Gradio interface\"\"\"\n",
|
||||
" commenter = CodeCommenter()\n",
|
||||
" \n",
|
||||
" # Define the main function for the interface\n",
|
||||
" def process_code(language, code, llm):\n",
|
||||
" \"\"\"Process the code and return commented version\"\"\"\n",
|
||||
" if not code.strip():\n",
|
||||
" return \"Please enter some code to comment.\"\n",
|
||||
" \n",
|
||||
" # Log progress to the console (this message is not returned to the Gradio UI)\n",
|
||||
" processing_msg = f\"Processing {language} code with {llm}...\"\n",
|
||||
" print(processing_msg)\n",
|
||||
" \n",
|
||||
" # Generate comments\n",
|
||||
" result = commenter.generate_comments(language, code, llm)\n",
|
||||
" return result\n",
|
||||
" \n",
|
||||
" # Define default code\n",
|
||||
" default_code = \"\"\"import pyodbc\n",
|
||||
"from tabulate import tabulate\n",
|
||||
"def connect_to_sql_server(server_name, database, username=None, password=None):\n",
|
||||
" try:\n",
|
||||
" if username and password:\n",
|
||||
" connection_string = f\"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server_name};DATABASE={database};UID={username};PWD={password}\"\n",
|
||||
" else:\n",
|
||||
" connection_string = f\"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server_name};DATABASE={database};Trusted_Connection=yes\"\n",
|
||||
" connection = pyodbc.connect(connection_string)\n",
|
||||
" print(f\"Successfully connected to {server_name}/{database}\")\n",
|
||||
" return connection\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Failed to connect to {server_name}/{database}: {str(e)}\")\n",
|
||||
" return None\n",
|
||||
"def get_record_count(connection, table_name):\n",
|
||||
" try:\n",
|
||||
" cursor = connection.cursor()\n",
|
||||
" query = f\"SELECT COUNT(*) FROM {table_name}\"\n",
|
||||
" cursor.execute(query)\n",
|
||||
" count = cursor.fetchone()[0]\n",
|
||||
" cursor.close()\n",
|
||||
" print(f\"Record count for {table_name}: {count}\")\n",
|
||||
" return count\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Failed to get record count for {table_name}: {str(e)}\")\n",
|
||||
" return None\n",
|
||||
"def select_top_records(connection, table_name, n):\n",
|
||||
" try:\n",
|
||||
" cursor = connection.cursor()\n",
|
||||
" query = f\"SELECT TOP {n} * FROM {table_name}\"\n",
|
||||
" cursor.execute(query)\n",
|
||||
" records = cursor.fetchall()\n",
|
||||
" columns = [column[0] for column in cursor.description]\n",
|
||||
" cursor.close()\n",
|
||||
" print(f\"Top {n} records from {table_name}\")\n",
|
||||
" if records:\n",
|
||||
" print(tabulate(records, headers=columns, tablefmt=\"grid\"))\n",
|
||||
" return records\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Failed to retrieve top {n} records from {table_name}: {str(e)}\")\n",
|
||||
" return None\n",
|
||||
"conn = connect_to_sql_server(\"localhost\", \"AdventureWorks_lite\")\n",
|
||||
"if conn:\n",
|
||||
" total_records = get_record_count(conn, \"Sales.SalesOrderDetail\")\n",
|
||||
" top_records = select_top_records(conn, \"Production.Product\", 10)\n",
|
||||
" conn.close()\n",
|
||||
" print(\"Connection closed successfully\")\"\"\"\n",
|
||||
"\n",
|
||||
" css = \"\"\"\n",
|
||||
"textarea[rows]:not([rows=\"1\"]) {\n",
|
||||
" overflow-y: auto !important;\n",
|
||||
" scrollbar-width: thin !important;\n",
|
||||
"}\n",
|
||||
"textarea[rows]:not([rows=\"1\"])::-webkit-scrollbar {\n",
|
||||
" all: initial !important;\n",
|
||||
" background: #f1f1f1 !important;\n",
|
||||
"}\n",
|
||||
"textarea[rows]:not([rows=\"1\"])::-webkit-scrollbar-thumb {\n",
|
||||
" all: initial !important;\n",
|
||||
" background: #a8a8a8 !important;\n",
|
||||
"}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
" # Create the interface\n",
|
||||
" with gr.Blocks(title=\"Code Commenter\", theme=gr.themes.Base(), css=css) as interface:\n",
|
||||
" gr.Markdown(\"# 🔧 Code Commenter\")\n",
|
||||
" gr.Markdown(\"Add detailed comments and docstrings to your code using various LLM models.\")\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column():\n",
|
||||
" code_input = gr.Textbox(\n",
|
||||
" label=\"Input Code\",\n",
|
||||
" value=default_code,\n",
|
||||
" lines=15,\n",
|
||||
" max_lines=20,\n",
|
||||
" info=\"Enter the code you want to add comments to\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Column():\n",
|
||||
" code_output = gr.Textbox(\n",
|
||||
" label=\"Commented Code\",\n",
|
||||
" lines=20,\n",
|
||||
" max_lines=20,\n",
|
||||
" info=\"Your code with added comments and docstrings\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column(scale=1):\n",
|
||||
" language_dropdown = gr.Dropdown(\n",
|
||||
" choices=[\"Python\", \"Ruby\", \"Rust\", \"C++\", \"Java\"],\n",
|
||||
" value=\"Python\",\n",
|
||||
" label=\"Programming Language\",\n",
|
||||
" info=\"Select the programming language of your code\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" llm_dropdown = gr.Dropdown(\n",
|
||||
" choices=[\n",
|
||||
" \"gpt-4o-mini\",\n",
|
||||
" \"claude-3-5-haiku-20241022\", \n",
|
||||
" \"gemini-2.0-flash\",\n",
|
||||
" \"ollama:llama3.2:latest\"\n",
|
||||
" ],\n",
|
||||
" value=\"gpt-4o-mini\",\n",
|
||||
" label=\"LLM Model\",\n",
|
||||
" info=\"Choose the language model to use\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" generate_btn = gr.Button(\n",
|
||||
" \"🚀 Generate Comments\", \n",
|
||||
" variant=\"primary\",\n",
|
||||
" size=\"lg\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Add some API setup information\n",
|
||||
" gr.Markdown(\"## 📝 API Setup Instructions\")\n",
|
||||
" gr.Markdown(\"\"\"\n",
|
||||
" To use this tool, you need to set up API keys as environment variables:\n",
|
||||
" \n",
|
||||
" - **OpenAI**: Set `OPENAI_API_KEY`\n",
|
||||
" - **Anthropic**: Set `ANTHROPIC_API_KEY` \n",
|
||||
" - **Google Gemini**: Set `GOOGLE_API_KEY`\n",
|
||||
" - **Ollama**: Make sure Ollama is running locally on port 11434\n",
|
||||
" \"\"\")\n",
|
||||
" \n",
|
||||
" # Connect the button to the processing function\n",
|
||||
" generate_btn.click(\n",
|
||||
" fn=process_code,\n",
|
||||
" inputs=[language_dropdown, code_input, llm_dropdown],\n",
|
||||
" outputs=code_output,\n",
|
||||
" show_progress=True\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" return interface"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ef461e08-c1d5-406d-b7d2-a4329f16486e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"🚀 Starting Code Commenter...\")\n",
|
||||
"print(\"📋 Setting up Gradio interface...\")\n",
|
||||
"\n",
|
||||
"# Create and launch the interface\n",
|
||||
"interface = create_gradio_interface()\n",
|
||||
"\n",
|
||||
"print(\"🌐 Launching interface...\")\n",
|
||||
"print(\"💡 The interface will open in your default browser\")\n",
|
||||
"print(\"🔧 Make sure to set up your API keys as environment variables\")\n",
|
||||
"\n",
|
||||
"# Launch with auto-opening in browser\n",
|
||||
"interface.launch(\n",
|
||||
" server_name=\"127.0.0.1\",\n",
|
||||
" server_port=7860,\n",
|
||||
" share=False,\n",
|
||||
" inbrowser=True,\n",
|
||||
" show_error=True\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user