From 756bd0b623be3b8906b677e50ad7f119ae842d14 Mon Sep 17 00:00:00 2001 From: Krabulek Date: Fri, 19 Sep 2025 13:45:07 +0200 Subject: [PATCH 1/3] Week 3 exercise - Intelligent Dataset Generator --- .../intelligent_dataset_generator.ipynb | 600 ++++++++++++++++++ 1 file changed, 600 insertions(+) create mode 100644 week3/community-contributions/intelligent_dataset_generator.ipynb diff --git a/week3/community-contributions/intelligent_dataset_generator.ipynb b/week3/community-contributions/intelligent_dataset_generator.ipynb new file mode 100644 index 0000000..9a374a1 --- /dev/null +++ b/week3/community-contributions/intelligent_dataset_generator.ipynb @@ -0,0 +1,600 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QTJt9pwUTbHo" + }, + "source": [ + "# Intelligent Synthetic Dataset Generator\n", + "\n", + "An AI-powered tool that creates realistic synthetic datasets for any business case—whether you provide the schema or let it intelligently design one for you.\n", + "\n", + "It works with the Claude, Gemini, GPT, and Hugging Face APIs." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_FljmlTUoka" + }, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aONqZ-SjUJdg", + "outputId": "1f5c7b2e-95f0-4f23-cf01-2bd5bda0807a" + }, + "outputs": [], + "source": [ + "!pip install -q requests bitsandbytes anthropic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ub1unBFvTatE" + }, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "import json\n", + "from google.colab import userdata\n", + "\n", + "from openai import OpenAI\n", + "import anthropic\n", + "from huggingface_hub import login\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n", + "import torch\n", + "import pandas as pd\n", + "\n", + "import gradio as gr\n", + "import gc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "viZNPtObUOcz" + }, + "outputs": [], + "source": [ + "hf_token = userdata.get('HF_TOKEN')\n", + "openai_api_key = userdata.get('OPENAI_API_KEY')\n", + "anthropic_api_key = userdata.get('ANTHROPIC_API_KEY')\n", + "google_api_key = userdata.get('GOOGLE_API_KEY')\n", + "\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9Q94S6JTUWn5" + }, + "outputs": [], + "source": [ + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mrjdVEpaUxHz" + }, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LvNE6foEUPaz" + }, + "outputs": [], + "source": [ + "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n", + "PHI3 = \"microsoft/Phi-3-mini-4k-instruct\"\n", + "GEMMA2 = \"google/gemma-2-2b-it\"\n", + "GPT = \"gpt-4o-mini\"\n", + "CLAUDE = \"claude-3-haiku-20240307\"\n", + "GEMINI = \"gemini-2.0-flash\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tvafTFD8XmaO" + }, + "outputs": [], + "source": [ + "MODELS = {\n", + " 'Llama 3.1' : LLAMA,\n", + " 'Phi 3 mini': PHI3,\n", + " 'Gemma 2': GEMMA2,\n", + " 'GPT-4o mini': GPT,\n",
+ " 'Claude 3 Haiku': CLAUDE,\n", + " 'Gemini 2.0 Flash': GEMINI,\n", + "}\n", + "\n", + "HF_MODELS = [LLAMA, PHI3, GEMMA2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2LZqA9QXXl0t" + }, + "outputs": [], + "source": [ + "FILE_FORMATS = [\".csv\", \".tsv\", \".jsonl\", \".json\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "d6EnN7SVXhza", + "outputId": "55f6ac4d-adeb-4216-b2a8-d67524b005d3" + }, + "outputs": [], + "source": [ + "SCHEMA = [\n", + " (\"Name\", \"TEXT\", \"Name of the restaurant\", \"Blue River Bistro\"),\n", + " (\"Address\", \"TEXT\", \"Restaurant address\", \"742 Evergreen Terrace, Springfield, IL 62704\"),\n", + " (\"Type\", \"TEXT\", \"Kitchen type\", 'One of [\"Thai\",\"Mediterranean\",\"Vegan\",\"Steakhouse\",\"Japanese\"] or other potential types'),\n", + " (\"Average Price\", \"TEXT\", \"Average meal price\", \"$45, or '--' if unknown\"),\n", + " (\"Year\", \"INT\", \"Year of restaurant opening\", 2015),\n", + " (\"Menu\", \"Array\", \"List of meals\", '[\"Grilled Salmon\", \"Caesar Salad\", \"Pad Thai\", \"Margherita Pizza\", ...]'),\n", + "]\n", + "\n", + "DEFAULT_SCHEMA_TEXT = \"\\n\".join([f\"{i+1}. {col[0]} ({col[1]}) - {col[2]}, example: {col[3]}\" for i, col in enumerate(SCHEMA)])\n", + "print(DEFAULT_SCHEMA_TEXT)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W-46TDTOXiS7" + }, + "outputs": [], + "source": [ + "system_prompt = \"\"\"\n", + "You are an expert in generating synthetic datasets tailored to a given business case and user requirements.\n", + "If the user does not specify output columns, infer and create the most appropriate columns based on your expertise.\n", + "Do NOT repeat column values from one row to another. 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W-46TDTOXiS7" + }, + "outputs": [], + "source": [ + "system_prompt = \"\"\"\n", + "You are an expert in generating synthetic datasets tailored to a given business case and user requirements.\n", + "If the user does not specify output columns, infer and create the most appropriate columns based on your expertise.\n", + "Do NOT repeat column values from one row to another. Only output valid JSONL without any comments.\n", + "\"\"\"\n", + "\n", + "\n", + "def get_user_prompt(business_case, schema_text, nr_records):\n", + " prompt = f\"The business case is: {business_case}.\\nGenerate {nr_records} rows of data in JSONL format.\\n\"\n", + "\n", + " if schema_text is not None:\n", + " prompt += f\"Each line should be a JSON object with the following fields: \\n{schema_text}\\n\"\n", + "\n", + " return prompt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gPf1GcAwhwa_" + }, + "source": [ + "## LLM handler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Tf-WEQUKhY-z" + }, + "outputs": [], + "source": [ + "def ask_gpt(model: str, user_prompt: str):\n", + " client = OpenAI(api_key=openai_api_key)\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=messages,\n", + " temperature=0.7\n", + " )\n", + " content = response.choices[0].message.content\n", + "\n", + " return content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "856pnIHahzDd" + }, + "outputs": [], + "source": [ + "def ask_claude(model: str, user_prompt: str):\n", + " client = anthropic.Anthropic(api_key=anthropic_api_key)\n", + " response = client.messages.create(\n", + " model=model,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt}],\n", + " max_tokens=4000,\n", + " temperature=0.7,\n", + " system=system_prompt\n", + " )\n", + " content = response.content[0].text\n", + "\n", + " return content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p0AfSbcBiUlg" + }, + "outputs": [], + "source": [ + "def ask_gemini(model: str, user_prompt: str):\n", + " client = OpenAI(\n", + " api_key=google_api_key,\n", + " base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n", + " )\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=messages,\n", + " temperature=0.7\n", + " )\n", + " content = response.choices[0].message.content\n", + "\n", + " return content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K9LZZPJ9irrH" + }, + "outputs": [], + "source": [ + "def ask_hf(model: str, user_prompt: str):\n", + " global tokenizer, inputs, hf_model, outputs\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " if \"gemma\" in model.lower():\n", + " # Gemma's chat template has no system role; fold the instructions into the user turn.\n", + " messages = [{\"role\": \"user\", \"content\": f\"{system_prompt}\\n\\n{user_prompt}\"}]\n", + "\n", + " tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors=\"pt\").to(\"cuda\")\n", + " # Reload only when no model is cached yet or a different HF model was selected.\n", + " if hf_model is None or hf_model.name_or_path != model:\n", + " hf_model = AutoModelForCausalLM.from_pretrained(model, device_map=\"auto\", quantization_config=quant_config)\n", + " outputs = hf_model.generate(inputs, max_new_tokens=4000)\n", + "\n", + " # Decode only the newly generated tokens, which works for any chat template\n", + " # (the previous partition on \"assistant<|end_header_id|>\" was Llama-specific).\n", + " content = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)\n", + "\n", + " return content.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eu7Sv3bDhXdI" + }, + "outputs": [], + "source": [ + "def 
query_llm(model_name: str, user_prompt):\n", + " try:\n", + " model = MODELS[model_name]\n", + "\n", + " if \"gpt\" in model.lower():\n", + " response = ask_gpt(model, user_prompt)\n", + "\n", + " elif \"claude\" in model.lower():\n", + " response = ask_claude(model, user_prompt)\n", + "\n", + " elif \"gemini\" in model.lower():\n", + " response = ask_gemini(model, user_prompt)\n", + "\n", + " elif model in HF_MODELS:\n", + " response = ask_hf(model, user_prompt)\n", + "\n", + " else:\n", + " raise ValueError(f\"Unsupported model. Use one of {', '.join(MODELS.keys())}\")\n", + "\n", + " lines = [line.strip() for line in response.strip().splitlines() if line.strip().startswith(\"{\")]\n", + "\n", + " records = []\n", + " for line in lines:\n", + " try:\n", + " records.append(json.loads(line))\n", + " except json.JSONDecodeError:\n", + " # Skip malformed lines (e.g. a final row truncated at the token limit).\n", + " continue\n", + "\n", + " return records\n", + "\n", + " except Exception as e:\n", + " raise RuntimeError(f\"Model query failed: {e}\") from e" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mxuwLUsVlBlY" + }, + "source": [ + "## Output Formatter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IAKfqgZIlGuP" + }, + "outputs": [], + "source": [ + "def save_dataset(records, file_format: str, file_name: str):\n", + " df = pd.DataFrame(records)\n", + " print(df.shape)\n", + " if file_format == \".csv\":\n", + " df.to_csv(file_name, index=False)\n", + " elif file_format == \".tsv\":\n", + " df.to_csv(file_name, sep=\"\\t\", index=False)\n", + " elif file_format == \".jsonl\":\n", + " with open(file_name, \"w\") as f:\n", + " for record in records:\n", + " f.write(json.dumps(record) + \"\\n\")\n", + " elif file_format == \".json\":\n", + " # orient=\"records\" never includes the index, so index=False is redundant\n", + " # (and rejected by some pandas versions).\n", + " df.to_json(file_name, orient=\"records\")\n", + " else:\n", + " raise ValueError(\"Unsupported file format\")" + ] + }, + 
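{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A quick sanity check of `save_dataset` before using it in the full pipeline: the cell below writes two invented records to a JSONL file and reads them back. The records and the file name are illustrative only." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# Two hand-written records; values are placeholders, not generated data.\n", + "sample_records = [\n", + " {\"Name\": \"Blue River Bistro\", \"Year\": 2015},\n", + " {\"Name\": \"Golden Wok\", \"Year\": 2008},\n", + "]\n", + "save_dataset(sample_records, \".jsonl\", \"sanity_check.jsonl\")\n", + "\n", + "# Read the file back to confirm one JSON object per line was written.\n", + "with open(\"sanity_check.jsonl\") as f:\n", + " for line in f:\n", + " print(json.loads(line))" + ] + }, + 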
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gkpkQ0nal_5B" + }, + "outputs": [], + "source": [ + "def generate_dataset(\n", + " model_name: str,\n", + " business_case: str,\n", + " num_records: int = 100,\n", + " schema_text: str = None,\n", + " file_format: str = '.jsonl',\n", + " file_name: str = 'test_dataset.jsonl'\n", + "):\n", + " \"\"\"\n", + " Generates a synthetic dataset using an LLM based on the given business case and optional schema.\n", + "\n", + " Returns:\n", + " Tuple[str, pd.DataFrame | None]: A status message and a preview DataFrame (first 10 rows) if successful.\n", + " \"\"\"\n", + " try:\n", + " # Validate number of records\n", + " if num_records <= 10:\n", + " return \"❌ Error: Number of records must be greater than 10.\", None\n", + " if num_records > 1000:\n", + " return \"❌ Error: Number of records must be less than or equal to 1000.\", None\n", + "\n", + " # Validate file format\n", + " if file_format not in FILE_FORMATS:\n", + " return f\"❌ Error: Invalid file format '{file_format}'. Supported formats: {FILE_FORMATS}\", None\n", + "\n", + " # Ensure file name has correct extension\n", + " if not file_name.endswith(file_format):\n", + " file_name += file_format\n", + "\n", + " # Generate the prompt and query the model\n", + " prompt = get_user_prompt(business_case, schema_text, num_records)\n", + " records = query_llm(model_name, prompt)\n", + "\n", + " if not records:\n", + " return \"❌ Error: No valid records were generated by the model.\", None\n", + "\n", + " # Save dataset\n", + " save_dataset(records, file_format, file_name)\n", + "\n", + " # Prepare preview\n", + " df = pd.DataFrame(records)\n", + " preview = df.head(10)\n", + "\n", + " success_message = (\n", + " f\"✅ Generated {len(records)} records successfully!\\n\"\n", + " f\"📁 Saved to: {file_name}\\n\"\n", + " )\n", + "\n", + " return success_message, preview\n", + "\n", + " except Exception as e:\n", + " return f\"❌ Error: {str(e)}\", None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 702 + }, + "id": "Z9WdaSfFUakj", + "outputId": "2fbce2c5-a6d3-4dd8-a9d2-0e38c18d202e" + }, + "outputs": [], + "source": [ + "with gr.Blocks(title=\"Synthetic Dataset Generator\", theme=gr.themes.Monochrome()) as interface:\n", + " tokenizer = None\n", + " inputs = None\n", + " hf_model = None\n", + " outputs = None\n", + "\n", + " gr.Markdown(\"# Dataset Generator\")\n", + " gr.Markdown(\"Generate synthetic datasets using AI models\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=2):\n", + " schema_input = gr.Textbox(\n", + " label=\"Schema\",\n", + " value=DEFAULT_SCHEMA_TEXT,\n", + " lines=15,\n", + " placeholder=\"Define your dataset schema here... Please follow this format: Name (TYPE) - Description, example: Example\"\n", + " )\n", + "\n", + " business_case_input = gr.Textbox(\n", + " label=\"Business Case\",\n", + " value=\"I want to generate a restaurant dataset\",\n", + " lines=1,\n", + " placeholder=\"Enter business case description...\"\n", + " )\n", + "\n", + " with gr.Row():\n", + " model_dropdown = gr.Dropdown(\n", + " label=\"Model\",\n", + " choices=list(MODELS.keys()),\n", + " value=list(MODELS.keys())[0],\n", + " interactive=True\n", + " )\n", + "\n", + " nr_records_input = gr.Number(\n", + " label=\"Number of records\",\n", + " value=27,\n", + " minimum=11,\n", + " maximum=1000,\n", + " step=1\n", + " )\n", + "\n", + " with gr.Row():\n", + " filename_input = gr.Textbox(\n", + " label=\"Save as\",\n", + " value=\"restaurant_dataset\",\n", + " placeholder=\"Enter filename (extension will be added automatically)\"\n", + " )\n", + "\n", + " file_format_dropdown = gr.Dropdown(\n", + " label=\"File format\",\n", + " choices=FILE_FORMATS,\n", + " value=FILE_FORMATS[0],\n", + " interactive=True\n", + " )\n", + "\n", + " generate_btn = gr.Button(\"🚀 Generate\", variant=\"secondary\", size=\"lg\")\n", + "\n", + " with gr.Column(scale=1):\n", + " gr.Markdown(\"\"\"\n", + " ### 📝 Dataset Generation Instructions\n", + "\n", + " 1. **🗂 Schema** – Define your dataset structure\n", + " *(default: restaurant schema provided)*\n", + " 2. **💡 Business Case** – Enter a prompt to guide the AI for generating data\n", + " 3. **🤖 Model** – Choose your AI model: GPT, Claude, Gemini, or Hugging Face\n", + " 4. **📊 Number of Records** – Specify entries to generate\n", + " *(min: 11, max: 1000)*\n", + " 5. **📁 File Format** – Select output type: `.csv`, `.tsv`, `.jsonl`, or `.json`\n", + " 6. 
**💾 Save As** – Provide a filename *(extension auto-added)*\n", + " 7. **🚀 Generate** – Click **Generate** to create your dataset\n", + "\n", + " ### 🔧 Requirements\n", + "\n", + " Set API keys in Colab’s secret section:\n", + " `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`, `HF_TOKEN`\n", + " \"\"\")\n", + " output_status = gr.Textbox(\n", + " label=\"Status\",\n", + " lines=4,\n", + " interactive=False\n", + " )\n", + "\n", + " output_preview = gr.Dataframe(\n", + " label=\"Preview (first 10 rows)\",\n", + " interactive=False,\n", + " wrap=True\n", + " )\n", + "\n", + " generate_btn.click(\n", + " fn=generate_dataset,\n", + " inputs=[\n", + " model_dropdown,\n", + " business_case_input,\n", + " nr_records_input,\n", + " schema_input,\n", + " file_format_dropdown,\n", + " filename_input\n", + " ],\n", + " outputs=[output_status, output_preview]\n", + " )\n", + "\n", + "interface.launch(debug=True)\n", + "\n", + "del tokenizer, inputs, hf_model, outputs\n", + "gc.collect()\n", + "torch.cuda.empty_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w-ewbsjInopm" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 530ff7148b74a3b0f83537d26cd3a90a99c5798e Mon Sep 17 00:00:00 2001 From: Krabulek Date: Fri, 19 Sep 2025 14:03:37 +0200 Subject: [PATCH 2/3] small fix in week 4 contributions - python code documentation assistant --- .../Python_code_documentation_assistant.ipynb | 828 ++++++++++++++++++ 1 file changed, 828 insertions(+) create mode 100644 week4/community-contributions/Python_code_documentation_assistant.ipynb diff --git a/week4/community-contributions/Python_code_documentation_assistant.ipynb b/week4/community-contributions/Python_code_documentation_assistant.ipynb new file mode 100644 index 0000000..aebc0e3 --- /dev/null +++ b/week4/community-contributions/Python_code_documentation_assistant.ipynb @@ -0,0 +1,828 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9", + "metadata": {}, + "source": [ + "# Python Code Documentation Assistant\n", + "\n", + "The requirement: use a Frontier model to add docstrings and comments to your Python code\n" + ] + }, + { + "cell_type": "markdown", + "id": "d4634170-c444-4326-9e68-5f87c63fa0e0", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f72dfaf-9f20-4d81-b082-018eda152c9f", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -U -q \"google-genai\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import io\n", + "import sys\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "from google import genai\n", + "from google.genai import types\n", + "import anthropic\n", + "from IPython.display import Markdown, display, update_display\n", + "import gradio as gr\n", + "import subprocess" + ] + }, + { + "cell_type": 
"markdown", + "id": "f91e8b32-4c98-4210-a1e1-bfe0b1fddab7", + "metadata": {}, + "source": [ + "## Environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f672e1c-87e9-4865-b760-370fa605e614", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')\n", + "\n", + "if openai_api_key:\n", + " print(f\"OpenAI API Key exists and begins with: {openai_api_key[:8]}\")\n", + "else:\n", + " print(\"OpenAI API Key not set\")\n", + " \n", + "if anthropic_api_key:\n", + " print(f\"Anthropic API Key exists and begins with: {anthropic_api_key[:7]}\")\n", + "else:\n", + " print(\"Anthropic API Key not set\")\n", + "\n", + "if google_api_key:\n", + " print(f\"Google API Key exists and begins with: {google_api_key[:4]}\")\n", + "else:\n", + " print(\"Google API Key not set\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "gemini = genai.Client()\n", + "\n", + "OPENAI_MODEL = \"o4-mini\"\n", + "CLAUDE_MODEL = \"claude-3-7-sonnet-latest\"\n", + "GEMINI_MODEL = \"gemini-2.5-flash\"" + ] + }, + { + "cell_type": "markdown", + "id": "88a18c58-40d5-4592-8dd3-d7c7b0d951aa", + "metadata": {}, + "source": [ + "## Prompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6896636f-923e-4a2c-9d6c-fac07828a201", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"\"\"\n", + "You are an assistant that documents Python code. \n", + "Your task: \n", + "- Add concise, clear, and informative docstrings to functions, classes, and modules. \n", + "- Add inline comments only where they improve readability or clarify intent. \n", + "- Do not modify the code logic or structure. \n", + "- Respond with Python code only. 
\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(python):\n", + " user_prompt = \"Add docstrings and comments to the following Python code:\\n\"\n", + " user_prompt += python\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6190659-f54c-4951-bef4-4960f8e51cc4", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(python):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(python)}\n", + " ]" + ] + }, + { + "cell_type": "markdown", + "id": "624e5066-bcf6-490d-a790-608d2bb34184", + "metadata": {}, + "source": [ + "## Helper functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b", + "metadata": {}, + "outputs": [], + "source": [ + "def write_output(python, filename_suffix):\n", + " filename = f\"annotated_{filename_suffix}.py\"\n", + " code = python.replace(\"```python\",\"\").replace(\"```\",\"\")\n", + " with open(filename, \"w\") as f:\n", + " f.write(code)\n", + " print(f\"\\nWritten code to {filename}\")\n", + " return filename" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate_with_gpt(python, task_name): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " print(fragment, end='', flush=True)\n", + " return write_output(reply, f\"{task_name}_gpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cd84ad8-d55c-4fe0-9eeb-1895c95c4a9d", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate_with_claude(python, task_name):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " print(text, end=\"\", flush=True)\n", + " return write_output(reply, f\"{task_name}_claude\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8a35102-1c95-469b-8855-e85f4c9bdbdf", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate_with_gemini(python, task_name):\n", + " reply = gemini.models.generate_content(\n", + " model=GEMINI_MODEL,\n", + " contents=user_prompt_for(python),\n", + " config=types.GenerateContentConfig(\n", + " system_instruction=system_message,\n", + " )\n", + " )\n", + "\n", + " print(reply.text)\n", + " return write_output(reply.text, f\"{task_name}_gemini\")" + ] + }, + { + "cell_type": "markdown", + "id": "028dcfdd-2d52-4e11-a79e-2214a97cb26d", + "metadata": {}, + "source": [ + "# Run the Annotator" + ] + }, + { + "cell_type": "markdown", + "id": "7462d9f9-6215-4fb0-9471-1d0141d33205", + "metadata": {}, + "source": [ + "## Pi example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1cbb778-fa57-43de-b04b-ed523f396c38", + "metadata": {}, + "outputs": [], + "source": [ + "pi = \"\"\"\n", + "import time\n", + "\n", + "def 
calculate(iterations, param1, param2):\n", + " result = 1.0\n", + " for i in range(1, iterations+1):\n", + " j = i * param1 - param2\n", + " result -= (1/j)\n", + " j = i * param1 + param2\n", + " result += (1/j)\n", + " return result\n", + "\n", + "start_time = time.time()\n", + "result = calculate(100_000_000, 4, 1) * 4\n", + "end_time = time.time()\n", + "\n", + "print(f\"Result: {result:.12f}\")\n", + "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "105db6f9-343c-491d-8e44-3a5328b81719", + "metadata": {}, + "outputs": [], + "source": [ + "gpt_pi = annotate_with_gpt(pi, \"pi\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "415819d0-fc95-4f78-a6ae-5c7d6781c6a7", + "metadata": {}, + "outputs": [], + "source": [ + "# check if the script works\n", + "\n", + "exec(open(gpt_pi).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "983a11fe-e24d-4c65-8269-9802c5ef3ae6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "claude_pi = annotate_with_claude(pi, \"pi\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52f5b710-0dea-4884-8ed7-a94059d88281", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(claude_pi).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01f331f2-caac-48f6-9a03-8a228ee521bc", + "metadata": {}, + "outputs": [], + "source": [ + "gemini_pi = annotate_with_gemini(pi, \"pi\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23529942-53fa-46ad-a5db-1f3096dd6607", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(gemini_pi).read())" + ] + }, + { + "cell_type": "markdown", + "id": "7d1eaeca-61be-4d0a-a525-dd09f52aaa0f", + "metadata": {}, + "source": [ + "## Hard example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0", + "metadata": {}, + "outputs": [], + "source": [ + "python_hard = \"\"\"# Be careful to support large number sizes\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " value = seed\n", + " while True:\n", + " value = (a * value + c) % m\n", + " yield value\n", + " \n", + "def max_subarray_sum(n, seed, min_val, max_val):\n", + " lcg_gen = lcg(seed)\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + " max_sum = float('-inf')\n", + " for i in range(n):\n", + " current_sum = 0\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed)\n", + " for _ in range(20):\n", + " seed = next(lcg_gen)\n", + " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\n", + "# Parameters\n", + "n = 10000 # Number of random numbers\n", + "initial_seed = 42 # Initial seed for the LCG\n", + "min_val = -10 # Minimum value of random numbers\n", + "max_val = 10 # Maximum value of random numbers\n", + "\n", + "# Timing the function\n", + "import time\n", + "start_time = time.time()\n", + "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", + "end_time = time.time()\n", + "\n", + "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", + "print(\"Execution Time: {:.6f} seconds\".format(end_time - 
start_time))\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899", + "metadata": {}, + "outputs": [], + "source": [ + "exec(python_hard)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8", + "metadata": {}, + "outputs": [], + "source": [ + "gpt_hard = annotate_with_gpt(python_hard, \"hard\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80a15259-3d51-47b8-953c-6271fbd4b6fb", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(gpt_hard).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9305446-1d0c-4b51-866a-b8c1e299bf5c", + "metadata": {}, + "outputs": [], + "source": [ + "gemini_hard = annotate_with_gemini(python_hard, \"hard\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad6eecc8-0517-43d8-bd21-5bbdedae7a10", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(gemini_hard).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ee75e72-9ecb-4edd-a74a-4d3a83c1eb79", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "claude_hard = annotate_with_claude(python_hard, \"hard\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47af1516-455f-4d1c-8a1c-2da5a38c0ba5", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(claude_hard).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f60d33c-f6b7-4fc5-bc2b-57957b076e34", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "This module implements a Linear Congruential Generator (LCG) and uses it\n", + "to generate random numbers for calculating the maximum subarray sum.\n", + "It includes functions for the LCG, finding the maximum subarray sum, and\n", + "aggregating results over multiple runs.\n", + "\"\"\"\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " \"\"\"\n", + " Implements a Linear Congruential Generator (LCG) to produce a sequence of\n", + " pseudorandom numbers.\n", + "\n", + " The generator uses the formula: X_{n+1} = (a * X_n + c) % m.\n", + "\n", + " Args:\n", + " seed (int): The initial seed value for the generator (X_0).\n", + " a (int, optional): The multiplier. Defaults to 1664525 (common LCG parameter).\n", + " c (int, optional): The increment. Defaults to 1013904223 (common LCG parameter).\n", + " m (int, optional): The modulus. 
Defaults to 2**32, meaning numbers will be\n", + " between 0 and m-1.\n", + "\n", + " Yields:\n", + " int: The next pseudorandom number in the sequence.\n", + " \"\"\"\n", + " value = seed\n", + " while True:\n", + " # Calculate the next pseudorandom number using the LCG formula.\n", + " value = (a * value + c) % m\n", + " yield value\n", + "\n", + "def max_subarray_sum(n, seed, min_val, max_val):\n", + " \"\"\"\n", + " Calculates the maximum possible sum of a contiguous subarray within a list\n", + " of 'n' pseudorandom numbers.\n", + "\n", + " The random numbers are generated using an LCG based on the provided seed,\n", + " and then mapped to the range [min_val, max_val].\n", + " This implementation uses a brute-force approach with O(n^2) complexity.\n", + "\n", + " Args:\n", + " n (int): The number of random integers to generate for the array.\n", + " seed (int): The seed for the LCG to generate the random numbers.\n", + " min_val (int): The minimum possible value for the generated random numbers.\n", + " max_val (int): The maximum possible value for the generated random numbers.\n", + "\n", + " Returns:\n", + " int: The maximum sum found among all contiguous subarrays.\n", + " \"\"\"\n", + " lcg_gen = lcg(seed)\n", + " # Generate a list of 'n' random numbers within the specified range [min_val, max_val].\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + "\n", + " max_sum = float('-inf') # Initialize max_sum to negative infinity to handle all negative numbers.\n", + "\n", + " # Iterate through all possible starting points of a subarray.\n", + " for i in range(n):\n", + " current_sum = 0\n", + " # Iterate through all possible ending points for the current starting point.\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " # Update max_sum if the current subarray sum is greater.\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", + " \"\"\"\n", + " Calculates the sum of maximum subarray sums over 20 separate runs.\n", + "\n", + " Each run generates a new set of 'n' random numbers for `max_subarray_sum`\n", + " using a new seed derived from the initial LCG sequence.\n", + "\n", + " Args:\n", + " n (int): The number of random integers for each subarray sum calculation.\n", + " initial_seed (int): The initial seed for the LCG that generates seeds\n", + " for individual `max_subarray_sum` runs.\n", + " min_val (int): The minimum possible value for random numbers in each run.\n", + " max_val (int): The maximum possible value for random numbers in each run.\n", + "\n", + " Returns:\n", + " int: The sum of the maximum subarray sums across all 20 runs.\n", + " \"\"\"\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed) # LCG to generate seeds for subsequent runs.\n", + " # Perform 20 independent runs.\n", + " for _ in range(20):\n", + " # Get a new seed for each run from the initial LCG generator.\n", + " seed = next(lcg_gen)\n", + " # Add the maximum subarray sum of the current run to the total sum.\n", + " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\n", + "# Parameters for the simulation\n", + "n = 10000 # Number of random numbers to generate for each subarray\n", + "initial_seed = 42 # Initial seed for the LCG that generates seeds for runs\n", + "min_val = -10 # Minimum value for the random numbers\n", + "max_val = 10 # Maximum value for the random 
numbers\n", + "\n", + "# Import the time module to measure execution time.\n", + "import time\n", + "\n", + "# Record the start time before executing the main function.\n", + "start_time = time.time()\n", + "# Call the function to calculate the total maximum subarray sum over multiple runs.\n", + "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", + "# Record the end time after the function completes.\n", + "end_time = time.time()\n", + "\n", + "# Print the final aggregated result.\n", + "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", + "# Print the total execution time, formatted to 6 decimal places.\n", + "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))" + ] + }, + { + "cell_type": "markdown", + "id": "ff02ce09-0544-49a5-944d-a57b25bf9b72", + "metadata": {}, + "source": [ + "# Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gpt(python): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply.replace('```python\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8669f56b-8314-4582-a167-78842caea131", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_claude(python):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " yield reply.replace('```python\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d48d44df-c082-4ed1-b3ea-fc2a880591c2", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gemini(python):\n", + " stream = gemini.models.generate_content_stream(\n", + " model=GEMINI_MODEL,\n", + " contents=user_prompt_for(python),\n", + " config=types.GenerateContentConfig(\n", + " system_instruction=system_message,\n", + " ),\n", + " )\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " reply += chunk.text\n", + " yield reply.replace('```python\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate(python, model):\n", + " if model == \"GPT\":\n", + " result = stream_gpt(python)\n", + " elif model == \"Claude\":\n", + " result = stream_claude(python)\n", + " elif model == \"Gemini\":\n", + " result = stream_gemini(python)\n", + " else:\n", + " raise ValueError(\"Unknown model\")\n", + " for stream_so_far in result:\n", + " yield stream_so_far " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19bf2bff-a822-4009-a539-f003b1651383", + "metadata": {}, + "outputs": [], + "source": [ + "def execute_python(code):\n", + " try:\n", + " output = io.StringIO()\n", + " sys.stdout = output\n", + " exec(code)\n", + " finally:\n", + " sys.stdout = sys.__stdout__\n", + " return output.getvalue()" + ] + }, + { + "cell_type": "markdown", + "id": "8391444b-b938-4f92-982f-91439b38d901", + "metadata": {}, + "source": [ + "# Gradio App" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442", + "metadata": {}, + "outputs": [], + "source": [ + "css = \"\"\"\n", + ".python {background-color: #306998;}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76167ea9-d0a1-4bc6-8d73-633d3b8c8df6", + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "\n", + "# Parameters\n", + "LINES = 25\n", + "LINE_HEIGHT = 20 # px, typical CodeMirror line height\n", + "PADDING = 10 # px, top + bottom padding\n", + "\n", + "CODE_HEIGHT = LINES * LINE_HEIGHT + PADDING\n", + "\n", + "\n", + "with gr.Blocks(\n", + " theme=gr.themes.Soft(),\n", + " css=f\"\"\"\n", + "#code_input .cm-editor, #annotated_code .cm-editor {{\n", + " height: {CODE_HEIGHT}px !important;\n", + " overflow-y: auto !important;\n", + "}}\n", + "\"\"\"\n", + ") as demo_v2:\n", + " gr.Markdown(\"## 🐍 Annotate Python Code with Docstrings and Comments\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " gr.Markdown(\"### Python code:\")\n", + " code_input = gr.Code(\n", + " language=\"python\", \n", + " value=python_hard,\n", + " elem_id=\"code_input\"\n", + " )\n", + " \n", + " with gr.Column(scale=1):\n", + " gr.Markdown(\"### Annotated code:\")\n", + " annotated_output = gr.Code(\n", + " language=\"python\",\n", + " elem_id=\"annotated_code\",\n", + " interactive=False\n", + " )\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " model_dropdown = gr.Dropdown(\n", + " choices=[\"Gemini\", \"GPT-4\", \"Claude\"],\n", + " value=\"Gemini\",\n", + " label=\"Select model\"\n", + " )\n", + " with gr.Column(scale=1):\n", + " annotate_btn = gr.Button(\"✨ Annotate code\", variant=\"primary\")\n", + " run_btn = gr.Button(\"▶️ Run Python\", variant=\"secondary\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column():\n", + " gr.Markdown(\"### Python result:\")\n", + " result_output = gr.Textbox(\n", + " lines=5, \n", + " label=\"Output\",\n", + " interactive=False\n", + " )\n", + " \n", + " annotate_btn.click(\n", + " annotate,\n", + " inputs=[code_input, model_dropdown],\n", + " outputs=[annotated_output]\n", + " )\n", + " run_btn.click(execute_python, inputs=[annotated_output], outputs=[result_output])\n", + "\n", + " \n", + "demo_v2.launch(inbrowser=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea42883b-fdba-46ed-97be-f42e3cb41f11", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c7bda8a44230f7aa053bd1b45516836c1d176bcb Mon Sep 17 00:00:00 2001 From: Krabulek Date: Fri, 19 Sep 2025 19:44:26 +0200 Subject: [PATCH 3/3] removed the python code documentation assistant - it is added in another branch: --- .../Python_code_documentation_assistant.ipynb | 828 ------------------ 1 file changed, 828 deletions(-) delete mode 100644 week4/community-contributions/Python_code_documentation_assistant.ipynb diff --git a/week4/community-contributions/Python_code_documentation_assistant.ipynb b/week4/community-contributions/Python_code_documentation_assistant.ipynb deleted file mode 100644 
index aebc0e3..0000000 --- a/week4/community-contributions/Python_code_documentation_assistant.ipynb +++ /dev/null @@ -1,828 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9", - "metadata": {}, - "source": [ - "# Python Code Documentation Assistant\n", - "\n", - "The requirement: use a Frontier model to add docstrings and comments to your Python code\n" - ] - }, - { - "cell_type": "markdown", - "id": "d4634170-c444-4326-9e68-5f87c63fa0e0", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f72dfaf-9f20-4d81-b082-018eda152c9f", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -U -q \"google-genai\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import io\n", - "import sys\n", - "from dotenv import load_dotenv\n", - "from openai import OpenAI\n", - "from google import genai\n", - "from google.genai import types\n", - "import anthropic\n", - "from IPython.display import Markdown, display, update_display\n", - "import gradio as gr\n", - "import subprocess" - ] - }, - { - "cell_type": "markdown", - "id": "f91e8b32-4c98-4210-a1e1-bfe0b1fddab7", - "metadata": {}, - "source": [ - "## Environment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f672e1c-87e9-4865-b760-370fa605e614", - "metadata": {}, - "outputs": [], - "source": [ - "load_dotenv(override=True)\n", - "openai_api_key = os.getenv('OPENAI_API_KEY')\n", - "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", - "google_api_key = os.getenv('GOOGLE_API_KEY')\n", - "\n", - "if openai_api_key:\n", - " print(f\"OpenAI API Key exists and begins with: {openai_api_key[:8]}\")\n", - "else:\n", - " print(\"OpenAI API Key not set\")\n", - " \n", - "if anthropic_api_key:\n", - " print(f\"Anthropic API Key exists and begins with: {anthropic_api_key[:7]}\")\n", - "else:\n", - " print(\"Anthropic API Key not set\")\n", - "\n", - "if google_api_key:\n", - " print(f\"Google API Key exists and begins with: {google_api_key[:4]}\")\n", - "else:\n", - " print(\"Google API Key not set\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da", - "metadata": {}, - "outputs": [], - "source": [ - "openai = OpenAI()\n", - "claude = anthropic.Anthropic()\n", - "gemini = genai.Client()\n", - "\n", - "OPENAI_MODEL = \"o4-mini\"\n", - "CLAUDE_MODEL = \"claude-3-7-sonnet-latest\"\n", - "GEMINI_MODEL = \"gemini-2.5-flash\"" - ] - }, - { - "cell_type": "markdown", - "id": "88a18c58-40d5-4592-8dd3-d7c7b0d951aa", - "metadata": {}, - "source": [ - "## Prompts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6896636f-923e-4a2c-9d6c-fac07828a201", - "metadata": {}, - "outputs": [], - "source": [ - "system_message = \"\"\"\n", - "You are an assistant that documents Python code. \n", - "Your task: \n", - "- Add concise, clear, and informative docstrings to functions, classes, and modules. \n", - "- Add inline comments only where they improve readability or clarify intent. \n", - "- Do not modify the code logic or structure. \n", - "- Respond with Python code only. 
\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb", - "metadata": {}, - "outputs": [], - "source": [ - "def user_prompt_for(python):\n", - " user_prompt = \"Add docstrings and comments to the following Python code:\\n\"\n", - " user_prompt += python\n", - " return user_prompt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6190659-f54c-4951-bef4-4960f8e51cc4", - "metadata": {}, - "outputs": [], - "source": [ - "def messages_for(python):\n", - " return [\n", - " {\"role\": \"system\", \"content\": system_message},\n", - " {\"role\": \"user\", \"content\": user_prompt_for(python)}\n", - " ]" - ] - }, - { - "cell_type": "markdown", - "id": "624e5066-bcf6-490d-a790-608d2bb34184", - "metadata": {}, - "source": [ - "## Helper functions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b", - "metadata": {}, - "outputs": [], - "source": [ - "def write_output(python, filename_suffix):\n", - " filename = f\"annotated_{filename_suffix}.py\"\n", - " code = python.replace(\"```python\",\"\").replace(\"```\",\"\")\n", - " with open(filename, \"w\") as f:\n", - " f.write(code)\n", - " print(f\"\\nWritten code to {filename}\")\n", - " return filename" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9", - "metadata": {}, - "outputs": [], - "source": [ - "def annotate_with_gpt(python, task_name): \n", - " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", - " reply = \"\"\n", - " for chunk in stream:\n", - " fragment = chunk.choices[0].delta.content or \"\"\n", - " reply += fragment\n", - " print(fragment, end='', flush=True)\n", - " return write_output(reply, f\"{task_name}_gpt\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7cd84ad8-d55c-4fe0-9eeb-1895c95c4a9d", - "metadata": {}, - "outputs": [], - "source": [ - "def annotate_with_claude(python, task_name):\n", - " result = claude.messages.stream(\n", - " model=CLAUDE_MODEL,\n", - " max_tokens=2000,\n", - " system=system_message,\n", - " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", - " )\n", - " reply = \"\"\n", - " with result as stream:\n", - " for text in stream.text_stream:\n", - " reply += text\n", - " print(text, end=\"\", flush=True)\n", - " return write_output(reply, f\"{task_name}_claude\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8a35102-1c95-469b-8855-e85f4c9bdbdf", - "metadata": {}, - "outputs": [], - "source": [ - "def annotate_with_gemini(python, task_name):\n", - " reply = gemini.models.generate_content(\n", - " model=GEMINI_MODEL,\n", - " contents=user_prompt_for(python),\n", - " config=types.GenerateContentConfig(\n", - " system_instruction=system_message,\n", - " )\n", - " )\n", - "\n", - " print(reply.text)\n", - " return write_output(reply.text, f\"{task_name}_gemini\")" - ] - }, - { - "cell_type": "markdown", - "id": "028dcfdd-2d52-4e11-a79e-2214a97cb26d", - "metadata": {}, - "source": [ - "# Run the Annotator" - ] - }, - { - "cell_type": "markdown", - "id": "7462d9f9-6215-4fb0-9471-1d0141d33205", - "metadata": {}, - "source": [ - "## Pi example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a1cbb778-fa57-43de-b04b-ed523f396c38", - "metadata": {}, - "outputs": [], - "source": [ - "pi = \"\"\"\n", - "import time\n", - "\n", - "def 
calculate(iterations, param1, param2):\n", - " result = 1.0\n", - " for i in range(1, iterations+1):\n", - " j = i * param1 - param2\n", - " result -= (1/j)\n", - " j = i * param1 + param2\n", - " result += (1/j)\n", - " return result\n", - "\n", - "start_time = time.time()\n", - "result = calculate(100_000_000, 4, 1) * 4\n", - "end_time = time.time()\n", - "\n", - "print(f\"Result: {result:.12f}\")\n", - "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "105db6f9-343c-491d-8e44-3a5328b81719", - "metadata": {}, - "outputs": [], - "source": [ - "gpt_pi = annotate_with_gpt(pi, \"pi\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "415819d0-fc95-4f78-a6ae-5c7d6781c6a7", - "metadata": {}, - "outputs": [], - "source": [ - "# check if the script works\n", - "\n", - "exec(open(gpt_pi).read())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "983a11fe-e24d-4c65-8269-9802c5ef3ae6", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "claude_pi = annotate_with_claude(pi, \"pi\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52f5b710-0dea-4884-8ed7-a94059d88281", - "metadata": {}, - "outputs": [], - "source": [ - "exec(open(claude_pi).read())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01f331f2-caac-48f6-9a03-8a228ee521bc", - "metadata": {}, - "outputs": [], - "source": [ - "gemini_pi = annotate_with_gemini(pi, \"pi\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23529942-53fa-46ad-a5db-1f3096dd6607", - "metadata": {}, - "outputs": [], - "source": [ - "exec(open(gemini_pi).read())" - ] - }, - { - "cell_type": "markdown", - "id": "7d1eaeca-61be-4d0a-a525-dd09f52aaa0f", - "metadata": {}, - "source": [ - "## Hard example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0", - "metadata": {}, - "outputs": [], - "source": [ - "python_hard = \"\"\"# Be careful to support large number sizes\n", - "\n", - "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", - " value = seed\n", - " while True:\n", - " value = (a * value + c) % m\n", - " yield value\n", - " \n", - "def max_subarray_sum(n, seed, min_val, max_val):\n", - " lcg_gen = lcg(seed)\n", - " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", - " max_sum = float('-inf')\n", - " for i in range(n):\n", - " current_sum = 0\n", - " for j in range(i, n):\n", - " current_sum += random_numbers[j]\n", - " if current_sum > max_sum:\n", - " max_sum = current_sum\n", - " return max_sum\n", - "\n", - "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", - " total_sum = 0\n", - " lcg_gen = lcg(initial_seed)\n", - " for _ in range(20):\n", - " seed = next(lcg_gen)\n", - " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", - " return total_sum\n", - "\n", - "# Parameters\n", - "n = 10000 # Number of random numbers\n", - "initial_seed = 42 # Initial seed for the LCG\n", - "min_val = -10 # Minimum value of random numbers\n", - "max_val = 10 # Maximum value of random numbers\n", - "\n", - "# Timing the function\n", - "import time\n", - "start_time = time.time()\n", - "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", - "end_time = time.time()\n", - "\n", - "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", - "print(\"Execution Time: {:.6f} seconds\".format(end_time - 
start_time))\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899", - "metadata": {}, - "outputs": [], - "source": [ - "exec(python_hard)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8", - "metadata": {}, - "outputs": [], - "source": [ - "gpt_hard = annotate_with_gpt(python_hard, \"hard\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "80a15259-3d51-47b8-953c-6271fbd4b6fb", - "metadata": {}, - "outputs": [], - "source": [ - "exec(open(gpt_hard).read())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e9305446-1d0c-4b51-866a-b8c1e299bf5c", - "metadata": {}, - "outputs": [], - "source": [ - "gemini_hard = annotate_with_gemini(python_hard, \"hard\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad6eecc8-0517-43d8-bd21-5bbdedae7a10", - "metadata": {}, - "outputs": [], - "source": [ - "exec(open(gemini_hard).read())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2ee75e72-9ecb-4edd-a74a-4d3a83c1eb79", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "claude_hard = annotate_with_claude(python_hard, \"hard\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47af1516-455f-4d1c-8a1c-2da5a38c0ba5", - "metadata": {}, - "outputs": [], - "source": [ - "exec(open(claude_hard).read())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f60d33c-f6b7-4fc5-bc2b-57957b076e34", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "This module implements a Linear Congruential Generator (LCG) and uses it\n", - "to generate random numbers for calculating the maximum subarray sum.\n", - "It includes functions for the LCG, finding the maximum subarray sum, and\n", - "aggregating results over multiple runs.\n", - "\"\"\"\n", - "\n", - "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", - " \"\"\"\n", - " Implements a Linear Congruential Generator (LCG) to produce a sequence of\n", - " pseudorandom numbers.\n", - "\n", - " The generator uses the formula: X_{n+1} = (a * X_n + c) % m.\n", - "\n", - " Args:\n", - " seed (int): The initial seed value for the generator (X_0).\n", - " a (int, optional): The multiplier. Defaults to 1664525 (common LCG parameter).\n", - " c (int, optional): The increment. Defaults to 1013904223 (common LCG parameter).\n", - " m (int, optional): The modulus. 
Defaults to 2**32, meaning numbers will be\n", - " between 0 and m-1.\n", - "\n", - " Yields:\n", - " int: The next pseudorandom number in the sequence.\n", - " \"\"\"\n", - " value = seed\n", - " while True:\n", - " # Calculate the next pseudorandom number using the LCG formula.\n", - " value = (a * value + c) % m\n", - " yield value\n", - "\n", - "def max_subarray_sum(n, seed, min_val, max_val):\n", - " \"\"\"\n", - " Calculates the maximum possible sum of a contiguous subarray within a list\n", - " of 'n' pseudorandom numbers.\n", - "\n", - " The random numbers are generated using an LCG based on the provided seed,\n", - " and then mapped to the range [min_val, max_val].\n", - " This implementation uses a brute-force approach with O(n^2) complexity.\n", - "\n", - " Args:\n", - " n (int): The number of random integers to generate for the array.\n", - " seed (int): The seed for the LCG to generate the random numbers.\n", - " min_val (int): The minimum possible value for the generated random numbers.\n", - " max_val (int): The maximum possible value for the generated random numbers.\n", - "\n", - " Returns:\n", - " int: The maximum sum found among all contiguous subarrays.\n", - " \"\"\"\n", - " lcg_gen = lcg(seed)\n", - " # Generate a list of 'n' random numbers within the specified range [min_val, max_val].\n", - " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", - "\n", - " max_sum = float('-inf') # Initialize max_sum to negative infinity to handle all negative numbers.\n", - "\n", - " # Iterate through all possible starting points of a subarray.\n", - " for i in range(n):\n", - " current_sum = 0\n", - " # Iterate through all possible ending points for the current starting point.\n", - " for j in range(i, n):\n", - " current_sum += random_numbers[j]\n", - " # Update max_sum if the current subarray sum is greater.\n", - " if current_sum > max_sum:\n", - " max_sum = current_sum\n", - " return max_sum\n", - "\n", - "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", - " \"\"\"\n", - " Calculates the sum of maximum subarray sums over 20 separate runs.\n", - "\n", - " Each run generates a new set of 'n' random numbers for `max_subarray_sum`\n", - " using a new seed derived from the initial LCG sequence.\n", - "\n", - " Args:\n", - " n (int): The number of random integers for each subarray sum calculation.\n", - " initial_seed (int): The initial seed for the LCG that generates seeds\n", - " for individual `max_subarray_sum` runs.\n", - " min_val (int): The minimum possible value for random numbers in each run.\n", - " max_val (int): The maximum possible value for random numbers in each run.\n", - "\n", - " Returns:\n", - " int: The sum of the maximum subarray sums across all 20 runs.\n", - " \"\"\"\n", - " total_sum = 0\n", - " lcg_gen = lcg(initial_seed) # LCG to generate seeds for subsequent runs.\n", - " # Perform 20 independent runs.\n", - " for _ in range(20):\n", - " # Get a new seed for each run from the initial LCG generator.\n", - " seed = next(lcg_gen)\n", - " # Add the maximum subarray sum of the current run to the total sum.\n", - " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", - " return total_sum\n", - "\n", - "# Parameters for the simulation\n", - "n = 10000 # Number of random numbers to generate for each subarray\n", - "initial_seed = 42 # Initial seed for the LCG that generates seeds for runs\n", - "min_val = -10 # Minimum value for the random numbers\n", - "max_val = 10 # Maximum value for the random 
numbers\n", - "\n", - "# Import the time module to measure execution time.\n", - "import time\n", - "\n", - "# Record the start time before executing the main function.\n", - "start_time = time.time()\n", - "# Call the function to calculate the total maximum subarray sum over multiple runs.\n", - "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", - "# Record the end time after the function completes.\n", - "end_time = time.time()\n", - "\n", - "# Print the final aggregated result.\n", - "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", - "# Print the total execution time, formatted to 6 decimal places.\n", - "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))" - ] - }, - { - "cell_type": "markdown", - "id": "ff02ce09-0544-49a5-944d-a57b25bf9b72", - "metadata": {}, - "source": [ - "# Streaming" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0", - "metadata": {}, - "outputs": [], - "source": [ - "def stream_gpt(python): \n", - " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", - " reply = \"\"\n", - " for chunk in stream:\n", - " fragment = chunk.choices[0].delta.content or \"\"\n", - " reply += fragment\n", - " yield reply.replace('```python\\n','').replace('```','')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8669f56b-8314-4582-a167-78842caea131", - "metadata": {}, - "outputs": [], - "source": [ - "def stream_claude(python):\n", - " result = claude.messages.stream(\n", - " model=CLAUDE_MODEL,\n", - " max_tokens=2000,\n", - " system=system_message,\n", - " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", - " )\n", - " reply = \"\"\n", - " with result as stream:\n", - " for text in stream.text_stream:\n", - " reply += text\n", - " yield reply.replace('```python\\n','').replace('```','')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d48d44df-c082-4ed1-b3ea-fc2a880591c2", - "metadata": {}, - "outputs": [], - "source": [ - "def stream_gemini(python):\n", - " stream = gemini.models.generate_content_stream(\n", - " model=GEMINI_MODEL,\n", - " contents=user_prompt_for(python),\n", - " config=types.GenerateContentConfig(\n", - " system_instruction=system_message,\n", - " ),\n", - " )\n", - " reply = \"\"\n", - " for chunk in stream:\n", - " reply += chunk.text\n", - " yield reply.replace('```python\\n','').replace('```','')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d", - "metadata": {}, - "outputs": [], - "source": [ - "def annotate(python, model):\n", - " if model == \"GPT\":\n", - " result = stream_gpt(python)\n", - " elif model == \"Claude\":\n", - " result = stream_claude(python)\n", - " elif model == \"Gemini\":\n", - " result = stream_gemini(python)\n", - " else:\n", - " raise ValueError(\"Unknown model\")\n", - " for stream_so_far in result:\n", - " yield stream_so_far " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19bf2bff-a822-4009-a539-f003b1651383", - "metadata": {}, - "outputs": [], - "source": [ - "def execute_python(code):\n", - " try:\n", - " output = io.StringIO()\n", - " sys.stdout = output\n", - " exec(code)\n", - " finally:\n", - " sys.stdout = sys.__stdout__\n", - " return output.getvalue()" - ] - }, - { - "cell_type": "markdown", - "id": "8391444b-b938-4f92-982f-91439b38d901", - "metadata": {}, - "source": [ - "# Gradio App" - ] - }, - 
-  {
-   "cell_type": "markdown",
-   "id": "8391444b-b938-4f92-982f-91439b38d901",
-   "metadata": {},
-   "source": [
-    "# Gradio App"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Note: this css variable is not used by the Blocks app below,\n",
-    "# which passes its own inline CSS for the code editors.\n",
-    "css = \"\"\"\n",
-    ".python {background-color: #306998;}\n",
-    "\"\"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "76167ea9-d0a1-4bc6-8d73-633d3b8c8df6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import gradio as gr\n",
-    "\n",
-    "# Sizing parameters for the code editors\n",
-    "LINES = 25\n",
-    "LINE_HEIGHT = 20 # px, typical CodeMirror line height\n",
-    "PADDING = 10 # px, top + bottom padding\n",
-    "\n",
-    "CODE_HEIGHT = LINES * LINE_HEIGHT + PADDING\n",
-    "\n",
-    "\n",
-    "with gr.Blocks(\n",
-    "    theme=gr.themes.Soft(),\n",
-    "    css=f\"\"\"\n",
-    "#code_input .cm-editor, #annotated_code .cm-editor {{\n",
-    "    height: {CODE_HEIGHT}px !important;\n",
-    "    overflow-y: auto !important;\n",
-    "}}\n",
-    "\"\"\"\n",
-    ") as demo_v2:\n",
-    "    gr.Markdown(\"## 🐍 Annotate Python Code with Docstrings and Comments\")\n",
-    "\n",
-    "    with gr.Row():\n",
-    "        with gr.Column(scale=1):\n",
-    "            gr.Markdown(\"### Python code:\")\n",
-    "            code_input = gr.Code(\n",
-    "                language=\"python\",\n",
-    "                value=python_hard,\n",
-    "                elem_id=\"code_input\"\n",
-    "            )\n",
-    "\n",
-    "        with gr.Column(scale=1):\n",
-    "            gr.Markdown(\"### Annotated code:\")\n",
-    "            annotated_output = gr.Code(\n",
-    "                language=\"python\",\n",
-    "                elem_id=\"annotated_code\",\n",
-    "                interactive=False\n",
-    "            )\n",
-    "\n",
-    "    with gr.Row():\n",
-    "        with gr.Column(scale=1):\n",
-    "            # Choices must match the names expected by annotate().\n",
-    "            model_dropdown = gr.Dropdown(\n",
-    "                choices=[\"Gemini\", \"GPT\", \"Claude\"],\n",
-    "                value=\"Gemini\",\n",
-    "                label=\"Select model\"\n",
-    "            )\n",
-    "        with gr.Column(scale=1):\n",
-    "            annotate_btn = gr.Button(\"✨ Annotate code\", variant=\"primary\")\n",
-    "            run_btn = gr.Button(\"▶️ Run Python\", variant=\"secondary\")\n",
-    "\n",
-    "    with gr.Row():\n",
-    "        with gr.Column():\n",
-    "            gr.Markdown(\"### Python result:\")\n",
-    "            result_output = gr.Textbox(\n",
-    "                lines=5,\n",
-    "                label=\"Output\",\n",
-    "                interactive=False\n",
-    "            )\n",
-    "\n",
-    "    annotate_btn.click(\n",
-    "        annotate,\n",
-    "        inputs=[code_input, model_dropdown],\n",
-    "        outputs=[annotated_output]\n",
-    "    )\n",
-    "    run_btn.click(execute_python, inputs=[annotated_output], outputs=[result_output])\n",
-    "\n",
-    "\n",
-    "demo_v2.launch(inbrowser=True)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea42883b-fdba-46ed-97be-f42e3cb41f11",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}