From 2a0eff02c27d7c6477d9b078865cb5602165f9b0 Mon Sep 17 00:00:00 2001 From: Krabulek Date: Fri, 19 Sep 2025 12:35:00 +0200 Subject: [PATCH] week 4 excercises: added Gemini and Python Code Documentation Assistant --- .../Python_code_documentation_assistant.ipynb | 700 ++++++++++++++++++ .../day3-with-gemini.ipynb | 690 +++++++++++++++++ 2 files changed, 1390 insertions(+) create mode 100644 week4/community-contributions/Python_code_documentation_assistant.ipynb create mode 100644 week4/community-contributions/day3-with-gemini.ipynb diff --git a/week4/community-contributions/Python_code_documentation_assistant.ipynb b/week4/community-contributions/Python_code_documentation_assistant.ipynb new file mode 100644 index 0000000..2bb5c6f --- /dev/null +++ b/week4/community-contributions/Python_code_documentation_assistant.ipynb @@ -0,0 +1,700 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9", + "metadata": {}, + "source": [ + "# Python Code Documentation Assistant\n", + "\n", + "The requirement: use a Frontier model to add docstrings and comments to your Python code\n" + ] + }, + { + "cell_type": "markdown", + "id": "d4634170-c444-4326-9e68-5f87c63fa0e0", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f72dfaf-9f20-4d81-b082-018eda152c9f", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -U -q \"google-genai\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import io\n", + "import sys\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "from google import genai\n", + "from google.genai import types\n", + "import anthropic\n", + "from IPython.display import Markdown, display, update_display\n", + "import gradio as gr\n", + "import subprocess" + ] + }, + { + "cell_type": "markdown", + "id": "f91e8b32-4c98-4210-a1e1-bfe0b1fddab7", + "metadata": {}, + "source": [ + "## Environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f672e1c-87e9-4865-b760-370fa605e614", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')\n", + "\n", + "if openai_api_key:\n", + " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " print(\"OpenAI API Key not set\")\n", + " \n", + "if anthropic_api_key:\n", + " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n", + "else:\n", + " print(\"Anthropic API Key not set\")\n", + "\n", + "if google_api_key:\n", + " print(f\"Google API Key exists and begins {google_api_key[:4]}\")\n", + "else:\n", + " print(\"Google API Key not set\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "gemini = genai.Client()\n", + "\n", + "OPENAI_MODEL = \"o4-mini\"\n", + "CLAUDE_MODEL = \"claude-3-7-sonnet-latest\"\n", + "GEMINI_MODEL = \"gemini-2.5-flash\"" + ] + }, + { + "cell_type": "markdown", + "id": "88a18c58-40d5-4592-8dd3-d7c7b0d951aa", + "metadata": {}, + "source": [ + "## Prompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6896636f-923e-4a2c-9d6c-fac07828a201", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"\"\"\n", + "You are an assistant that documents Python code. \n", + "Your task: \n", + "- Add concise, clear, and informative docstrings to functions, classes, and modules. \n", + "- Add inline comments only where they improve readability or clarify intent. \n", + "- Do not modify the code logic or structure. \n", + "- Respond with Python code only. \n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(python):\n", + " user_prompt = \"Add docstrings and comments to the following Python code:\\n\"\n", + " user_prompt += python\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6190659-f54c-4951-bef4-4960f8e51cc4", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(python):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(python)}\n", + " ]" + ] + }, + { + "cell_type": "markdown", + "id": "624e5066-bcf6-490d-a790-608d2bb34184", + "metadata": {}, + "source": [ + "## Helper functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b", + "metadata": {}, + "outputs": [], + "source": [ + "def write_output(python, filename_suffix):\n", + " filename = f\"annotated_{filename_suffix}.py\"\n", + " code = python.replace(\"```python\",\"\").replace(\"```\",\"\")\n", + " with open(filename, \"w\") as f:\n", + " f.write(code)\n", + " print(f\"\\nWritten code to {filename}\")\n", + " return filename" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate_with_gpt(python, task_name): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " print(fragment, end='', flush=True)\n", + " return write_output(reply, f\"{task_name}_gpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cd84ad8-d55c-4fe0-9eeb-1895c95c4a9d", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate_with_claude(python, task_name):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " print(text, end=\"\", flush=True)\n", + " return write_output(reply, f\"{task_name}_claude\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8a35102-1c95-469b-8855-e85f4c9bdbdf", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate_with_gemini(python, task_name):\n", + " reply = gemini.models.generate_content(\n", + " model=GEMINI_MODEL,\n", + " contents=user_prompt_for(python),\n", + " config=types.GenerateContentConfig(\n", + " system_instruction=system_message,\n", + " )\n", + " )\n", + "\n", + " print(reply.text)\n", + " return write_output(reply.text, f\"{task_name}_gemini\")" + ] + }, + { + "cell_type": "markdown", + "id": "028dcfdd-2d52-4e11-a79e-2214a97cb26d", + "metadata": {}, + "source": [ + "# Run the Annotator" + ] + }, + { + "cell_type": "markdown", + "id": "7462d9f9-6215-4fb0-9471-1d0141d33205", + "metadata": {}, + "source": [ + "## Pi example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1cbb778-fa57-43de-b04b-ed523f396c38", + "metadata": {}, + "outputs": [], + "source": [ + "pi = \"\"\"\n", + "import time\n", + "\n", + "def calculate(iterations, param1, param2):\n", + " result = 1.0\n", + " for i in range(1, iterations+1):\n", + " j = i * param1 - param2\n", + " result -= (1/j)\n", + " j = i * param1 + param2\n", + " result += (1/j)\n", + " return result\n", + "\n", + "start_time = time.time()\n", + "result = calculate(100_000_000, 4, 1) * 4\n", + "end_time = time.time()\n", + "\n", + "print(f\"Result: {result:.12f}\")\n", + "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "105db6f9-343c-491d-8e44-3a5328b81719", + "metadata": {}, + "outputs": [], + "source": [ + "gpt_pi = annotate_with_gpt(pi, \"pi))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "415819d0-fc95-4f78-a6ae-5c7d6781c6a7", + "metadata": {}, + "outputs": [], + "source": [ + "# check if the script works\n", + "\n", + "exec(open(gpt_pi).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "983a11fe-e24d-4c65-8269-9802c5ef3ae6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "claude_pi = annotate_with_claude(pi, \"pi\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52f5b710-0dea-4884-8ed7-a94059d88281", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(claude_pi).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01f331f2-caac-48f6-9a03-8a228ee521bc", + "metadata": {}, + "outputs": [], + "source": [ + "gemini_pi = annotate_with_gemini(pi, \"pi\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23529942-53fa-46ad-a5db-1f3096dd6607", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(gemini_pi).read())" + ] + }, + { + "cell_type": "markdown", + "id": "7d1eaeca-61be-4d0a-a525-dd09f52aaa0f", + "metadata": {}, + "source": [ + "## Hard example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0", + "metadata": {}, + "outputs": [], + "source": [ + "python_hard = \"\"\"# Be careful to support large number sizes\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " value = seed\n", + " while True:\n", + " value = (a * value + c) % m\n", + " yield value\n", + " \n", + "def max_subarray_sum(n, seed, min_val, max_val):\n", + " lcg_gen = lcg(seed)\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + " max_sum = float('-inf')\n", + " for i in range(n):\n", + " current_sum = 0\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed)\n", + " for _ in range(20):\n", + " seed = next(lcg_gen)\n", + " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\n", + "# Parameters\n", + "n = 10000 # Number of random numbers\n", + "initial_seed = 42 # Initial seed for the LCG\n", + "min_val = -10 # Minimum value of random numbers\n", + "max_val = 10 # Maximum value of random numbers\n", + "\n", + "# Timing the function\n", + "import time\n", + "start_time = time.time()\n", + "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", + "end_time = time.time()\n", + "\n", + "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", + "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899", + "metadata": {}, + "outputs": [], + "source": [ + "exec(python_hard)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8", + "metadata": {}, + "outputs": [], + "source": [ + "gpt_hard = annotate_with_gpt(python_hard, \"hard\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80a15259-3d51-47b8-953c-6271fbd4b6fb", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(gpt_hard).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9305446-1d0c-4b51-866a-b8c1e299bf5c", + "metadata": {}, + "outputs": [], + "source": [ + "gemini_hard = annotate_with_gemini(python_hard, \"hard\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad6eecc8-0517-43d8-bd21-5bbdedae7a10", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(gemini_hard).read())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ee75e72-9ecb-4edd-a74a-4d3a83c1eb79", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "claude_hard = annotate_with_claude(python_hard, \"hard\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47af1516-455f-4d1c-8a1c-2da5a38c0ba5", + "metadata": {}, + "outputs": [], + "source": [ + "exec(open(claude_hard).read())" + ] + }, + { + "cell_type": "markdown", + "id": "ff02ce09-0544-49a5-944d-a57b25bf9b72", + "metadata": {}, + "source": [ + "# Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gpt(python): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply.replace('```python\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8669f56b-8314-4582-a167-78842caea131", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_claude(python):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " yield reply.replace('```python\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d48d44df-c082-4ed1-b3ea-fc2a880591c2", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gemini(python):\n", + " stream = gemini.models.generate_content_stream(\n", + " model=GEMINI_MODEL,\n", + " contents=user_prompt_for(python),\n", + " config=types.GenerateContentConfig(\n", + " system_instruction=system_message,\n", + " ),\n", + " )\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " reply += chunk.text\n", + " yield reply.replace('```python\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d", + "metadata": {}, + "outputs": [], + "source": [ + "def annotate(python, model):\n", + " if model == \"GPT\":\n", + " result = stream_gpt(python)\n", + " elif model == \"Claude\":\n", + " result = stream_claude(python)\n", + " elif model == \"Gemini\":\n", + " result = stream_gemini(python)\n", + " else:\n", + " raise ValueError(\"Unknown model\")\n", + " for stream_so_far in result:\n", + " yield stream_so_far " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19bf2bff-a822-4009-a539-f003b1651383", + "metadata": {}, + "outputs": [], + "source": [ + "def execute_python(code):\n", + " try:\n", + " output = io.StringIO()\n", + " sys.stdout = output\n", + " exec(code)\n", + " finally:\n", + " sys.stdout = sys.__stdout__\n", + " return output.getvalue()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442", + "metadata": {}, + "outputs": [], + "source": [ + "css = \"\"\"\n", + ".python {background-color: #306998;}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76167ea9-d0a1-4bc6-8d73-633d3b8c8df6", + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "\n", + "# Parameters\n", + "LINES = 25\n", + "LINE_HEIGHT = 20 # px, typical CodeMirror line height\n", + "PADDING = 10 # px, top + bottom padding\n", + "\n", + "CODE_HEIGHT = LINES * LINE_HEIGHT + PADDING\n", + "\n", + "\n", + "with gr.Blocks(\n", + " theme=gr.themes.Soft(),\n", + " css=f\"\"\"\n", + "#code_input .cm-editor, #annotated_code .cm-editor {{\n", + " height: {CODE_HEIGHT}px !important;\n", + " overflow-y: auto !important;\n", + "}}\n", + "\"\"\"\n", + ") as demo_v2:\n", + " gr.Markdown(\"## 🐍 Annotate Python Code with Docstrings and Comments\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " gr.Markdown(\"### Python code:\")\n", + " code_input = gr.Code(\n", + " language=\"python\", \n", + " value=python_hard,\n", + " lines=25,\n", + " elem_id=\"code_input\"\n", + " )\n", + " \n", + " with gr.Column(scale=1):\n", + " gr.Markdown(\"### Annotated code:\")\n", + " annotated_output = gr.Code(\n", + " language=\"python\",\n", + " lines=25,\n", + " elem_id=\"annotated_code\"\n", + " )\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " model_dropdown = gr.Dropdown(\n", + " choices=[\"Gemini\", \"GPT-4\", \"Claude\"],\n", + " value=\"Gemini\",\n", + " label=\"Select model\"\n", + " )\n", + " with gr.Column(scale=1):\n", + " annotate_btn = gr.Button(\"✨ Annotate code\", variant=\"primary\")\n", + " run_btn = gr.Button(\"▶️ Run Python\", variant=\"secondary\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column():\n", + " gr.Markdown(\"### Python result:\")\n", + " result_output = gr.Textbox(\n", + " lines=5, \n", + " label=\"Output\",\n", + " interactive=False\n", + " )\n", + " \n", + " annotate_btn.click(\n", + " annotate,\n", + " inputs=[code_input, model_dropdown],\n", + " outputs=[annotated_output]\n", + " )\n", + " run_btn.click(execute_python, inputs=[code_input], outputs=[result_output])\n", + "\n", + " \n", + "demo_v2.launch(inbrowser=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea42883b-fdba-46ed-97be-f42e3cb41f11", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week4/community-contributions/day3-with-gemini.ipynb b/week4/community-contributions/day3-with-gemini.ipynb new file mode 100644 index 0000000..4e2b89d --- /dev/null +++ b/week4/community-contributions/day3-with-gemini.ipynb @@ -0,0 +1,690 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9", + "metadata": {}, + "source": [ + "# Code Generator\n", + "\n", + "The requirement: use a Frontier model to generate high performance C++ code from Python code\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f72dfaf-9f20-4d81-b082-018eda152c9f", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -U -q \"google-genai\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import io\n", + "import sys\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "from google import genai\n", + "from google.genai import types\n", + "import anthropic\n", + "from IPython.display import Markdown, display, update_display\n", + "import gradio as gr\n", + "import subprocess" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f672e1c-87e9-4865-b760-370fa605e614", + "metadata": {}, + "outputs": [], + "source": [ + "# environment\n", + "\n", + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')\n", + "\n", + "if openai_api_key:\n", + " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " print(\"OpenAI API Key not set\")\n", + " \n", + "if anthropic_api_key:\n", + " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n", + "else:\n", + " print(\"Anthropic API Key not set\")\n", + "\n", + "if google_api_key:\n", + " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n", + "else:\n", + " print(\"Google API Key not set\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da", + "metadata": {}, + "outputs": [], + "source": [ + "# initialize\n", + "\n", + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "gemini = genai.Client()\n", + "\n", + "OPENAI_MODEL = \"o4-mini\"\n", + "CLAUDE_MODEL = \"claude-3-7-sonnet-latest\"\n", + "GEMINI_MODEL = \"gemini-2.5-flash\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6896636f-923e-4a2c-9d6c-fac07828a201", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"You are an assistant that reimplements Python code in high performance C++ for an M1 Mac. \"\n", + "system_message += \"Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n", + "system_message += \"The C++ response needs to produce an identical output in the fastest possible time.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(python):\n", + " user_prompt = \"Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. \"\n", + " user_prompt += \"Respond only with C++ code; do not explain your work other than a few comments. \"\n", + " user_prompt += \"Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\\n\\n\"\n", + " user_prompt += python\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6190659-f54c-4951-bef4-4960f8e51cc4", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(python):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(python)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b", + "metadata": {}, + "outputs": [], + "source": [ + "# write to a file called optimized.cpp\n", + "\n", + "def write_output(cpp):\n", + " code = cpp.replace(\"```cpp\",\"\").replace(\"```\",\"\")\n", + " with open(\"optimized.cpp\", \"w\") as f:\n", + " f.write(code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9", + "metadata": {}, + "outputs": [], + "source": [ + "def optimize_gpt(python): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " print(fragment, end='', flush=True)\n", + " write_output(reply)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cd84ad8-d55c-4fe0-9eeb-1895c95c4a9d", + "metadata": {}, + "outputs": [], + "source": [ + "def optimize_claude(python):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " print(text, end=\"\", flush=True)\n", + " write_output(reply)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8a35102-1c95-469b-8855-e85f4c9bdbdf", + "metadata": {}, + "outputs": [], + "source": [ + "def optimize_gemini(python):\n", + " reply = gemini.models.generate_content(\n", + " model=GEMINI_MODEL,\n", + " contents=user_prompt_for(python),\n", + " config=types.GenerateContentConfig(\n", + " system_instruction=system_message,\n", + " )\n", + " )\n", + "\n", + " print(reply.text)\n", + " write_output(reply.text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1cbb778-fa57-43de-b04b-ed523f396c38", + "metadata": {}, + "outputs": [], + "source": [ + "pi = \"\"\"\n", + "import time\n", + "\n", + "def calculate(iterations, param1, param2):\n", + " result = 1.0\n", + " for i in range(1, iterations+1):\n", + " j = i * param1 - param2\n", + " result -= (1/j)\n", + " j = i * param1 + param2\n", + " result += (1/j)\n", + " return result\n", + "\n", + "start_time = time.time()\n", + "result = calculate(100_000_000, 4, 1) * 4\n", + "end_time = time.time()\n", + "\n", + "print(f\"Result: {result:.12f}\")\n", + "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fe1cd4b-d2c5-4303-afed-2115a3fef200", + "metadata": {}, + "outputs": [], + "source": [ + "exec(pi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "105db6f9-343c-491d-8e44-3a5328b81719", + "metadata": {}, + "outputs": [], + "source": [ + "optimize_gpt(pi)" + ] + }, + { + "cell_type": "markdown", + "id": "bf8f8018-f64d-425c-a0e1-d7862aa9592d", + "metadata": {}, + "source": [ + "# Compiling C++ and executing\n", + "\n", + "This next cell contains the command to compile a C++ file on my M1 Mac. \n", + "It compiles the file `optimized.cpp` into an executable called `optimized` \n", + "Then it runs the program called `optimized`\n", + "\n", + "In the next lab (day4), a student has contributed a full solution that compiles to efficient code on Mac, PC and Linux!\n", + "\n", + "You can wait for this, or you can google (or ask ChatGPT!) for how to do this on your platform, then replace the lines below.\n", + "If you're not comfortable with this step, you can skip it for sure - I'll show you exactly how it performs on my Mac.\n", + "\n", + "\n", + "OR alternatively: student Sandeep K.G. points out that you can run Python and C++ code online to test it out that way. Thank you Sandeep! \n", + "> Not an exact comparison but you can still get the idea of performance difference.\n", + "> For example here: https://www.programiz.com/cpp-programming/online-compiler/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4194e40c-04ab-4940-9d64-b4ad37c5bb40", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile C++ and run the executable\n", + "\n", + "!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp\n", + "!./optimized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "983a11fe-e24d-4c65-8269-9802c5ef3ae6", + "metadata": {}, + "outputs": [], + "source": [ + "optimize_claude(pi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5a766f9-3d23-4bb4-a1d4-88ec44b61ddf", + "metadata": {}, + "outputs": [], + "source": [ + "# Repeat for Claude - again, use the right approach for your platform\n", + "\n", + "!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp\n", + "!./optimized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01f331f2-caac-48f6-9a03-8a228ee521bc", + "metadata": {}, + "outputs": [], + "source": [ + "optimize_gemini(pi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ef707a4-930e-4b8b-9443-e7e4fd309c2a", + "metadata": {}, + "outputs": [], + "source": [ + "!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp\n", + "!./optimized" + ] + }, + { + "cell_type": "markdown", + "id": "7d1eaeca-61be-4d0a-a525-dd09f52aaa0f", + "metadata": {}, + "source": [ + "# Python Hard Version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0", + "metadata": {}, + "outputs": [], + "source": [ + "python_hard = \"\"\"# Be careful to support large number sizes\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " value = seed\n", + " while True:\n", + " value = (a * value + c) % m\n", + " yield value\n", + " \n", + "def max_subarray_sum(n, seed, min_val, max_val):\n", + " lcg_gen = lcg(seed)\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + " max_sum = float('-inf')\n", + " for i in range(n):\n", + " current_sum = 0\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed)\n", + " for _ in range(20):\n", + " seed = next(lcg_gen)\n", + " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\n", + "# Parameters\n", + "n = 10000 # Number of random numbers\n", + "initial_seed = 42 # Initial seed for the LCG\n", + "min_val = -10 # Minimum value of random numbers\n", + "max_val = 10 # Maximum value of random numbers\n", + "\n", + "# Timing the function\n", + "import time\n", + "start_time = time.time()\n", + "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", + "end_time = time.time()\n", + "\n", + "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", + "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899", + "metadata": {}, + "outputs": [], + "source": [ + "exec(python_hard)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8", + "metadata": {}, + "outputs": [], + "source": [ + "optimize_gpt(python_hard)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0b3d073-88a2-40b2-831c-6f0c345c256f", + "metadata": {}, + "outputs": [], + "source": [ + "# Replace this with the right C++ compile + execute command for your platform\n", + "\n", + "!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp\n", + "!./optimized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9305446-1d0c-4b51-866a-b8c1e299bf5c", + "metadata": {}, + "outputs": [], + "source": [ + "optimize_gemini(python_hard)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c181036-8193-4fdd-aef3-fc513b218d43", + "metadata": {}, + "outputs": [], + "source": [ + "# Replace this with the right C++ compile + execute command for your platform\n", + "\n", + "!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp\n", + "!./optimized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ee75e72-9ecb-4edd-a74a-4d3a83c1eb79", + "metadata": {}, + "outputs": [], + "source": [ + "optimize_claude(python_hard)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a4ab43c-7df2-4770-bd05-6bbc198a8c45", + "metadata": {}, + "outputs": [], + "source": [ + "# Replace this with the right C++ compile + execute command for your platform\n", + "\n", + "!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp\n", + "!./optimized" + ] + }, + { + "cell_type": "markdown", + "id": "ff02ce09-0544-49a5-944d-a57b25bf9b72", + "metadata": {}, + "source": [ + "# Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gpt(python): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply.replace('```cpp\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8669f56b-8314-4582-a167-78842caea131", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_claude(python):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " yield reply.replace('```cpp\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d48d44df-c082-4ed1-b3ea-fc2a880591c2", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gemini(python):\n", + " stream = gemini.models.generate_content_stream(\n", + " model=GEMINI_MODEL,\n", + " contents=user_prompt_for(python),\n", + " config=types.GenerateContentConfig(\n", + " system_instruction=system_message,\n", + " ),\n", + " )\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " reply += chunk.text\n", + " yield reply.replace('```cpp\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d", + "metadata": {}, + "outputs": [], + "source": [ + "def optimize(python, model):\n", + " if model==\"GPT\":\n", + " result = stream_gpt(python)\n", + " elif model==\"Claude\":\n", + " result = stream_claude(python)\n", + " elif model==\"Gemini\":\n", + " result = stream_gemini(python)\n", + " else:\n", + " raise ValueError(\"Unknown model\")\n", + " for stream_so_far in result:\n", + " yield stream_so_far " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1ddb38e-6b0a-4c37-baa4-ace0b7de887a", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks() as ui:\n", + " with gr.Row():\n", + " python = gr.Textbox(label=\"Python code:\", lines=10, value=python_hard)\n", + " cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n", + " with gr.Row():\n", + " model = gr.Dropdown([\"GPT\", \"Claude\", \"Gemini\"], label=\"Select model\", value=\"GPT\")\n", + " convert = gr.Button(\"Convert code\")\n", + "\n", + " convert.click(optimize, inputs=[python, model], outputs=[cpp])\n", + "\n", + "ui.launch(inbrowser=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19bf2bff-a822-4009-a539-f003b1651383", + "metadata": {}, + "outputs": [], + "source": [ + "def execute_python(code):\n", + " try:\n", + " output = io.StringIO()\n", + " sys.stdout = output\n", + " exec(code)\n", + " finally:\n", + " sys.stdout = sys.__stdout__\n", + " return output.getvalue()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77f3ab5d-fcfb-4d3f-8728-9cacbf833ea6", + "metadata": {}, + "outputs": [], + "source": [ + "# M1 Mac version to compile and execute optimized C++ code:\n", + "\n", + "def execute_cpp(code):\n", + " write_output(code)\n", + " try:\n", + " compile_cmd = [\"clang++\", \"-Ofast\", \"-std=c++17\", \"-march=armv8.5-a\", \"-mtune=apple-m1\", \"-mcpu=apple-m1\", \"-o\", \"optimized\", \"optimized.cpp\"]\n", + " compile_result = subprocess.run(compile_cmd, check=True, text=True, capture_output=True)\n", + " run_cmd = [\"./optimized\"]\n", + " run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)\n", + " return run_result.stdout\n", + " except subprocess.CalledProcessError as e:\n", + " return f\"An error occurred:\\n{e.stderr}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442", + "metadata": {}, + "outputs": [], + "source": [ + "css = \"\"\"\n", + ".python {background-color: #306998;}\n", + ".cpp {background-color: #050;}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1303932-160c-424b-97a8-d28c816721b2", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks(css=css) as ui:\n", + " gr.Markdown(\"## Convert code from Python to C++\")\n", + " with gr.Row():\n", + " python = gr.Textbox(label=\"Python code:\", value=python_hard, lines=20)\n", + " cpp = gr.Textbox(label=\"C++ code:\", lines=20)\n", + " with gr.Row():\n", + " model = gr.Dropdown([\"GPT\", \"Claude\", \"Gemini\"], label=\"Select model\", value=\"GPT\")\n", + " convert = gr.Button(\"Convert code\")\n", + " with gr.Row():\n", + " python_run = gr.Button(\"Run Python\")\n", + " cpp_run = gr.Button(\"Run C++\")\n", + " with gr.Row():\n", + " python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n", + " cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n", + "\n", + " convert.click(optimize, inputs=[python, model], outputs=[cpp])\n", + " python_run.click(execute_python, inputs=[python], outputs=[python_out])\n", + " cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n", + "\n", + "ui.launch(inbrowser=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea42883b-fdba-46ed-97be-f42e3cb41f11", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}