From 8100512956bf0ed61c659e0f612d0eb180068a9d Mon Sep 17 00:00:00 2001
From: Praveen M <32341624+Praveenm79@users.noreply.github.com>
Date: Mon, 23 Jun 2025 20:17:14 +0530
Subject: [PATCH] Week 4 Contribution: Code conversion using Gemini and Codestral

---
 .../Week4_day3_Gemini_Codestral.ipynb | 643 ++++++++++++++++++
 1 file changed, 643 insertions(+)
 create mode 100644 week4/community-contributions/Week4_day3_Gemini_Codestral.ipynb

diff --git a/week4/community-contributions/Week4_day3_Gemini_Codestral.ipynb b/week4/community-contributions/Week4_day3_Gemini_Codestral.ipynb
new file mode 100644
index 0000000..8fa0417
--- /dev/null
+++ b/week4/community-contributions/Week4_day3_Gemini_Codestral.ipynb
@@ -0,0 +1,643 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ac833f26-d429-4fd2-8f83-92174f1c951a",
+   "metadata": {},
+   "source": [
+    "# Code conversion using Gemini and Codestral on Windows 11"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c230178c-6f31-4c5a-a888-16b7037ffbf9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import io\n",
+    "import sys\n",
+    "import gradio as gr\n",
+    "import subprocess\n",
+    "from google import genai\n",
+    "from google.genai import types\n",
+    "from mistralai import Mistral\n",
+    "from dotenv import load_dotenv\n",
+    "from IPython.display import Markdown, display, update_display"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6d824484-eaaa-456a-b7dc-7e3277fec34a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load Gemini and Mistral API keys\n",
+    "\n",
+    "load_dotenv(override=True)\n",
+    "gemini_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
+    "mistral_api_key = os.getenv(\"MISTRAL_API_KEY\")\n",
+    "\n",
+    "if not mistral_api_key or not gemini_api_key:\n",
+    "    print(\"API Key not found!\")\n",
+    "else:\n",
+    "    print(\"API Key loaded in memory\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86f3633e-81f9-4c13-b7b5-793ddc4f886f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Models to be used\n",
+    "\n",
+    "MODEL_GEMINI = 'gemini-2.5-flash'\n",
+    "MODEL_CODESTRAL = 'codestral-latest'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3f3a6d53-70f9-46b8-a490-a50f3a1adf9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize the Gemini and Mistral clients\n",
+    "try:\n",
+    "    gemini_client = genai.Client(api_key=gemini_api_key)\n",
+    "    print(\"Google GenAI Client initialized successfully!\")\n",
+    "\n",
+    "    codestral_client = Mistral(api_key=mistral_api_key)\n",
+    "    print(\"Mistral Client initialized successfully!\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error initializing Client: {e}\")\n",
+    "    raise  # exit() would kill the notebook kernel; re-raising keeps the traceback"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f816fbe8-e094-499f-98a5-588ebecf8c72",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Gemini System prompt\n",
+    "\n",
+    "system_message = \"You are an assistant that reimplements Python code in high-performance C++ optimized for a Windows PC. \"\n",
+    "system_message += \"Use Windows-specific optimizations where applicable (e.g., multithreading with std::thread, SIMD, or WinAPI if necessary). \"\n",
+    "system_message += \"Respond only with the equivalent C++ code; include comments only where absolutely necessary. \"\n",
+    "system_message += \"Avoid any explanation or text outside the code. \"\n",
+    "system_message += \"The C++ output must produce identical functionality with the fastest possible execution time on Windows.\"\n",
+    "\n",
+    "generate_content_config = types.GenerateContentConfig(system_instruction=system_message)"
+   ]
+  },
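+  {
+   "cell_type": "markdown",
+   "id": "3c9d2b6e-1a2f-4c3d-9e4f-0a1b2c3d4e5f",
+   "metadata": {},
+   "source": [
+    "Optional smoke test (a minimal sketch): one small non-streaming request to confirm the client and the system instruction are wired up before streaming anything. The reply should be C++ only; it costs one tiny API call."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f8e9dab-2b3c-4d5e-8f90-1a2b3c4d5e6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal smoke test: a trivial conversion request; expect a C++-only reply\n",
+    "response = gemini_client.models.generate_content(\n",
+    "    model=MODEL_GEMINI,\n",
+    "    config=generate_content_config,\n",
+    "    contents=\"print('hello')\"\n",
+    ")\n",
+    "print(response.text)"
+   ]
+  },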
\"\n", + "system_message += \"The C++ output must produce identical functionality with the fastest possible execution time on Windows.\"\n", + "\n", + "generate_content_config = types.GenerateContentConfig(system_instruction=system_message)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01227835-15d2-40bd-a9dd-2ef35ad371dc", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(python):\n", + " user_prompt = (\n", + " \"Convert the following Python code into high-performance C++ optimized for Windows. \"\n", + " \"Use standard C++20 or newer with Windows-compatible libraries and best practices. \"\n", + " \"Ensure the implementation runs as fast as possible and produces identical output. \"\n", + " \"Use appropriate numeric types to avoid overflow or precision loss. \"\n", + " \"Avoid unnecessary abstraction; prefer direct computation and memory-efficient structures. \"\n", + " \"Respond only with C++ code, include all required headers (like , , etc.), and limit comments to only what's essential.\\n\\n\"\n", + " )\n", + " user_prompt += python\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d9fc8e2-acf0-4122-a8a9-5aadadf982ab", + "metadata": {}, + "outputs": [], + "source": [ + "def user_message_gemini(python): \n", + " return types.Content(role=\"user\", parts=[types.Part.from_text(text=user_prompt_for(python))]) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "334c8b84-6e37-40fc-97ac-40a1b3aa29fa", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(python):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(python)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4aca87ac-6330-4ed4-a36f-1726fd0ada1a", + "metadata": {}, + "outputs": [], + "source": [ + "def write_output(cpp):\n", + " code = cpp.replace(\"```cpp\", \"\").replace(\"```c++\", \"\").replace(\"```\", \"\").strip()\n", + " \n", + " if not \"#include\" in code:\n", + " raise ValueError(\"C++ code appears invalid: missing #include directives.\")\n", + "\n", + " with open(\"optimized.cpp\", \"w\", encoding=\"utf-8\", newline=\"\\n\") as f:\n", + " f.write(code) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcf42642-1a55-4556-8738-0c8c02effa9c", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate CPP code using Gemini\n", + "\n", + "def optimize_gemini(python):\n", + " stream = gemini_client.models.generate_content_stream(\n", + " model = MODEL_GEMINI,\n", + " config=generate_content_config,\n", + " contents=user_message_gemini(python)\n", + " )\n", + " cpp_code = \"\"\n", + " for chunk in stream:\n", + " chunk_text = chunk.text\n", + " cpp_code += chunk_text\n", + " print(chunk_text, end=\"\", flush=True) \n", + " write_output(cpp_code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f06a301-4397-4d63-9226-657bb2ddb792", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate CPP code using Codestral\n", + "\n", + "def optimize_codestral(python):\n", + " stream = codestral_client.chat.stream(\n", + " model = MODEL_CODESTRAL,\n", + " messages = messages_for(python), \n", + " )\n", + " \n", + " cpp_code = \"\"\n", + " for chunk in stream:\n", + " chunk_text = chunk.data.choices[0].delta.content\n", + " cpp_code += chunk_text\n", + " print(chunk_text, end=\"\", flush=True) \n", + " write_output(cpp_code)" + 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8bd51601-7c1d-478d-b043-6f92739e5c4b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Actual code to convert\n",
+    "\n",
+    "pi = \"\"\"\n",
+    "import time\n",
+    "\n",
+    "def calculate(iterations, param1, param2):\n",
+    "    result = 1.0\n",
+    "    for i in range(1, iterations+1):\n",
+    "        j = i * param1 - param2\n",
+    "        result -= (1/j)\n",
+    "        j = i * param1 + param2\n",
+    "        result += (1/j)\n",
+    "    return result\n",
+    "\n",
+    "start_time = time.time()\n",
+    "result = calculate(100_000_000, 4, 1) * 4\n",
+    "end_time = time.time()\n",
+    "\n",
+    "print(f\"Result: {result:.12f}\")\n",
+    "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db9ea24e-d381-48ac-9196-853d2527dcca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exec(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3e26708-8475-474d-8e96-e602c3d5ef9f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_gemini(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2cc23ea7-6062-4354-92bc-730baa52a50b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# CPP Compilation\n",
+    "\n",
+    "!g++ -O3 -std=c++20 -o optimized.exe optimized.cpp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b14704d-95fe-4ed2-861f-af591bf3090e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!.\\optimized.exe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d756d1a-1d49-4cfb-bed7-8748d848b083",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_codestral(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e286dc8-9532-48b1-b748-a7950972e7df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!g++ -O3 -std=c++20 -o optimized.exe optimized.cpp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "61fe0044-7679-4245-9e59-50642f3d80c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!.\\optimized.exe"
+   ]
+  },
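+  {
+   "cell_type": "markdown",
+   "id": "1b2c3d4e-5f60-4a7b-9c8d-4d5e6f708192",
+   "metadata": {},
+   "source": [
+    "Optional cross-check (a sketch; it assumes both programs print a `Result:` line in the same format, and the tolerance of `1e-9` is an assumption): run the Python source and the compiled binary and compare the two values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c7d8e9f-6071-4b8c-8d9e-5e6f708192a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "# Hypothetical helper: extract the number after \"Result:\" from a command's stdout\n",
+    "def result_of(cmd):\n",
+    "    out = subprocess.run(cmd, capture_output=True, text=True, timeout=300).stdout\n",
+    "    return float(re.search(r\"Result:\\s*([-\\d.]+)\", out).group(1))\n",
+    "\n",
+    "py_val = result_of([\"python\", \"-c\", pi])\n",
+    "cpp_val = result_of([\"optimized.exe\"])\n",
+    "print(f\"Python: {py_val:.12f}  C++: {cpp_val:.12f}  match: {abs(py_val - cpp_val) < 1e-9}\")"
+   ]
+  },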
+  {
+   "cell_type": "markdown",
+   "id": "f0c0392c-d2a7-4619-82a2-f7b9fa7c43f9",
+   "metadata": {},
+   "source": [
+    "## Hard Code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ca53eb4-46cd-435b-a950-0e2a8f845535",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "python_hard = \"\"\"# Be careful to support large number sizes\n",
+    "\n",
+    "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n",
+    "    value = seed\n",
+    "    while True:\n",
+    "        value = (a * value + c) % m\n",
+    "        yield value\n",
+    "\n",
+    "def max_subarray_sum(n, seed, min_val, max_val):\n",
+    "    lcg_gen = lcg(seed)\n",
+    "    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n",
+    "    max_sum = float('-inf')\n",
+    "    for i in range(n):\n",
+    "        current_sum = 0\n",
+    "        for j in range(i, n):\n",
+    "            current_sum += random_numbers[j]\n",
+    "            if current_sum > max_sum:\n",
+    "                max_sum = current_sum\n",
+    "    return max_sum\n",
+    "\n",
+    "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n",
+    "    total_sum = 0\n",
+    "    lcg_gen = lcg(initial_seed)\n",
+    "    for _ in range(20):\n",
+    "        seed = next(lcg_gen)\n",
+    "        total_sum += max_subarray_sum(n, seed, min_val, max_val)\n",
+    "    return total_sum\n",
+    "\n",
+    "# Parameters\n",
+    "n = 10000         # Number of random numbers\n",
+    "initial_seed = 42 # Initial seed for the LCG\n",
+    "min_val = -10     # Minimum value of random numbers\n",
+    "max_val = 10      # Maximum value of random numbers\n",
+    "\n",
+    "# Timing the function\n",
+    "import time\n",
+    "start_time = time.time()\n",
+    "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n",
+    "end_time = time.time()\n",
+    "\n",
+    "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n",
+    "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "697cc9fe-efdb-40b7-8e43-871bd2df940e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exec(python_hard)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "17ed6329-6c5f-45af-91ff-06d73830dd0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_gemini(python_hard)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b57f0e7-46c9-4235-86eb-389faf37b7bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# CPP Compilation\n",
+    "\n",
+    "!g++ -O3 -std=c++20 -o optimized.exe optimized.cpp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b8ce8d01-fda8-400d-b3d4-6f1ad3008d28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!.\\optimized.exe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adbcdac7-8656-41c9-8707-d8a71998d393",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_codestral(python_hard)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f9fc9b1-29cf-4510-83f8-1484d26e871e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# CPP Compilation\n",
+    "\n",
+    "!g++ -O3 -std=c++20 -o optimized.exe optimized.cpp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52170458-c4a1-4920-8d83-8c5ba7250759",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!.\\optimized.exe"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da6aee85-2792-487b-bef3-fec5dcf12623",
+   "metadata": {},
+   "source": [
+    "## Putting it all together in Gradio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2a90c4f-c289-4658-a6ce-51b80e20f91f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def stream_gemini(python):\n",
+    "    stream = gemini_client.models.generate_content_stream(\n",
+    "        model=MODEL_GEMINI,\n",
+    "        config=generate_content_config,\n",
+    "        contents=user_message_gemini(python)\n",
+    "    )\n",
+    "\n",
+    "    cpp_code = \"\"\n",
+    "    for chunk in stream:\n",
+    "        chunk_text = chunk.text or \"\"\n",
+    "        cpp_code += chunk_text\n",
+    "        yield cpp_code.replace('```cpp\\n', '').replace('```', '')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e872171-96d8-4041-8cb0-0c632c5e957f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def stream_codestral(python):\n",
+    "    stream = codestral_client.chat.stream(\n",
+    "        model=MODEL_CODESTRAL,\n",
+    "        messages=messages_for(python),\n",
+    "    )\n",
+    "\n",
+    "    cpp_code = \"\"\n",
+    "    for chunk in stream:\n",
+    "        chunk_text = chunk.data.choices[0].delta.content or \"\"\n",
+    "        cpp_code += chunk_text\n",
+    "        yield cpp_code.replace('```cpp\\n', '').replace('```', '')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3340b36b-1241-4b0f-9e69-d4e5cc215a27",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optimize(python, model):\n",
+    "    if model.lower() == 'gemini':\n",
+    "        result = stream_gemini(python)\n",
+    "    elif model.lower() == 'codestral':\n",
+    "        result = stream_codestral(python)\n",
+    "    else:\n",
+    "        raise ValueError(\"Unknown model\")\n",
+    "\n",
+    "    for stream_so_far in result:\n",
+    "        yield stream_so_far"
+   ]
+  },
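+  {
+   "cell_type": "markdown",
+   "id": "2d3e4f50-7182-4c9d-9eaf-6f708192a3b4",
+   "metadata": {},
+   "source": [
+    "Optional: a small sketch that exercises `optimize` without the UI. Each yield is the cumulative text so far, so the loop prints only the newly streamed delta; `\"Gemini\"` here is just an example choice."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e9f0a1b-8293-4dae-8fb0-708192a3b4c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Stream a conversion outside Gradio; print only the delta of each cumulative yield\n",
+    "last = \"\"\n",
+    "for so_far in optimize(pi, \"Gemini\"):\n",
+    "    print(so_far[len(last):], end=\"\", flush=True)\n",
+    "    last = so_far"
+   ]
+  },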
+  {
+   "cell_type": "markdown",
+   "id": "277ddd6c-e71e-4512-965a-57fca341487a",
+   "metadata": {},
+   "source": [
+    "### Gradio Implementation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "222a9eae-236e-4ba3-8f23-3d9b879ec2d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "custom_css = \"\"\"\n",
+    ".scrollable-box textarea {\n",
+    "    overflow: auto !important;\n",
+    "    height: 400px;\n",
+    "}\n",
+    "\n",
+    ".python {background-color: #306998;}\n",
+    ".cpp {background-color: #050;}\n",
+    "\"\"\"\n",
+    "\n",
+    "theme = gr.themes.Soft()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b4bd6ed1-ff8c-42d4-8da6-24b9cfd134db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def execute_python(code):\n",
+    "    try:\n",
+    "        result = subprocess.run(\n",
+    "            [\"python\", \"-c\", code],\n",
+    "            capture_output=True,\n",
+    "            text=True,\n",
+    "            timeout=60\n",
+    "        )\n",
+    "        if result.returncode == 0:\n",
+    "            return result.stdout or \"[No output]\"\n",
+    "        else:\n",
+    "            return f\"[Error]\\n{result.stderr}\"\n",
+    "    except subprocess.TimeoutExpired:\n",
+    "        return \"[Error] Execution timed out.\"\n",
+    "    except Exception as e:\n",
+    "        return f\"[Exception] {str(e)}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1507c973-8699-48b2-80cd-45900c97a867",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def execute_cpp(code):\n",
+    "    write_output(code)\n",
+    "\n",
+    "    try:\n",
+    "        compile_cmd = [\"g++\", \"-O3\", \"-std=c++20\", \"-o\", \"optimized.exe\", \"optimized.cpp\"]\n",
+    "        subprocess.run(compile_cmd, capture_output=True, text=True, check=True)\n",
+    "\n",
+    "        run_cmd = [\"optimized.exe\"]\n",
+    "        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True, timeout=60)\n",
+    "\n",
+    "        return run_result.stdout or \"[No output]\"\n",
+    "\n",
+    "    except subprocess.CalledProcessError as e:\n",
+    "        return f\"[Compile/Runtime Error]\\n{e.stderr}\"\n",
+    "    except subprocess.TimeoutExpired:\n",
+    "        return \"[Error] Execution timed out.\"\n",
+    "    except Exception as e:\n",
+    "        return f\"[Exception] {str(e)}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "374f00f3-8fcf-4ae9-bf54-c5a44dd74844",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with gr.Blocks(css=custom_css, theme=theme) as ui:\n",
+    "    gr.Markdown(\"## Convert code from Python to C++\")\n",
+    "    with gr.Row():\n",
+    "        python = gr.Textbox(label=\"Python code:\", lines=10, value=python_hard, elem_classes=[\"scrollable-box\"])\n",
+    "        cpp = gr.Textbox(label=\"C++ code:\", lines=10, elem_classes=[\"scrollable-box\"])\n",
+    "    with gr.Row():\n",
+    "        model = gr.Dropdown([\"Gemini\", \"Codestral\"], label=\"Select model\", value=\"Gemini\")\n",
+    "        convert = gr.Button(\"Convert code\")\n",
+    "    with gr.Row():\n",
+    "        python_run = gr.Button(\"Run Python\")\n",
+    "        cpp_run = gr.Button(\"Run C++\")\n",
+    "    with gr.Row():\n",
+    "        python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
+    "        cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n",
+    "\n",
+    "    convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
+    "    python_run.click(execute_python, inputs=[python], outputs=[python_out])\n",
+    "    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n",
+    "\n",
+    "ui.launch(inbrowser=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}