diff --git a/week4/community-contributions/c_extension_generator/python_c_ext_generator.ipynb b/week4/community-contributions/c_extension_generator/python_c_ext_generator.ipynb
new file mode 100644
index 0000000..65b480a
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/python_c_ext_generator.ipynb
@@ -0,0 +1,1616 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9",
+ "metadata": {},
+ "source": [
+ "# Python C extension generator\n",
+ "\n",
+ "Use a frontier model to generate high-performance Python C extension code from Python code.\n",
+ "\n",
+ "Python C extension modules allow C-coded, compiled modules to be integrated into Python applications.\n",
+ "\n",
+ "* [Python C Extensions](https://docs.python.org/3.13/extending/index.html)\n",
+ "* [Python's C API](https://docs.python.org/3.13/c-api/index.html)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d90e04a2-5b8a-4fd5-9db8-27c02f033313",
+ "metadata": {},
+ "source": [
+ "> **Important Note**\n",
+ ">\n",
+ "> In this lab, I use GPT-4o or GPT-5, which are slightly higher priced models."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Imports.\n",
+ "\n",
+ "import io\n",
+ "import os\n",
+ "import shutil\n",
+ "import subprocess\n",
+ "import sys\n",
+ "from time import perf_counter\n",
+ "from timeit import timeit\n",
+ "\n",
+ "import gradio as gr\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "from pydantic import BaseModel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "4f672e1c-87e9-4865-b760-370fa605e614",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load environment variables from '.env' file.\n",
+ "\n",
+ "load_dotenv(override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialize client and set the default LLM model to use.\n",
+ "\n",
+ "# OPENAI_MODEL = \"gpt-4o\"\n",
+ "OPENAI_MODEL = \"gpt-5\"\n",
+ "\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "c6f37bf0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define Pydantic model class for GPT response parsing.\n",
+ "\n",
+ "class Extension_codes(BaseModel):\n",
+ "    \"\"\"Pydantic model of a response containing the generated C code, the 'setup.py' code and a usage example.\n",
+ "\n",
+ "    Passed as 'response_format' when requesting the optimization, so the LLM\n",
+ "    reply is parsed into these three text fields.\n",
+ "    \"\"\"\n",
+ "    # C source code of the generated extension (written to '<module_name>.c').\n",
+ "    c_code: str\n",
+ "    # Build script contents (written to 'setup.py').\n",
+ "    setup: str\n",
+ "    # Example script using the extension (written to 'usage_example.py').\n",
+ "    usage: str"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "cb6ce77a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define a function to print the optimization codes.\n",
+ "\n",
+ "def print_optimization(optimization):\n",
+ "    \"\"\"Dump the three generated artifacts to stdout, one labelled section each.\n",
+ "\n",
+ "    Sections appear in the order C source, 'setup.py', usage example, and\n",
+ "    consecutive sections are separated by a dashed rule.\n",
+ "    \"\"\"\n",
+ "    divider = \"---------------------------\"\n",
+ "    labelled_fields = ((\"C CODE\", \"c_code\"), (\"setup.py\", \"setup\"), (\"USAGE\", \"usage\"))\n",
+ "    for position, (label, field) in enumerate(labelled_fields):\n",
+ "        # The rule goes between sections only, never before the first one.\n",
+ "        if position:\n",
+ "            print(divider)\n",
+ "        print(f\"{label}:\\n{getattr(optimization, field)}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define a function to write outputs to a file with a given filename.\n",
+ "\n",
+ "def write_file(data, filename):\n",
+ "    \"\"\"Write text to a file, replacing any existing content.\n",
+ "\n",
+ "    The file is always encoded as UTF-8 rather than the platform default\n",
+ "    (for example cp1252 on Windows), so generated code containing non-ASCII\n",
+ "    characters is written without encoding errors and the generated Python\n",
+ "    scripts match the UTF-8 encoding Python assumes for source files.\n",
+ "\n",
+ "    Args:\n",
+ "        data: Text to write.\n",
+ "        filename: Path of the file to create or overwrite.\n",
+ "    \"\"\"\n",
+ "    with open(filename, \"w\", encoding=\"utf-8\") as file:\n",
+ "        file.write(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f13c9c97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define a function to write the optimization codes to files.\n",
+ "\n",
+ "def write_optimization(optimization, module_name):\n",
+ "    \"\"\"Persist the generated sources as files.\n",
+ "\n",
+ "    The C code goes to '<module_name>.c', the build script to 'setup.py' and\n",
+ "    the usage example to 'usage_example.py'.\n",
+ "    \"\"\"\n",
+ "    destinations = (\n",
+ "        (\"c_code\", f\"{module_name}.c\"),\n",
+ "        (\"setup\", \"setup.py\"),\n",
+ "        (\"usage\", \"usage_example.py\"),\n",
+ "    )\n",
+ "    for field, target in destinations:\n",
+ "        write_file(getattr(optimization, field), target)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "6896636f-923e-4a2c-9d6c-fac07828a201",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define system message for the LLM with instructions for generating the C extension code.\n",
+ "\n",
+ "system_message = \"\"\"\n",
+ "You are an assistant that reimplements Python code in high performance C extensions for Python.\n",
+ "Your responses must always be a JSON with the following structure:\n",
+ "\n",
+ "{\n",
+ " \"c_code\": \"Optimized C extension for Python code\",\n",
+ " \"setup\": \"The 'setup.py' code to compile the C extension for Python\",\n",
+ " \"usage\": \"An example of usage of the C extension for Python code with time measurement and comparing with the original Python code\"\n",
+ "}\n",
+ "\n",
+ "Use comments sparingly and do not provide any explanation other than occasional comments.\n",
+ "The C extension for Python needs to produce an identical output in the fastest possible time.\n",
+ "Make sure the C extension for Python code is correct and can be compiled with 'python setup.py build' and used in Python.\n",
+ "The usage example must include a time measurement and a comparison with the original Python code.\n",
+ "Do not include any additional text or explanation outside the JSON structure.\n",
+ "Make sure the JSON is correctly formatted.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define user prompt template and function to fill it.\n",
+ "\n",
+ "def user_prompt_for(python_code, module_name):\n",
+ "    \"\"\"Build the user prompt that asks the LLM to port Python code to a C extension.\n",
+ "\n",
+ "    Args:\n",
+ "        python_code: Source of the Python code to reimplement.\n",
+ "        module_name: Import name of the extension; the prompt names the C file '<module_name>.c'.\n",
+ "\n",
+ "    Returns:\n",
+ "        The prompt text, which also embeds the current 'sys.platform' value as the build target.\n",
+ "    \"\"\"\n",
+ "    user_prompt = f\"\"\"\n",
+ "    Reimplement this Python code as a C extension for Python with the fastest possible implementation that produces identical output in the least time.\n",
+ "    Respond only with C extension for Python code, do not explain your work other than a few code comments.\n",
+ "    The module name, used to import, must be \"{module_name}\", the generated C file will be named \"{module_name}.c\".\n",
+ "    Pay attention to number types to ensure no int overflows.\n",
+ "    Remember to #include all necessary C headers such as <Python.h> or <stdint.h>.\n",
+ "\n",
+ "    The target platform is {sys.platform}, keep that in mind while generating the C code, especially\n",
+ "    when choosing types to use, and use the appropriate compiler flags.\n",
+ "    Make sure to use the Python C API correctly and manage memory properly to avoid leaks or crashes.\n",
+ "\n",
+ "    Here is the Python code to reimplement:\n",
+ "\n",
+ "    {python_code}\"\"\"\n",
+ "    return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "c6190659-f54c-4951-bef4-4960f8e51cc4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define function to create the messages for the LLM.\n",
+ "\n",
+ "def messages_for(python_code, module_name):\n",
+ "    \"\"\"Assemble the chat messages for the LLM: the system instructions followed by the user request.\"\"\"\n",
+ "    system_turn = {\"role\": \"system\", \"content\": system_message}\n",
+ "    user_turn = {\"role\": \"user\", \"content\": user_prompt_for(python_code, module_name)}\n",
+ "    return [system_turn, user_turn]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "3c57bc55",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SYSTEM: \n",
+ "You are an assistant that reimplements Python code in high performance C extensions for Python.\n",
+ "Your responses must always be a JSON with the following structure:\n",
+ "\n",
+ "{\n",
+ " \"c_code\": \"Optimized C extension for Python code\",\n",
+ " \"setup\": \"The 'setup.py' code to compile the C extension for Python\",\n",
+ " \"usage\": \"An example of usage of the C extension for Python code with time measurement and comparing with the original Python code\"\n",
+ "}\n",
+ "\n",
+ "Use comments sparingly and do not provide any explanation other than occasional comments.\n",
+ "The C extension for Python needs to produce an identical output in the fastest possible time.\n",
+ "Make sure the C extension for Python code is correct and can be compiled with 'python setup.py build' and used in Python.\n",
+ "The usage example must include a time measurement and a comparison with the original Python code.\n",
+ "Do not include any additional text or explanation outside the JSON structure.\n",
+ "Make sure the JSON is correctly formatted.\n",
+ "\n",
+ "--------------------------------\n",
+ "USER: \n",
+ " Reimplement this Python code as a C extension for Python with the fastest possible implementation that produces identical output in the least time.\n",
+ " Respond only with C extension for Python code, do not explain your work other than a few code comments.\n",
+ " The module name, used to import, must be \"say_hello\", the generated C file will be named \"say_hello.c\".\n",
+ " Pay attention to number types to ensure no int overflows.\n",
+ " Remember to #include all necessary C packages such as iomanip or \n",
+ "\n",
+ " The target architecture is win32, take that in mind while generating the C code, specially\n",
+ " when choosing types to use, and use the appropriate compiler flags.\n",
+ " Make sure to use the Python C API correctly and manage memory properly to avoid leaks or crashes.\n",
+ "\n",
+ " Here is the Python code to reimplement:\n",
+ "\n",
+ " print('Hello World')\n",
+ "--------------------------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test the messages function and print the messages.\n",
+ "\n",
+ "for message in messages_for(\"print('Hello World')\", \"say_hello\"):\n",
+ " print(f\"{message['role'].upper()}: {message['content']}\")\n",
+ " print(\"--------------------------------\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define optimization function using OpenAI's GPT model.\n",
+ "\n",
+ "def optimize_gpt(python_code, module_name, model=OPENAI_MODEL):\n",
+ "    \"\"\"Ask the given OpenAI model to turn the Python code into a C extension named 'module_name'.\n",
+ "\n",
+ "    Returns the parsed content of the first choice, requested in the\n",
+ "    'Extension_codes' response format.\n",
+ "    \"\"\"\n",
+ "    completion = openai.chat.completions.parse(\n",
+ "        model=model,\n",
+ "        messages=messages_for(python_code, module_name),\n",
+ "        response_format=Extension_codes,\n",
+ "    )\n",
+ "    first_choice = completion.choices[0]\n",
+ "    return first_choice.message.parsed"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c05b263a",
+ "metadata": {},
+ "source": [
+ "# Start with a math function that calculates ***π*** using the Leibniz formula.\n",
+ "\n",
+ "This formula approximates *π* iteratively using an alternating series:\n",
+ "the more iterations, the higher the precision, at the cost of more computation.\n",
+ "* [Leibniz formula for π](https://en.wikipedia.org/wiki/Leibniz_formula_for_%CF%80)\n",
+ "\n",
+ "This is a good candidate to get a noticeable improvement by coding and compiling it into a Python C extension. \n",
+ "\n",
+ "> NOTE:\n",
+ ">\n",
+ "> We are creating an importable module, not an executable program, so the code to be optimized must contain only declarations such as `def` or `class`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "a1cbb778-fa57-43de-b04b-ed523f396c38",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define the Python function to be converted to a C extension and its module name.\n",
+ "\n",
+ "module_name = \"calculate_pi\"\n",
+ "\n",
+ "# Plain string (no f-prefix): the snippet has no placeholders, and any braces\n",
+ "# added to it later must not be treated as replacement fields.\n",
+ "calculate_pi_code = \"\"\"\n",
+ "def leibniz_pi(iterations):\n",
+ "    result = 1.0\n",
+ "    for i in range(1, iterations+1):\n",
+ "        j = i * 4 - 1\n",
+ "        result -= (1/j)\n",
+ "        j = i * 4 + 1\n",
+ "        result += (1/j)\n",
+ "    return result * 4\n",
+ "\"\"\"\n",
+ "\n",
+ "# Define a function to test the performance of the calculus function.\n",
+ "\n",
+ "def test_pi_calculation(calculus_function, iterations=100_000_000):\n",
+ "    \"\"\"Time a single call of 'calculus_function(iterations)' and print its result and the elapsed seconds.\"\"\"\n",
+ "    started = perf_counter()\n",
+ "    approximation = calculus_function(iterations)\n",
+ "    elapsed = perf_counter() - started\n",
+ "    print(f\"Result: {approximation:.12f}\")\n",
+ "    print(f\"Execution Time: {elapsed:.6f} seconds\")\n",
+ "\n",
+ "# Execute function declaration.\n",
+ "exec(calculate_pi_code)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "7fe1cd4b-d2c5-4303-afed-2115a3fef200",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Result: 3.141592658589\n",
+ "Execution Time: 20.556854 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Run original python code and time it.\n",
+ "\n",
+ "test_pi_calculation(leibniz_pi, 100_000_000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "4c0be0f2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Timing...\n",
+ "Python average execution time: 21.158541 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Average timing the original Python code running it several times.\n",
+ "# (Increase 'iterations' for better timing)\n",
+ "\n",
+ "print(\"Timing...\")\n",
+ "iterations = 5\n",
+ "average = timeit(lambda: leibniz_pi(100_000_000), number=iterations) / iterations\n",
+ "print(f\"Python average execution time: {average:.6f} seconds\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "105db6f9-343c-491d-8e44-3a5328b81719",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Request code optimization using GPT.\n",
+ "\n",
+ "optimization = optimize_gpt(calculate_pi_code, module_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "378981c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C CODE:\n",
+ "#define PY_SSIZE_T_CLEAN\n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "\n",
+ "static PyObject* leibniz_pi(PyObject* self, PyObject* args) {\n",
+ " PyObject* iterations_obj;\n",
+ " if (!PyArg_ParseTuple(args, \"O\", &iterations_obj)) {\n",
+ " return NULL;\n",
+ " }\n",
+ "\n",
+ " long long n_signed;\n",
+ " int overflow = 0;\n",
+ " n_signed = PyLong_AsLongLongAndOverflow(iterations_obj, &overflow);\n",
+ " if (n_signed == -1 && PyErr_Occurred() && overflow == 0) {\n",
+ " return NULL;\n",
+ " }\n",
+ "\n",
+ " unsigned long long n = 0ULL;\n",
+ " if (overflow < 0) {\n",
+ " n = 0ULL;\n",
+ " } else if (overflow > 0) {\n",
+ " unsigned long long tmp = PyLong_AsUnsignedLongLong(iterations_obj);\n",
+ " if (tmp == (unsigned long long)-1 && PyErr_Occurred()) {\n",
+ " return NULL;\n",
+ " }\n",
+ " n = tmp;\n",
+ " } else {\n",
+ " if (n_signed <= 0) {\n",
+ " n = 0ULL;\n",
+ " } else {\n",
+ " n = (unsigned long long)n_signed;\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " double result = 1.0;\n",
+ " if (n == 0ULL) {\n",
+ " return PyFloat_FromDouble(result * 4.0);\n",
+ " }\n",
+ "\n",
+ " Py_BEGIN_ALLOW_THREADS\n",
+ " for (unsigned long long i = 1ULL; i <= n; ++i) {\n",
+ " double jd1;\n",
+ " if (i <= ULLONG_MAX / 4ULL) {\n",
+ " unsigned long long j1 = i * 4ULL - 1ULL;\n",
+ " jd1 = (double)j1;\n",
+ " } else {\n",
+ " jd1 = (double)i * 4.0 - 1.0;\n",
+ " }\n",
+ " result -= 1.0 / jd1;\n",
+ "\n",
+ " double jd2;\n",
+ " if (i <= (ULLONG_MAX - 1ULL) / 4ULL) {\n",
+ " unsigned long long j2 = i * 4ULL + 1ULL;\n",
+ " jd2 = (double)j2;\n",
+ " } else {\n",
+ " jd2 = (double)i * 4.0 + 1.0;\n",
+ " }\n",
+ " result += 1.0 / jd2;\n",
+ " }\n",
+ " Py_END_ALLOW_THREADS\n",
+ "\n",
+ " return PyFloat_FromDouble(result * 4.0);\n",
+ "}\n",
+ "\n",
+ "static PyMethodDef CalculatePiMethods[] = {\n",
+ " {\"leibniz_pi\", leibniz_pi, METH_VARARGS, \"Compute pi using the Leibniz series with the given number of iterations.\"},\n",
+ " {NULL, NULL, 0, NULL}\n",
+ "};\n",
+ "\n",
+ "static struct PyModuleDef calculate_pimodule = {\n",
+ " PyModuleDef_HEAD_INIT,\n",
+ " \"calculate_pi\",\n",
+ " \"High-performance Leibniz pi calculation.\",\n",
+ " -1,\n",
+ " CalculatePiMethods\n",
+ "};\n",
+ "\n",
+ "PyMODINIT_FUNC PyInit_calculate_pi(void) {\n",
+ " return PyModule_Create(&calculate_pimodule);\n",
+ "}\n",
+ "\n",
+ "---------------------------\n",
+ "setup.py:\n",
+ "from setuptools import setup, Extension\n",
+ "import sys\n",
+ "import os\n",
+ "\n",
+ "extra_compile_args = []\n",
+ "extra_link_args = []\n",
+ "\n",
+ "if os.name == 'nt':\n",
+ " extra_compile_args.extend(['/O2', '/fp:precise'])\n",
+ "else:\n",
+ " extra_compile_args.extend(['-O3', '-fno-strict-aliasing'])\n",
+ "\n",
+ "module = Extension(\n",
+ " 'calculate_pi',\n",
+ " sources=['calculate_pi.c'],\n",
+ " extra_compile_args=extra_compile_args,\n",
+ " extra_link_args=extra_link_args,\n",
+ ")\n",
+ "\n",
+ "setup(\n",
+ " name='calculate_pi',\n",
+ " version='1.0.0',\n",
+ " description='High-performance C extension for computing pi via the Leibniz series',\n",
+ " ext_modules=[module],\n",
+ ")\n",
+ "\n",
+ "---------------------------\n",
+ "USAGE:\n",
+ "# Build first: python setup.py build_ext --inplace\n",
+ "import time\n",
+ "import math\n",
+ "import calculate_pi\n",
+ "\n",
+ "# Original Python implementation\n",
+ "def py_leibniz_pi(iterations):\n",
+ " result = 1.0\n",
+ " for i in range(1, iterations + 1):\n",
+ " j = i * 4 - 1\n",
+ " result -= (1 / j)\n",
+ " j = i * 4 + 1\n",
+ " result += (1 / j)\n",
+ " return result * 4\n",
+ "\n",
+ "iters = 5_000_000\n",
+ "\n",
+ "# Warm-up\n",
+ "calculate_pi.leibniz_pi(10)\n",
+ "py_leibniz_pi(10)\n",
+ "\n",
+ "start = time.perf_counter()\n",
+ "res_c = calculate_pi.leibniz_pi(iters)\n",
+ "end = time.perf_counter()\n",
+ "ctime = end - start\n",
+ "\n",
+ "start = time.perf_counter()\n",
+ "res_py = py_leibniz_pi(iters)\n",
+ "end = time.perf_counter()\n",
+ "pytime = end - start\n",
+ "\n",
+ "print(f\"Iterations: {iters}\")\n",
+ "print(f\"C extension result: {res_c}\")\n",
+ "print(f\"Python result: {res_py}\")\n",
+ "print(f\"Absolute difference: {abs(res_c - res_py)}\")\n",
+ "print(f\"C extension time: {ctime:.6f} s\")\n",
+ "print(f\"Python time: {pytime:.6f} s\")\n",
+ "print(f\"Speedup: {pytime/ctime if ctime > 0 else float('inf'):.2f}x\")\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print generated extension code.\n",
+ "\n",
+ "print_optimization(optimization)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "ae9a4a64",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write the generated code to files.\n",
+ "# (Will overwrite existing files)\n",
+ "\n",
+ "write_optimization(optimization, module_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bf8f8018-f64d-425c-a0e1-d7862aa9592d",
+ "metadata": {},
+ "source": [
+ "# Compiling C Extension and executing\n",
+ "\n",
+ "The `python setup.py` build command may fail inside JupyterLab; if that is the case, run it directly on the command line.\n",
+ "\n",
+ "Two cells are marked WINDOWS ONLY: they deal with the fact that Windows ships with two command-line shells,\n",
+ "the old CMD (MS-DOS style) and the newer PowerShell (Unix style).\n",
+ "\n",
+ "It is controlled by the COMSPEC environment variable.\\\n",
+ "*(Using this variable is completely innocuous on UNIX systems, they will simply ignore it)*\n",
+ "\n",
+ "Most of the command lines used here are Unix style, but the build command requires CMD, so\n",
+ "we switch to CMD before compiling and restore the original shell afterwards."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "22a9130e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Clean previous builds.\n",
+ "# (Run this cell before running the compile cell a second time)\n",
+ "# (Uses 'shutil.rmtree' instead of the Unix-only 'rm' command so it also works\n",
+ "#  under Windows CMD, and stays silent when no previous build exists)\n",
+ "\n",
+ "shutil.rmtree(\"build\", ignore_errors=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "816e7c9d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# [WINDOWS ONLY]\n",
+ "# Set COMSPEC to cmd.exe to avoid issues with some C compilers on Windows.\n",
+ "# (Remember to restore original COMSPEC after compilation and testing)\n",
+ "preset_comspec = os.environ.get(\"COMSPEC\")\n",
+ "os.environ[\"COMSPEC\"] = \"C:\\\\Windows\\\\System32\\\\cmd.exe\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4194e40c-04ab-4940-9d64-b4ad37c5bb40",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Compile the C extension.\n",
+ "# (Will fail if no C compiler is installed)\n",
+ "# (In case of errors, try directly on the command line)\n",
+ "# (Runs the kernel's own interpreter, so the extension is built for the same\n",
+ "#  Python that will import it, even if another 'python' comes first on PATH)\n",
+ "\n",
+ "!\"{sys.executable}\" setup.py build_ext --inplace"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "8db12c4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# [WINDOWS ONLY]\n",
+ "# Restore original COMSPEC.\n",
+ "# (When COMSPEC was not set beforehand, as is usual outside Windows,\n",
+ "#  'preset_comspec' is None; assigning None to os.environ raises TypeError,\n",
+ "#  so the variable is removed again instead)\n",
+ "\n",
+ "if preset_comspec is None:\n",
+ "    os.environ.pop(\"COMSPEC\", None)\n",
+ "else:\n",
+ "    os.environ[\"COMSPEC\"] = preset_comspec"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "a8f5169f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Iterations: 5000000\n",
+ "C extension result: 3.1415927535897814\n",
+ "Python result: 3.1415927535897814\n",
+ "Absolute difference: 0.0\n",
+ "C extension time: 0.037515 s\n",
+ "Python time: 1.046732 s\n",
+ "Speedup: 27.90x\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Run the usage example to test the compiled C extension.\n",
+ "exec(optimization.usage)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "a1972472",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Testing original Python code:\n",
+ "Result: 3.141592658589\n",
+ "Execution Time: 20.350486 seconds\n",
+ "Testing C extension code:\n",
+ "Result: 3.141592658589\n",
+ "Execution Time: 0.759571 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Import newly created C extension and compare performance with original Python code.\n",
+ "\n",
+ "from calculate_pi import leibniz_pi as c_leibniz_pi\n",
+ "\n",
+ "print(\"Testing original Python code:\")\n",
+ "test_pi_calculation(leibniz_pi, 100_000_000)\n",
+ "print(\"Testing C extension code:\")\n",
+ "test_pi_calculation(c_leibniz_pi, 100_000_000)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "77304493",
+ "metadata": {},
+ "source": [
+ "# Let's try with more complex code\n",
+ "\n",
+ "Now we define three functions that together implement the calculation of the \"total maximum subarray sum\"\n",
+ "by finding the largest sum of a contiguous subarray within a given array of numbers.\n",
+ "\n",
+ "* [Maximum subarray problem](https://en.wikipedia.org/wiki/Maximum_subarray_problem)\n",
+ "\n",
+ "This algorithm requires many more computation steps than the previous one, so we may expect a significant\n",
+ "improvement from coding and compiling it as a Python C extension. \n",
+ "\n",
+ "> NOTE:\n",
+ ">\n",
+ "> We are creating an importable module, not an executable program, so the code to be optimized must contain only declarations such as `def` or `class`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define the Python function to be converted to a C extension and its module name.\n",
+ "\n",
+ "module_name = \"python_hard\"\n",
+ "\n",
+ "python_hard_code = \"\"\"\n",
+ "# Be careful to support large number sizes\n",
+ "\n",
+ "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n",
+ " value = seed\n",
+ " while True:\n",
+ " value = (a * value + c) % m\n",
+ " yield value\n",
+ "\n",
+ "def max_subarray_sum(n, seed, min_val, max_val):\n",
+ " lcg_gen = lcg(seed)\n",
+ " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n",
+ " max_sum = float('-inf')\n",
+ " for i in range(n):\n",
+ " current_sum = 0\n",
+ " for j in range(i, n):\n",
+ " current_sum += random_numbers[j]\n",
+ " if current_sum > max_sum:\n",
+ " max_sum = current_sum\n",
+ " return max_sum\n",
+ "\n",
+ "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n",
+ " total_sum = 0\n",
+ " lcg_gen = lcg(initial_seed)\n",
+ " for _ in range(20):\n",
+ " seed = next(lcg_gen)\n",
+ " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n",
+ " return total_sum\n",
+ "\"\"\"\n",
+ "\n",
+ "# Define a function to test the performance of the calculus function.\n",
+ "\n",
+ "def test_subarray_sum(calculus_function, n=1000, initial_seed=42, min_val=-10, max_val=10):\n",
+ "    \"\"\"Time a single call of 'calculus_function(n, initial_seed, min_val, max_val)' and print its result and the elapsed seconds.\"\"\"\n",
+ "    started = perf_counter()\n",
+ "    total = calculus_function(n, initial_seed, min_val, max_val)\n",
+ "    elapsed = perf_counter() - started\n",
+ "    print(\"Total Maximum Subarray Sum (20 runs):\", total)\n",
+ "    print(\"Execution Time: {:.6f} seconds\".format(elapsed))\n",
+ "\n",
+ "\n",
+ "# Execute function declarations.\n",
+ "exec(python_hard_code)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total Maximum Subarray Sum (20 runs): 10980\n",
+ "Execution Time: 61.362418 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Run original python code and time it.\n",
+ "\n",
+ "test_subarray_sum(total_max_subarray_sum, 10000, 42, -10, 10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Request code optimization using GPT.\n",
+ "\n",
+ "optimization = optimize_gpt(python_hard_code, module_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "b888d5af",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C CODE:\n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "#include \n",
+ "\n",
+ "// LCG step with 32-bit wrap-around\n",
+ "static inline uint32_t lcg_next(uint32_t *state) {\n",
+ " *state = (uint32_t)(1664525u * (*state) + 1013904223u);\n",
+ " return *state;\n",
+ "}\n",
+ "\n",
+ "static inline int add_overflow_int64(int64_t a, int64_t b, int64_t *res) {\n",
+ " if ((b > 0 && a > INT64_MAX - b) || (b < 0 && a < INT64_MIN - b)) return 1;\n",
+ " *res = a + b;\n",
+ " return 0;\n",
+ "}\n",
+ "\n",
+ "// Kadane for int64 array with overflow detection; returns PyLong or NULL (on overflow -> signal via *overflowed)\n",
+ "static PyObject* kadane_int64(const int64_t *arr, Py_ssize_t n, int *overflowed) {\n",
+ " if (n <= 0) {\n",
+ " return PyFloat_FromDouble(-INFINITY);\n",
+ " }\n",
+ " int64_t meh = arr[0];\n",
+ " int64_t msf = arr[0];\n",
+ " for (Py_ssize_t i = 1; i < n; ++i) {\n",
+ " int64_t x = arr[i];\n",
+ " if (meh > 0) {\n",
+ " int64_t tmp;\n",
+ " if (add_overflow_int64(meh, x, &tmp)) { *overflowed = 1; return NULL; }\n",
+ " meh = tmp;\n",
+ " } else {\n",
+ " meh = x;\n",
+ " }\n",
+ " if (meh > msf) msf = meh;\n",
+ " }\n",
+ " return PyLong_FromLongLong(msf);\n",
+ "}\n",
+ "\n",
+ "// Kadane for PyObject* integer array\n",
+ "static PyObject* kadane_big(PyObject **arr, Py_ssize_t n) {\n",
+ " if (n <= 0) {\n",
+ " return PyFloat_FromDouble(-INFINITY);\n",
+ " }\n",
+ " PyObject *meh = arr[0]; Py_INCREF(meh);\n",
+ " PyObject *msf = arr[0]; Py_INCREF(msf);\n",
+ " PyObject *zero = PyLong_FromLong(0);\n",
+ " if (!zero) { Py_DECREF(meh); Py_DECREF(msf); return NULL; }\n",
+ "\n",
+ " for (Py_ssize_t i = 1; i < n; ++i) {\n",
+ " int cmp = PyObject_RichCompareBool(meh, zero, Py_GT);\n",
+ " if (cmp < 0) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; }\n",
+ " if (cmp == 1) {\n",
+ " PyObject *t = PyNumber_Add(meh, arr[i]);\n",
+ " if (!t) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; }\n",
+ " Py_DECREF(meh);\n",
+ " meh = t;\n",
+ " } else {\n",
+ " Py_DECREF(meh);\n",
+ " meh = arr[i]; Py_INCREF(meh);\n",
+ " }\n",
+ " int cmp2 = PyObject_RichCompareBool(meh, msf, Py_GT);\n",
+ " if (cmp2 < 0) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; }\n",
+ " if (cmp2 == 1) {\n",
+ " Py_DECREF(msf);\n",
+ " msf = meh; Py_INCREF(msf);\n",
+ " }\n",
+ " }\n",
+ " Py_DECREF(meh);\n",
+ " Py_DECREF(zero);\n",
+ " return msf; // new reference\n",
+ "}\n",
+ "\n",
+ "// Generate int64 array fast path; returns 0 on success\n",
+ "static int gen_array_int64(Py_ssize_t n, uint32_t seed, int64_t min_v, int64_t max_v, int64_t *out) {\n",
+ " uint32_t state = seed;\n",
+ " uint64_t umax = (uint64_t)max_v;\n",
+ " uint64_t umin = (uint64_t)min_v;\n",
+ " uint64_t range = (umax - umin) + 1ULL; // max>=min guaranteed by caller\n",
+ " for (Py_ssize_t i = 0; i < n; ++i) {\n",
+ " state = lcg_next(&state);\n",
+ " uint32_t r32 = state;\n",
+ " uint64_t r = (range > 0x100000000ULL) ? (uint64_t)r32 : ((uint64_t)r32 % range);\n",
+ " int64_t val = (int64_t)(min_v + (int64_t)r);\n",
+ " out[i] = val;\n",
+ " }\n",
+ " return 0;\n",
+ "}\n",
+ "\n",
+ "// Generate PyObject* int array general path using Python arithmetic\n",
+ "static PyObject** gen_array_big(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) {\n",
+ " PyObject **arr = (PyObject**)PyMem_Malloc((n > 0 ? n : 1) * sizeof(PyObject*));\n",
+ " if (!arr) {\n",
+ " PyErr_NoMemory();\n",
+ " return NULL;\n",
+ " }\n",
+ " PyObject *one = PyLong_FromLong(1);\n",
+ " if (!one) { PyMem_Free(arr); return NULL; }\n",
+ " PyObject *diff = PyNumber_Subtract(max_val, min_val);\n",
+ " if (!diff) { Py_DECREF(one); PyMem_Free(arr); return NULL; }\n",
+ " PyObject *range_obj = PyNumber_Add(diff, one);\n",
+ " Py_DECREF(diff);\n",
+ " Py_DECREF(one);\n",
+ " if (!range_obj) { PyMem_Free(arr); return NULL; }\n",
+ "\n",
+ " uint32_t state = seed;\n",
+ " for (Py_ssize_t i = 0; i < n; ++i) {\n",
+ " state = lcg_next(&state);\n",
+ " PyObject *v = PyLong_FromUnsignedLong((unsigned long)state);\n",
+ " if (!v) {\n",
+ " Py_DECREF(range_obj);\n",
+ " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);\n",
+ " PyMem_Free(arr);\n",
+ " return NULL;\n",
+ " }\n",
+ " PyObject *r = PyNumber_Remainder(v, range_obj);\n",
+ " Py_DECREF(v);\n",
+ " if (!r) {\n",
+ " Py_DECREF(range_obj);\n",
+ " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);\n",
+ " PyMem_Free(arr);\n",
+ " return NULL;\n",
+ " }\n",
+ " PyObject *val = PyNumber_Add(r, min_val);\n",
+ " Py_DECREF(r);\n",
+ " if (!val) {\n",
+ " Py_DECREF(range_obj);\n",
+ " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);\n",
+ " PyMem_Free(arr);\n",
+ " return NULL;\n",
+ " }\n",
+ " arr[i] = val;\n",
+ " }\n",
+ " Py_DECREF(range_obj);\n",
+ " return arr;\n",
+ "}\n",
+ "\n",
+ "static PyObject* max_subarray_sum_internal(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) {\n",
+ " if (n <= 0) {\n",
+ " return PyFloat_FromDouble(-INFINITY);\n",
+ " }\n",
+ "\n",
+ " if (PyLong_Check(min_val) && PyLong_Check(max_val)) {\n",
+ " int overflow1 = 0, overflow2 = 0;\n",
+ " long long min64 = PyLong_AsLongLongAndOverflow(min_val, &overflow1);\n",
+ " if (overflow1) goto BIGINT_PATH;\n",
+ " long long max64 = PyLong_AsLongLongAndOverflow(max_val, &overflow2);\n",
+ " if (overflow2) goto BIGINT_PATH;\n",
+ " if (max64 >= min64) {\n",
+ " int64_t *arr = (int64_t*)PyMem_Malloc((size_t)n * sizeof(int64_t));\n",
+ " if (!arr) { PyErr_NoMemory(); return NULL; }\n",
+ " if (gen_array_int64(n, seed, (int64_t)min64, (int64_t)max64, arr) != 0) {\n",
+ " PyMem_Free(arr);\n",
+ " return NULL;\n",
+ " }\n",
+ " int overflowed = 0;\n",
+ " PyObject *res = kadane_int64(arr, n, &overflowed);\n",
+ " if (!res && overflowed) {\n",
+ " // fallback to big-int Kadane\n",
+ " PyObject **arr_obj = (PyObject**)PyMem_Malloc((size_t)n * sizeof(PyObject*));\n",
+ " if (!arr_obj) { PyMem_Free(arr); PyErr_NoMemory(); return NULL; }\n",
+ " for (Py_ssize_t i = 0; i < n; ++i) {\n",
+ " arr_obj[i] = PyLong_FromLongLong(arr[i]);\n",
+ " if (!arr_obj[i]) {\n",
+ " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr_obj[k]);\n",
+ " PyMem_Free(arr_obj);\n",
+ " PyMem_Free(arr);\n",
+ " return NULL;\n",
+ " }\n",
+ " }\n",
+ " PyObject *bires = kadane_big(arr_obj, n);\n",
+ " for (Py_ssize_t i = 0; i < n; ++i) Py_DECREF(arr_obj[i]);\n",
+ " PyMem_Free(arr_obj);\n",
+ " PyMem_Free(arr);\n",
+ " return bires;\n",
+ " }\n",
+ " PyMem_Free(arr);\n",
+ " return res;\n",
+ " }\n",
+ " }\n",
+ "BIGINT_PATH: ;\n",
+ " PyObject **arr_obj = gen_array_big(n, seed, min_val, max_val);\n",
+ " if (!arr_obj) return NULL;\n",
+ " PyObject *res = kadane_big(arr_obj, n);\n",
+ " for (Py_ssize_t i = 0; i < n; ++i) Py_DECREF(arr_obj[i]);\n",
+ " PyMem_Free(arr_obj);\n",
+ " return res;\n",
+ "}\n",
+ "\n",
+ "static PyObject* py_max_subarray_sum(PyObject *self, PyObject *args) {\n",
+ " Py_ssize_t n;\n",
+ " PyObject *seed_obj, *min_val, *max_val;\n",
+ " if (!PyArg_ParseTuple(args, \"nOOO\", &n, &seed_obj, &min_val, &max_val)) return NULL;\n",
+ " if (n < 0) n = 0;\n",
+ " uint32_t seed = (uint32_t)(PyLong_AsUnsignedLongLongMask(seed_obj) & 0xFFFFFFFFULL);\n",
+ " if (PyErr_Occurred()) return NULL;\n",
+ " return max_subarray_sum_internal(n, seed, min_val, max_val);\n",
+ "}\n",
+ "\n",
+ "static PyObject* py_total_max_subarray_sum(PyObject *self, PyObject *args) {\n",
+ " Py_ssize_t n;\n",
+ " PyObject *init_seed_obj, *min_val, *max_val;\n",
+ " if (!PyArg_ParseTuple(args, \"nOOO\", &n, &init_seed_obj, &min_val, &max_val)) return NULL;\n",
+ " if (n < 0) n = 0;\n",
+ " uint32_t state = (uint32_t)(PyLong_AsUnsignedLongLongMask(init_seed_obj) & 0xFFFFFFFFULL);\n",
+ " if (PyErr_Occurred()) return NULL;\n",
+ "\n",
+ " PyObject *total = PyLong_FromLong(0);\n",
+ " if (!total) return NULL;\n",
+ "\n",
+ " for (int i = 0; i < 20; ++i) {\n",
+ " uint32_t seed = lcg_next(&state);\n",
+ " PyObject *part = max_subarray_sum_internal(n, seed, min_val, max_val);\n",
+ " if (!part) { Py_DECREF(total); return NULL; }\n",
+ " PyObject *new_total = PyNumber_Add(total, part);\n",
+ " Py_DECREF(part);\n",
+ " if (!new_total) { Py_DECREF(total); return NULL; }\n",
+ " Py_DECREF(total);\n",
+ " total = new_total;\n",
+ " }\n",
+ " return total;\n",
+ "}\n",
+ "\n",
+ "static PyMethodDef module_methods[] = {\n",
+ " {\"max_subarray_sum\", (PyCFunction)py_max_subarray_sum, METH_VARARGS, \"Compute maximum subarray sum using LCG-generated array.\"},\n",
+ " {\"total_max_subarray_sum\", (PyCFunction)py_total_max_subarray_sum, METH_VARARGS, \"Compute total of maximum subarray sums over 20 LCG seeds.\"},\n",
+ " {NULL, NULL, 0, NULL}\n",
+ "};\n",
+ "\n",
+ "static struct PyModuleDef moduledef = {\n",
+ " PyModuleDef_HEAD_INIT,\n",
+ " \"python_hard\",\n",
+ " NULL,\n",
+ " -1,\n",
+ " module_methods,\n",
+ " NULL,\n",
+ " NULL,\n",
+ " NULL,\n",
+ " NULL\n",
+ "};\n",
+ "\n",
+ "PyMODINIT_FUNC PyInit_python_hard(void) {\n",
+ " return PyModule_Create(&moduledef);\n",
+ "}\n",
+ "\n",
+ "---------------------------\n",
+ "setup.py:\n",
+ "from setuptools import setup, Extension\n",
+ "import sys\n",
+ "\n",
+ "extra_compile_args = []\n",
+ "extra_link_args = []\n",
+ "if sys.platform == 'win32':\n",
+ " extra_compile_args = ['/O2', '/Ot', '/GL', '/fp:fast']\n",
+ " extra_link_args = ['/LTCG']\n",
+ "else:\n",
+ " extra_compile_args = ['-O3', '-march=native']\n",
+ "\n",
+ "module = Extension(\n",
+ " name='python_hard',\n",
+ " sources=['python_hard.c'],\n",
+ " extra_compile_args=extra_compile_args,\n",
+ " extra_link_args=extra_link_args,\n",
+ " language='c'\n",
+ ")\n",
+ "\n",
+ "setup(\n",
+ " name='python_hard',\n",
+ " version='1.0.0',\n",
+ " description='High-performance C extension reimplementation',\n",
+ " ext_modules=[module]\n",
+ ")\n",
+ "\n",
+ "---------------------------\n",
+ "USAGE:\n",
+ "import time\n",
+ "\n",
+ "# Original Python code\n",
+ "\n",
+ "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n",
+ " value = seed\n",
+ " while True:\n",
+ " value = (a * value + c) % m\n",
+ " yield value\n",
+ "\n",
+ "def max_subarray_sum_py(n, seed, min_val, max_val):\n",
+ " lcg_gen = lcg(seed)\n",
+ " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n",
+ " max_sum = float('-inf')\n",
+ " for i in range(n):\n",
+ " current_sum = 0\n",
+ " for j in range(i, n):\n",
+ " current_sum += random_numbers[j]\n",
+ " if current_sum > max_sum:\n",
+ " max_sum = current_sum\n",
+ " return max_sum\n",
+ "\n",
+ "def total_max_subarray_sum_py(n, initial_seed, min_val, max_val):\n",
+ " total_sum = 0\n",
+ " lcg_gen = lcg(initial_seed)\n",
+ " for _ in range(20):\n",
+ " seed = next(lcg_gen)\n",
+ " total_sum += max_subarray_sum_py(n, seed, min_val, max_val)\n",
+ " return total_sum\n",
+ "\n",
+ "# Build and import extension (after running: python setup.py build && install or develop)\n",
+ "import python_hard as ext\n",
+ "\n",
+ "# Example parameters\n",
+ "n = 600\n",
+ "initial_seed = 12345678901234567890\n",
+ "min_val = -1000\n",
+ "max_val = 1000\n",
+ "\n",
+ "# Time Python\n",
+ "t0 = time.perf_counter()\n",
+ "py_res1 = max_subarray_sum_py(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)\n",
+ "t1 = time.perf_counter()\n",
+ "py_time1 = t1 - t0\n",
+ "\n",
+ "# Time C extension\n",
+ "t0 = time.perf_counter()\n",
+ "ext_res1 = ext.max_subarray_sum(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)\n",
+ "t1 = time.perf_counter()\n",
+ "ext_time1 = t1 - t0\n",
+ "\n",
+ "print('max_subarray_sum equality:', py_res1 == ext_res1)\n",
+ "print('Python time:', py_time1)\n",
+ "print('C ext time:', ext_time1)\n",
+ "\n",
+ "# Total over 20 seeds\n",
+ "t0 = time.perf_counter()\n",
+ "py_res2 = total_max_subarray_sum_py(n, initial_seed, min_val, max_val)\n",
+ "t1 = time.perf_counter()\n",
+ "py_time2 = t1 - t0\n",
+ "\n",
+ "t0 = time.perf_counter()\n",
+ "ext_res2 = ext.total_max_subarray_sum(n, initial_seed, min_val, max_val)\n",
+ "t1 = time.perf_counter()\n",
+ "ext_time2 = t1 - t0\n",
+ "\n",
+ "print('total_max_subarray_sum equality:', py_res2 == ext_res2)\n",
+ "print('Python total time:', py_time2)\n",
+ "print('C ext total time:', ext_time2)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print generated extension code.\n",
+ "\n",
+ "print_optimization(optimization)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "356969b8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write the generated extension code to files.\n",
+ "# (Will overwrite existing files)\n",
+ "\n",
+ "write_optimization(optimization, module_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "e0b3d073-88a2-40b2-831c-6f0c345c256f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Clean previous builds.\n",
+    "# (Only needed before running the compile cell a second time)\n",
+    "# (Portable: works on Windows too, and does nothing if no previous build exists)\n",
+    "\n",
+    "import shutil\n",
+    "\n",
+    "if os.path.isdir(\"build\"):\n",
+    "    shutil.rmtree(\"build\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "29ae1993",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# [WINDOWS ONLY]\n",
+ "# Set COMSPEC to cmd.exe to avoid issues with some C compilers on Windows.\n",
+ "# (Remember to restore original COMSPEC after compilation and testing)\n",
+ "preset_comspec = os.environ.get(\"COMSPEC\")\n",
+ "os.environ[\"COMSPEC\"] = \"C:\\\\Windows\\\\System32\\\\cmd.exe\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "772328a7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Compile the C extension.\n",
+    "# (Will fail if no C compiler is installed)\n",
+ "# (In case of errors, try directly on the command line)\n",
+ "\n",
+ "!python setup.py build_ext --inplace"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "4186a19e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# [WINDOWS ONLY]\n",
+    "# Restore original COMSPEC.\n",
+    "# (If COMSPEC was not set before, remove it instead: 'os.environ' rejects 'None' values)\n",
+    "\n",
+    "if preset_comspec is None:\n",
+    "    os.environ.pop(\"COMSPEC\", None)\n",
+    "else:\n",
+    "    os.environ[\"COMSPEC\"] = preset_comspec"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "64899bb1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "max_subarray_sum equality: True\n",
+ "Python time: 0.010335999992094003\n",
+ "C ext time: 1.4399993233382702e-05\n",
+ "total_max_subarray_sum equality: True\n",
+ "Python total time: 0.21065390000876505\n",
+ "C ext total time: 0.00012310000602155924\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Run the usage example to test the compiled C extension.\n",
+ "exec(optimization.usage)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "ee4f8f62",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Testing original Python code:\n",
+ "Total Maximum Subarray Sum (20 runs): 10980\n",
+ "Execution Time: 57.275276 seconds\n",
+ "Testing C extension code:\n",
+ "Total Maximum Subarray Sum (20 runs): 10980\n",
+ "Execution Time: 0.002317 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Import newly created C extension and compare performance with original Python code.\n",
+ "\n",
+ "from python_hard import total_max_subarray_sum as c_total_max_subarray_sum\n",
+ "\n",
+ "print(\"Testing original Python code:\")\n",
+ "test_subarray_sum(total_max_subarray_sum, 10000, 42, -10, 10)\n",
+ "print(\"Testing C extension code:\")\n",
+ "test_subarray_sum(c_total_max_subarray_sum, 10000, 42, -10, 10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "85945330",
+ "metadata": {},
+ "source": [
+ "# Let's build a Gradio service"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define a function to call the optimization process and return the generated codes.\n",
+ "\n",
+    "def optimize(python_code, module_name, model):\n",
+    "    \"\"\"Run the optimization and return its C code, setup script and usage example.\"\"\"\n",
+    "    result = optimize_gpt(python_code, module_name, model)\n",
+    "    generated_codes = (result.c_code, result.setup, result.usage)\n",
+    "    return generated_codes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "19bf2bff-a822-4009-a539-f003b1651383",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define a function to execute Python code and capture its output.\n",
+ "\n",
+    "def execute_python(code):\n",
+    "    \"\"\"Execute the given Python code and capture its output.\n",
+    "\n",
+    "    Args:\n",
+    "        code: Python source code to run.\n",
+    "\n",
+    "    Returns:\n",
+    "        Everything the code wrote to 'sys.stdout' while running.\n",
+    "\n",
+    "    Raises:\n",
+    "        Any exception raised by the executed code is propagated.\n",
+    "    \"\"\"\n",
+    "    # NOTE: 'exec' runs arbitrary code; only use it with code you trust.\n",
+    "    output = io.StringIO()\n",
+    "    # Use one namespace for both globals and locals, so that functions defined\n",
+    "    # by the code can call each other (with separate scopes they cannot).\n",
+    "    namespace = dict(globals())\n",
+    "    # Remember the active stream: in Jupyter it is not 'sys.__stdout__'.\n",
+    "    previous_stdout = sys.stdout\n",
+    "    sys.stdout = output\n",
+    "    try:\n",
+    "        exec(code, namespace)\n",
+    "    finally:\n",
+    "        sys.stdout = previous_stdout\n",
+    "    return output.getvalue()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "5fd83307",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Extension compilation function.\n",
+ "\n",
+    "def build_extension():\n",
+    "    \"\"\"Compile the C extension using 'setup.py' and return the compilation output.\n",
+    "\n",
+    "    Returns:\n",
+    "        The standard output of the build process.\n",
+    "\n",
+    "    Raises:\n",
+    "        Exception: If the build process exits with an error; the message\n",
+    "            includes its standard output and standard error.\n",
+    "    \"\"\"\n",
+    "    on_windows = sys.platform == \"win32\"\n",
+    "    preset_comspec = os.environ.get(\"COMSPEC\")\n",
+    "    if on_windows:\n",
+    "        # Set default COMSPEC to cmd.exe on Windows to avoid issues with some C compilers.\n",
+    "        os.environ[\"COMSPEC\"] = \"C:\\\\Windows\\\\System32\\\\cmd.exe\"\n",
+    "    try:\n",
+    "        # Use the interpreter running this notebook, so the extension is built for it.\n",
+    "        compile_cmd = [sys.executable, \"setup.py\", \"build_ext\", \"--inplace\"]\n",
+    "        compile_result = subprocess.run(compile_cmd, env=os.environ,\n",
+    "                                        check=True, text=True, capture_output=True)\n",
+    "    except subprocess.CalledProcessError as ex:\n",
+    "        raise Exception(f\"An error occurred while building:\\n{ex.stdout}\\n{ex.stderr}\") from ex\n",
+    "    finally:  # The 'finally' clause always executes, whether there was an exception or not.\n",
+    "        # Restore original COMSPEC ('os.environ' rejects 'None', so remove it if it was unset).\n",
+    "        if on_windows:\n",
+    "            if preset_comspec is None:\n",
+    "                os.environ.pop(\"COMSPEC\", None)\n",
+    "            else:\n",
+    "                os.environ[\"COMSPEC\"] = preset_comspec\n",
+    "    return compile_result.stdout"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "77f3ab5d-fcfb-4d3f-8728-9cacbf833ea6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Extension generation function (writes the source files and builds the extension).\n",
+ "\n",
+    "def generate_extension(c_code, setup_code, usage_code, module_name):\n",
+    "    \"\"\"Write the C source and 'setup.py' to disk, then build the extension.\n",
+    "\n",
+    "    Returns the build output on success, or an error message if writing the\n",
+    "    files or building fails. 'usage_code' is accepted but not used here.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        c_file_name = f\"{module_name}.c\"\n",
+    "        write_file(c_code, c_file_name)\n",
+    "        write_file(setup_code, \"setup.py\")\n",
+    "    except Exception as write_error:\n",
+    "        return f\"An error occurred while writing files:\\n{write_error}\"\n",
+    "    try:\n",
+    "        # On success, hand back whatever the build printed.\n",
+    "        return build_extension()\n",
+    "    except Exception as build_error:\n",
+    "        # On failure, the error message is the result shown to the user.\n",
+    "        return str(build_error)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "51f78871",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Extension testing function.\n",
+ "\n",
+    "def test_extension(usage_code):\n",
+    "    \"\"\"Save the usage code to 'usage_example.py', run it and return its captured output.\n",
+    "\n",
+    "    Returns an error message instead if saving or running the code fails.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        write_file(usage_code, \"usage_example.py\")\n",
+    "    except Exception as write_error:\n",
+    "        return f\"An error occurred while writing test file:\\n{write_error}\"\n",
+    "    try:\n",
+    "        return execute_python(usage_code)\n",
+    "    except Exception as run_error:\n",
+    "        return f\"An error occurred while testing the extension:\\n{run_error}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define custom CSS for Gradio interface.\n",
+ "\n",
+ "css = \"\"\"\n",
+ ".python {background-color: #306998;}\n",
+ ".c_ext {background-color: #050;}\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "c7c8f5fc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define default codes for the interface.\n",
+ "\n",
+ "default_p_code = \"\"\"\n",
+ "def hello_world():\n",
+ " print(\"Hello, World!\")\n",
+ "\"\"\"\n",
+ "# default_p_code = python_hard_code # Run the declaration cell before use.\n",
+ "# default_p_code = calculate_pi_code # Run the declaration cell before use.\n",
+ "\n",
+    "# Default C source shown in the interface: a minimal 'zz_my_module' extension.\n",
+    "default_c_code = r\"\"\"\n",
+    "#include <Python.h>\n",
+    "\n",
+    "// Function to be called from Python\n",
+    "static PyObject* zz_hello_world(PyObject* self, PyObject* args) {\n",
+    "    printf(\"Hello, World!\\n\");\n",
+    "    Py_RETURN_NONE;\n",
+    "}\n",
+    "\n",
+    "// Method definition structure\n",
+    "static PyMethodDef zz_my_methods[] = {\n",
+    "    {\"hello_world\", zz_hello_world, METH_VARARGS, \"Print 'Hello, World!'\"},\n",
+    "    {NULL, NULL, 0, NULL}  // Sentinel\n",
+    "};\n",
+    "\n",
+    "// Module definition\n",
+    "static struct PyModuleDef zz_my_module = {\n",
+    "    PyModuleDef_HEAD_INIT,\n",
+    "    \"zz_my_module\",\n",
+    "    \"Extension module that prints Hello, World!\",\n",
+    "    -1,\n",
+    "    zz_my_methods\n",
+    "};\n",
+    "\n",
+    "// Module initialization function\n",
+    "PyMODINIT_FUNC PyInit_zz_my_module(void) {\n",
+    "    return PyModule_Create(&zz_my_module);\n",
+    "}\n",
+    "\"\"\"\n",
+ "\n",
+ "default_setup = \"\"\"\n",
+ "from setuptools import setup, Extension\n",
+ "\n",
+ "module = Extension(\n",
+ " 'zz_my_module',\n",
+ " sources=['zz_my_module.c'],\n",
+ ")\n",
+ "\n",
+ "setup(\n",
+ " name='zz_my_module',\n",
+ " version='1.0',\n",
+ " description='This is a custom C extension module.',\n",
+ " ext_modules=[module]\n",
+ ")\n",
+ "\"\"\"\n",
+ "\n",
+ "default_test = \"\"\"\n",
+ "import time\n",
+ "import zz_my_module\n",
+ "\n",
+ "def python_hello_world():\n",
+ " print(\"Hello, World!\")\n",
+ "\n",
+ "start = time.time()\n",
+ "python_hello_world()\n",
+ "end = time.time()\n",
+ "print(f\"Python function execution time: {end - start:.6f} seconds\")\n",
+ "\n",
+ "start = time.time()\n",
+ "zz_my_module.hello_world()\n",
+ "end = time.time()\n",
+ "print(f\"C extension execution time: {end - start:.6f} seconds\")\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "fa64577a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# We will use Gradio's auto-reload feature, so we do not need to restart the app to see changes in the code.\n",
+ "# * https://www.gradio.app/guides/developing-faster-with-reload-mode\n",
+ "\n",
+ "%load_ext gradio\n",
+ "\n",
+    "# This requires the 'gr.Blocks' interface to be named 'demo'.\n",
+    "# Now, each time we edit the code, we just need to re-run the Gradio interface cell to see the changes in the app.\n",
+ "# The '.launch()' method is not needed anymore."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f1303932-160c-424b-97a8-d28c816721b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "%%blocks\n",
+    "\n",
+    "# Gradio interface: generate, compile and test a Python C extension.\n",
+    "with gr.Blocks(css=css) as demo:\n",
+    "    gr.Markdown(\"## Convert code from Python to a Python C extension\")\n",
+    "    with gr.Row():\n",
+    "        module_name = gr.Textbox(label=\"Module name:\", lines=1, value=\"zz_my_module\")\n",
+    "        model = gr.Dropdown([\"gpt-4o\", \"gpt-5\"], label=\"Select model\", value=\"gpt-4o\")\n",
+    "    with gr.Row():\n",
+    "        python = gr.Textbox(label=\"Python code:\", lines=30, value=default_p_code, elem_classes=[\"python\"])\n",
+    "        c_code = gr.Textbox(label=\"C Extension code:\", lines=30, value=default_c_code, elem_classes=[\"c_ext\"])\n",
+    "    with gr.Row():\n",
+    "        get_extension = gr.Button(\"Generate extension code\")\n",
+    "    with gr.Row():\n",
+    "        setup_code = gr.Textbox(label=\"Compilation code:\", lines=10, value=default_setup, elem_classes=[\"python\"])\n",
+    "        usage_code = gr.Textbox(label=\"Test compare code:\", lines=10, value=default_test, elem_classes=[\"python\"])\n",
+    "    with gr.Row():\n",
+    "        compile_ext = gr.Button(\"Compile extension\")\n",
+    "    with gr.Row():\n",
+    "        c_ext_out = gr.TextArea(label=\"C Extension result:\", elem_classes=[\"c_ext\"])\n",
+    "    with gr.Row():\n",
+    "        test_run = gr.Button(\"Test code\")\n",
+    "    with gr.Row():\n",
+    "        test_out = gr.TextArea(label=\"Test result:\", elem_classes=[\"python\"])\n",
+    "\n",
+    "    # Wire each button to its step: generate the codes, build the extension, run the test code.\n",
+    "    get_extension.click(optimize, inputs=[python, module_name, model], outputs=[c_code, setup_code, usage_code])\n",
+    "    compile_ext.click(generate_extension, inputs=[c_code, setup_code, usage_code, module_name], outputs=[c_ext_out])\n",
+    "    test_run.click(test_extension, inputs=[usage_code], outputs=[test_out])\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llm-engineering-pYAzjokc",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}