diff --git a/week4/community-contributions/c_extension_generator/LICENSE b/week4/community-contributions/c_extension_generator/LICENSE new file mode 100644 index 0000000..0ca1b61 --- /dev/null +++ b/week4/community-contributions/c_extension_generator/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Carlos Bazaga + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/week4/community-contributions/c_extension_generator/README.md b/week4/community-contributions/c_extension_generator/README.md new file mode 100644 index 0000000..87d25db --- /dev/null +++ b/week4/community-contributions/c_extension_generator/README.md @@ -0,0 +1,134 @@ +# Python C Extension code generator + +Written by Carlos Bazaga [@carbaz] based on the work of Ed Donner [@ed-donner] +under the MIT License. + +This folder contains a Jupyter notebook that demonstrates how to use a Frontier model +to generate high-performance Python C extension code from Python code. 
+ +The notebook includes examples of generating C extensions for calculating Pi using the +Leibniz formula and finding the maximum sub-array in an array. + +Also, it includes a Gradio app that provides an interactive interface for users to input +Python code, generate C extension code, compile it, and test its performance against +the original Python code. + +> [!CAUTION] +> +> **Always review the generated code before running it, as it will be executed in +> your local environment and may contain code that could be harmful or unwanted.** +> +> AI-generated code may contain errors or unsafe practices, so it's crucial to +> thoroughly review and test any code before using it in a production environment. +> +> Never run code generated by AI models without understanding its implications and +> ensuring it adheres to your security and safety standards. + +> [!IMPORTANT] +> +> **Disclaimer:** This notebook and the Gradio app are provided for educational purposes +> only. Use them at your own risk. + +## Gradio app overview + +In this image, you can see the Gradio app dashboard whose main sections are +described below. + +![Gradio app dashboard](gradio_dashboard.jpg)\ +*Image: Gradio app dashboard with default example `hello world` code loaded.* +*(compile output redacted for privacy)* + +Sections: + +* **Dropdown selectors and input fields**: + * **Module name input**: + A text input field where users can specify the name of the C extension module to be + generated. + + That name will be used to create the C extension file `<module_name>.c` and + the `setup.py` file required to compile the extension. + + That name will also be used to import the compiled module as usual in Python: + + ```python + import <module_name> + ``` + + Or + + ```python + from <module_name> import <function_name> + ``` + + * **Model selector**: + A dropdown menu to select the Frontier model to use for code generation. 
+ + Currently it includes: + * `gpt-4o` (default) + * `gpt-5` + +* **Text input areas**: + + These areas are all editable, including those filled with code generated by the model. + This allows users to modify and experiment with the code as needed. + + * **Python code**: + A text area where users can input their Python code. + * **C extension code**: + A text area that displays the generated C extension code and allows editing it. + * **Compilation code**: + A text area that shows the generated `setup.py` file; + this file is required to compile the C extension. + * **Test compare code**: + A text area that provides example code to run the compiled C extension. + +* **Output areas**: + + These are non-editable areas that display the results of various operations. + + * **C Extension result**: + A text area that displays the output of the C extension code build. + + Beware that this area can contain a large amount of text, including warnings from + the compilation process and sensitive information about the local environment, + such as paths, the Python version, etc. + + Redact that information if you plan to share the output. + + * **Test result**: + A text area that displays the output of the test code run. + +* **Buttons**: + * **Generate extension code**: + A button that triggers the generation of the C extension code from the provided + Python code. + + It will call the Frontier model to generate the C code, the `setup.py` file and + the test code, filling the corresponding text areas automatically. + + * **Compile extension**: + A button that compiles the generated C extension using the provided `setup.py` file. + It will create the extension C file, `<module_name>.c`, and the `setup.py` file in + the local folder, then it will run the compilation command and build the C extension. 
+ + > [!CAUTION] + > + > **Always review the `setup.py` code before running it, as it will be executed in + > your local environment and may contain code that could be harmful or unwanted.** + > + > **Also review the generated C code, as it will be compiled and executed in your + > local environment and may contain code that could be harmful or unwanted.** + + It will display the compilation output in the "C Extension result" area. + + * **Test code**: + A button that executes the test code to compare the performance of the original + Python code and the generated C extension. + + > [!CAUTION] + > + > **Always review the test code before running it, as it will be executed in + > your local environment and may contain code that could be harmful or unwanted.** + + Will save the test code provided in the "Test compare code" into the + `usage_example.py` file and execute it showing the output in the "Test result" area. diff --git a/week4/community-contributions/c_extension_generator/calculate_pi.c b/week4/community-contributions/c_extension_generator/calculate_pi.c new file mode 100644 index 0000000..c0b7f03 --- /dev/null +++ b/week4/community-contributions/c_extension_generator/calculate_pi.c @@ -0,0 +1,83 @@ +#define PY_SSIZE_T_CLEAN +#include +#include +#include +#include +#include + +static PyObject* leibniz_pi(PyObject* self, PyObject* args) { + PyObject* iterations_obj; + if (!PyArg_ParseTuple(args, "O", &iterations_obj)) { + return NULL; + } + + long long n_signed; + int overflow = 0; + n_signed = PyLong_AsLongLongAndOverflow(iterations_obj, &overflow); + if (n_signed == -1 && PyErr_Occurred() && overflow == 0) { + return NULL; + } + + unsigned long long n = 0ULL; + if (overflow < 0) { + n = 0ULL; + } else if (overflow > 0) { + unsigned long long tmp = PyLong_AsUnsignedLongLong(iterations_obj); + if (tmp == (unsigned long long)-1 && PyErr_Occurred()) { + return NULL; + } + n = tmp; + } else { + if (n_signed <= 0) { + n = 0ULL; + } else { + n = (unsigned long 
long)n_signed; + } + } + + double result = 1.0; + if (n == 0ULL) { + return PyFloat_FromDouble(result * 4.0); + } + + Py_BEGIN_ALLOW_THREADS + for (unsigned long long i = 1ULL; i <= n; ++i) { + double jd1; + if (i <= ULLONG_MAX / 4ULL) { + unsigned long long j1 = i * 4ULL - 1ULL; + jd1 = (double)j1; + } else { + jd1 = (double)i * 4.0 - 1.0; + } + result -= 1.0 / jd1; + + double jd2; + if (i <= (ULLONG_MAX - 1ULL) / 4ULL) { + unsigned long long j2 = i * 4ULL + 1ULL; + jd2 = (double)j2; + } else { + jd2 = (double)i * 4.0 + 1.0; + } + result += 1.0 / jd2; + } + Py_END_ALLOW_THREADS + + return PyFloat_FromDouble(result * 4.0); +} + +static PyMethodDef CalculatePiMethods[] = { + {"leibniz_pi", leibniz_pi, METH_VARARGS, "Compute pi using the Leibniz series with the given number of iterations."}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef calculate_pimodule = { + PyModuleDef_HEAD_INIT, + "calculate_pi", + "High-performance Leibniz pi calculation.", + -1, + CalculatePiMethods +}; + +PyMODINIT_FUNC PyInit_calculate_pi(void) { + return PyModule_Create(&calculate_pimodule); +} diff --git a/week4/community-contributions/c_extension_generator/gradio_dashboard.jpg b/week4/community-contributions/c_extension_generator/gradio_dashboard.jpg new file mode 100644 index 0000000..7b07689 Binary files /dev/null and b/week4/community-contributions/c_extension_generator/gradio_dashboard.jpg differ diff --git a/week4/community-contributions/c_extension_generator/python_c_ext_generator.ipynb b/week4/community-contributions/c_extension_generator/python_c_ext_generator.ipynb new file mode 100644 index 0000000..65b480a --- /dev/null +++ b/week4/community-contributions/c_extension_generator/python_c_ext_generator.ipynb @@ -0,0 +1,1616 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9", + "metadata": {}, + "source": [ + "# Python C extension generator\n", + "\n", + "Use a Frontier model to generate a high performance Python C extension 
code from Python code.\n", + "\n", + "Python C extension modules allow you to integrate modules coded and compiled in C into Python applications.\n", + "\n", + "* [Python C Extensions](https://docs.python.org/3.13/extending/index.html)\n", + "* [Python's C API](https://docs.python.org/3.13/c-api/index.html)" + ] + }, + { + "cell_type": "markdown", + "id": "d90e04a2-5b8a-4fd5-9db8-27c02f033313", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Important Note

\n", + " \n", + " In this lab, I use GPT-4o or GPT-5, which are slightly higher priced models.\n", + " \n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports.\n", + "\n", + "import io\n", + "import os\n", + "import subprocess\n", + "import sys\n", + "from time import perf_counter\n", + "from timeit import timeit\n", + "\n", + "import gradio as gr\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "from pydantic import BaseModel" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4f672e1c-87e9-4865-b760-370fa605e614", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load environment variables from '.env' file.\n", + "\n", + "load_dotenv(override=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize client and set the default LLM model to use.\n", + "\n", + "# OPENAI_MODEL = \"gpt-4o\"\n", + "OPENAI_MODEL = \"gpt-5\"\n", + "\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c6f37bf0", + "metadata": {}, + "outputs": [], + "source": [ + "# Define Pydantic model class for GPT response parsing.\n", + "\n", + "class Extension_codes(BaseModel):\n", + " \"\"\"Pydantic model of a response containing the generated C code, the 'setup.py' code and an usage example.\"\"\"\n", + " c_code: str\n", + " setup: str\n", + " usage: str" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cb6ce77a", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a function to print the optimization codes.\n", + "\n", + "def print_optimization(optimization):\n", + " \"\"\"Print the optimization codes.\"\"\"\n", + " print(f\"C CODE:\\n{optimization.c_code}\")\n", + " print(\"---------------------------\")\n", + " 
print(f\"setup.py:\\n{optimization.setup}\")\n", + " print(\"---------------------------\")\n", + " print(f\"USAGE:\\n{optimization.usage}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a function to write outputs to a file with a given filename.\n", + "\n", + "def write_file(data, filename):\n", + " \"\"\"Write data to a file with the specified filename.\"\"\"\n", + " with open(filename, \"w\") as file:\n", + " file.write(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f13c9c97", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a function to write the optimization codes to files.\n", + "\n", + "def write_optimization(optimization, module_name):\n", + " \"\"\"Write the optimization codes to files.\"\"\"\n", + " write_file(optimization.c_code, f\"{module_name}.c\")\n", + " write_file(optimization.setup, \"setup.py\")\n", + " write_file(optimization.usage, \"usage_example.py\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6896636f-923e-4a2c-9d6c-fac07828a201", + "metadata": {}, + "outputs": [], + "source": [ + "# Define system message for the LLM with instructions for generating the C extension code.\n", + "\n", + "system_message = \"\"\"\n", + "You are an assistant that reimplements Python code in high performance C extensions for Python.\n", + "Your responses must always be a JSON with the following structure:\n", + "\n", + "{\n", + " \"c_code\": \"Optimized C extension for Python code\",\n", + " \"setup\": \"The 'setup.py' code to compile the C extension for Python\",\n", + " \"usage\": \"An example of usage of the C extension for Python code with time measurement and comparing with the original Python code\"\n", + "}\n", + "\n", + "Use comments sparingly and do not provide any explanation other than occasional comments.\n", + "The C extension for Python needs to produce an 
identical output in the fastest possible time.\n", + "Make sure the C extension for Python code is correct and can be compiled with 'python setup.py build' and used in Python.\n", + "The usage example must include a time measurement and a comparison with the original Python code.\n", + "Do not include any additional text or explanation outside the JSON structure.\n", + "Make sure the JSON is correctly formatted.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb", + "metadata": {}, + "outputs": [], + "source": [ + "# Define user prompt template and function to fill it.\n", + "\n", + "def user_prompt_for(python_code, module_name):\n", + " user_prompt = f\"\"\"\n", + " Reimplement this Python code as a C extension for Python with the fastest possible implementation that produces identical output in the least time.\n", + " Respond only with C extension for Python code, do not explain your work other than a few code comments.\n", + " The module name, used to import, must be \"{module_name}\", the generated C file will be named \"{module_name}.c\".\n", + " Pay attention to number types to ensure no int overflows.\n", + " Remember to #include all necessary C packages such as iomanip or \n", + "\n", + " The target architecture is {sys.platform}, take that in mind while generating the C code, specially\n", + " when choosing types to use, and use the appropriate compiler flags.\n", + " Make sure to use the Python C API correctly and manage memory properly to avoid leaks or crashes.\n", + "\n", + " Here is the Python code to reimplement:\n", + "\n", + " {python_code}\"\"\"\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c6190659-f54c-4951-bef4-4960f8e51cc4", + "metadata": {}, + "outputs": [], + "source": [ + "# Define function to create the messages for the LLM.\n", + "\n", + "def messages_for(python_code, module_name):\n", + " \"\"\"Create the messages for 
the LLM given the Python code and the desired module name.\"\"\"\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(python_code, module_name)}]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3c57bc55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SYSTEM: \n", + "You are an assistant that reimplements Python code in high performance C extensions for Python.\n", + "Your responses must always be a JSON with the following structure:\n", + "\n", + "{\n", + " \"c_code\": \"Optimized C extension for Python code\",\n", + " \"setup\": \"The 'setup.py' code to compile the C extension for Python\",\n", + " \"usage\": \"An example of usage of the C extension for Python code with time measurement and comparing with the original Python code\"\n", + "}\n", + "\n", + "Use comments sparingly and do not provide any explanation other than occasional comments.\n", + "The C extension for Python needs to produce an identical output in the fastest possible time.\n", + "Make sure the C extension for Python code is correct and can be compiled with 'python setup.py build' and used in Python.\n", + "The usage example must include a time measurement and a comparison with the original Python code.\n", + "Do not include any additional text or explanation outside the JSON structure.\n", + "Make sure the JSON is correctly formatted.\n", + "\n", + "--------------------------------\n", + "USER: \n", + " Reimplement this Python code as a C extension for Python with the fastest possible implementation that produces identical output in the least time.\n", + " Respond only with C extension for Python code, do not explain your work other than a few code comments.\n", + " The module name, used to import, must be \"say_hello\", the generated C file will be named \"say_hello.c\".\n", + " Pay attention to number types to ensure no int 
overflows.\n", + " Remember to #include all necessary C packages such as iomanip or \n", + "\n", + " The target architecture is win32, take that in mind while generating the C code, specially\n", + " when choosing types to use, and use the appropriate compiler flags.\n", + " Make sure to use the Python C API correctly and manage memory properly to avoid leaks or crashes.\n", + "\n", + " Here is the Python code to reimplement:\n", + "\n", + " print('Hello World')\n", + "--------------------------------\n" + ] + } + ], + "source": [ + "# Test the messages function and print the messages.\n", + "\n", + "for message in messages_for(\"print('Hello World')\", \"say_hello\"):\n", + " print(f\"{message['role'].upper()}: {message['content']}\")\n", + " print(\"--------------------------------\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9", + "metadata": {}, + "outputs": [], + "source": [ + "# Define optimization function using OpenAI's GPT model.\n", + "\n", + "def optimize_gpt(python_code, module_name, model=OPENAI_MODEL):\n", + " \"\"\"Optimize the given Python code by generating a C extension for Python with the specified module name using the specified LLM model.\"\"\"\n", + " response = openai.chat.completions.parse(\n", + " model=model,\n", + " messages=messages_for(python_code, module_name),\n", + " response_format=Extension_codes).choices[0].message.parsed\n", + " return response" + ] + }, + { + "cell_type": "markdown", + "id": "c05b263a", + "metadata": {}, + "source": [ + "# Start with a math function that calculates ***π*** using the Leibniz formula.\n", + "\n", + "This formula implies the iterative approximation of *π* using an alternating series,\n", + "the more iterations the more the precision but with a cost of more computation.\n", + "* [Leibniz formula for π](https://en.wikipedia.org/wiki/Leibniz_formula_for_%CF%80)\n", + "\n", + "This is a good candidate to get a noticeable improvement by 
coding and compiling it into a Python C extension. \n", + "\n", + "> NOTE:\n", + ">\n", + "> We are creating an importable module not an executable program so the code to be optimized must contain only declarations such as DEF or CLASS." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a1cbb778-fa57-43de-b04b-ed523f396c38", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the Python function to be converted to a C extension and its module name.\n", + "\n", + "module_name = \"calculate_pi\"\n", + "\n", + "calculate_pi_code = f\"\"\"\n", + "def leibniz_pi(iterations):\n", + " result = 1.0\n", + " for i in range(1, iterations+1):\n", + " j = i * 4 - 1\n", + " result -= (1/j)\n", + " j = i * 4 + 1\n", + " result += (1/j)\n", + " return result * 4\n", + "\"\"\"\n", + "\n", + "# Define a function to test the performance of the calculus function.\n", + "\n", + "def test_pi_calculation(calculus_function ,iterations=100_000_000):\n", + " \"\"\"Test the performance of the given calculus function.\"\"\"\n", + " start_time = perf_counter()\n", + " result = calculus_function(iterations)\n", + " end_time = perf_counter()\n", + " print(f\"Result: {result:.12f}\")\n", + " print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n", + "\n", + "# Execute function declaration.\n", + "exec(calculate_pi_code)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7fe1cd4b-d2c5-4303-afed-2115a3fef200", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result: 3.141592658589\n", + "Execution Time: 20.556854 seconds\n" + ] + } + ], + "source": [ + "# Run original python code and time it.\n", + "\n", + "test_pi_calculation(leibniz_pi, 100_000_000)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4c0be0f2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Timing...\n", + "Python average execution time: 21.158541 
seconds\n" + ] + } + ], + "source": [ + "# Average timing the original Python code running it several times.\n", + "# (Increase 'iterations' for better timing)\n", + "\n", + "print(\"Timing...\")\n", + "iterations = 5\n", + "average = timeit(lambda: leibniz_pi(100_000_000), number=iterations) / iterations\n", + "print(f\"Python average execution time: {average:.6f} seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "105db6f9-343c-491d-8e44-3a5328b81719", + "metadata": {}, + "outputs": [], + "source": [ + "# Request code optimization using GPT.\n", + "\n", + "optimization = optimize_gpt(calculate_pi_code, module_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "378981c7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C CODE:\n", + "#define PY_SSIZE_T_CLEAN\n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "\n", + "static PyObject* leibniz_pi(PyObject* self, PyObject* args) {\n", + " PyObject* iterations_obj;\n", + " if (!PyArg_ParseTuple(args, \"O\", &iterations_obj)) {\n", + " return NULL;\n", + " }\n", + "\n", + " long long n_signed;\n", + " int overflow = 0;\n", + " n_signed = PyLong_AsLongLongAndOverflow(iterations_obj, &overflow);\n", + " if (n_signed == -1 && PyErr_Occurred() && overflow == 0) {\n", + " return NULL;\n", + " }\n", + "\n", + " unsigned long long n = 0ULL;\n", + " if (overflow < 0) {\n", + " n = 0ULL;\n", + " } else if (overflow > 0) {\n", + " unsigned long long tmp = PyLong_AsUnsignedLongLong(iterations_obj);\n", + " if (tmp == (unsigned long long)-1 && PyErr_Occurred()) {\n", + " return NULL;\n", + " }\n", + " n = tmp;\n", + " } else {\n", + " if (n_signed <= 0) {\n", + " n = 0ULL;\n", + " } else {\n", + " n = (unsigned long long)n_signed;\n", + " }\n", + " }\n", + "\n", + " double result = 1.0;\n", + " if (n == 0ULL) {\n", + " return PyFloat_FromDouble(result * 4.0);\n", + " }\n", + "\n", + " 
Py_BEGIN_ALLOW_THREADS\n", + " for (unsigned long long i = 1ULL; i <= n; ++i) {\n", + " double jd1;\n", + " if (i <= ULLONG_MAX / 4ULL) {\n", + " unsigned long long j1 = i * 4ULL - 1ULL;\n", + " jd1 = (double)j1;\n", + " } else {\n", + " jd1 = (double)i * 4.0 - 1.0;\n", + " }\n", + " result -= 1.0 / jd1;\n", + "\n", + " double jd2;\n", + " if (i <= (ULLONG_MAX - 1ULL) / 4ULL) {\n", + " unsigned long long j2 = i * 4ULL + 1ULL;\n", + " jd2 = (double)j2;\n", + " } else {\n", + " jd2 = (double)i * 4.0 + 1.0;\n", + " }\n", + " result += 1.0 / jd2;\n", + " }\n", + " Py_END_ALLOW_THREADS\n", + "\n", + " return PyFloat_FromDouble(result * 4.0);\n", + "}\n", + "\n", + "static PyMethodDef CalculatePiMethods[] = {\n", + " {\"leibniz_pi\", leibniz_pi, METH_VARARGS, \"Compute pi using the Leibniz series with the given number of iterations.\"},\n", + " {NULL, NULL, 0, NULL}\n", + "};\n", + "\n", + "static struct PyModuleDef calculate_pimodule = {\n", + " PyModuleDef_HEAD_INIT,\n", + " \"calculate_pi\",\n", + " \"High-performance Leibniz pi calculation.\",\n", + " -1,\n", + " CalculatePiMethods\n", + "};\n", + "\n", + "PyMODINIT_FUNC PyInit_calculate_pi(void) {\n", + " return PyModule_Create(&calculate_pimodule);\n", + "}\n", + "\n", + "---------------------------\n", + "setup.py:\n", + "from setuptools import setup, Extension\n", + "import sys\n", + "import os\n", + "\n", + "extra_compile_args = []\n", + "extra_link_args = []\n", + "\n", + "if os.name == 'nt':\n", + " extra_compile_args.extend(['/O2', '/fp:precise'])\n", + "else:\n", + " extra_compile_args.extend(['-O3', '-fno-strict-aliasing'])\n", + "\n", + "module = Extension(\n", + " 'calculate_pi',\n", + " sources=['calculate_pi.c'],\n", + " extra_compile_args=extra_compile_args,\n", + " extra_link_args=extra_link_args,\n", + ")\n", + "\n", + "setup(\n", + " name='calculate_pi',\n", + " version='1.0.0',\n", + " description='High-performance C extension for computing pi via the Leibniz series',\n", + " 
ext_modules=[module],\n", + ")\n", + "\n", + "---------------------------\n", + "USAGE:\n", + "# Build first: python setup.py build_ext --inplace\n", + "import time\n", + "import math\n", + "import calculate_pi\n", + "\n", + "# Original Python implementation\n", + "def py_leibniz_pi(iterations):\n", + " result = 1.0\n", + " for i in range(1, iterations + 1):\n", + " j = i * 4 - 1\n", + " result -= (1 / j)\n", + " j = i * 4 + 1\n", + " result += (1 / j)\n", + " return result * 4\n", + "\n", + "iters = 5_000_000\n", + "\n", + "# Warm-up\n", + "calculate_pi.leibniz_pi(10)\n", + "py_leibniz_pi(10)\n", + "\n", + "start = time.perf_counter()\n", + "res_c = calculate_pi.leibniz_pi(iters)\n", + "end = time.perf_counter()\n", + "ctime = end - start\n", + "\n", + "start = time.perf_counter()\n", + "res_py = py_leibniz_pi(iters)\n", + "end = time.perf_counter()\n", + "pytime = end - start\n", + "\n", + "print(f\"Iterations: {iters}\")\n", + "print(f\"C extension result: {res_c}\")\n", + "print(f\"Python result: {res_py}\")\n", + "print(f\"Absolute difference: {abs(res_c - res_py)}\")\n", + "print(f\"C extension time: {ctime:.6f} s\")\n", + "print(f\"Python time: {pytime:.6f} s\")\n", + "print(f\"Speedup: {pytime/ctime if ctime > 0 else float('inf'):.2f}x\")\n", + "\n" + ] + } + ], + "source": [ + "# Print generated extension code.\n", + "\n", + "print_optimization(optimization)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ae9a4a64", + "metadata": {}, + "outputs": [], + "source": [ + "# Write the generated code to files.\n", + "# (Will overwrite existing files)\n", + "\n", + "write_optimization(optimization, module_name)" + ] + }, + { + "cell_type": "markdown", + "id": "bf8f8018-f64d-425c-a0e1-d7862aa9592d", + "metadata": {}, + "source": [ + "# Compiling C Extension and executing\n", + "\n", + "The python setup command may fail inside Jupyter lab, if that's the case try it directly on the command line.\n", + "\n", + "There are two cells with WINDOWS 
ONLY, those are to manage the fact windows comes with two command lines,\n", + "the old CMD (MS-DOS style) and the new POWERSHELL (Unix style).\n", + "\n", + "It is controlled by the COMSPEC environment variable.\\\n", + "*(Using this variable is completely innocuous on UNIX systems, they will simply ignore it)*\n", + "\n", + "Most of command lines present here are Unix style but the building one requires CMD so\n", + "we switch to CMD before compiling to later restore the preset one." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "22a9130e", + "metadata": {}, + "outputs": [], + "source": [ + "# Clean previous builds.\n", + "# (Make sure to run this cell before running the compile cell a second time only)\n", + "# (May cast errors if no previous build exists)\n", + "\n", + "!rm -r build/" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "816e7c9d", + "metadata": {}, + "outputs": [], + "source": [ + "# [WINDOWS ONLY]\n", + "# Set COMSPEC to cmd.exe to avoid issues with some C compilers on Windows.\n", + "# (Remember to restore original COMSPEC after compilation and testing)\n", + "preset_comspec = os.environ.get(\"COMSPEC\")\n", + "os.environ[\"COMSPEC\"] = \"C:\\\\Windows\\\\System32\\\\cmd.exe\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4194e40c-04ab-4940-9d64-b4ad37c5bb40", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the C extension.\n", + "# (Will fail no C compiler is installed)\n", + "# (In case of errors, try directly on the command line)\n", + "\n", + "!python setup.py build_ext --inplace" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8db12c4d", + "metadata": {}, + "outputs": [], + "source": [ + "# [WINDOWS ONLY]\n", + "# Restore original COMSPEC.\n", + "\n", + "os.environ[\"COMSPEC\"] = preset_comspec" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "a8f5169f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Iterations: 5000000\n", + "C extension result: 3.1415927535897814\n", + "Python result: 3.1415927535897814\n", + "Absolute difference: 0.0\n", + "C extension time: 0.037515 s\n", + "Python time: 1.046732 s\n", + "Speedup: 27.90x\n" + ] + } + ], + "source": [ + "# Run the usage example to test the compiled C extension.\n", + "exec(optimization.usage)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a1972472", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing original Python code:\n", + "Result: 3.141592658589\n", + "Execution Time: 20.350486 seconds\n", + "Testing C extension code:\n", + "Result: 3.141592658589\n", + "Execution Time: 0.759571 seconds\n" + ] + } + ], + "source": [ + "# Import newly created C extension and compare performance with original Python code.\n", + "\n", + "from calculate_pi import leibniz_pi as c_leibniz_pi\n", + "\n", + "print(\"Testing original Python code:\")\n", + "test_pi_calculation(leibniz_pi, 100_000_000)\n", + "print(\"Testing C extension code:\")\n", + "test_pi_calculation(c_leibniz_pi, 100_000_000)\n" + ] + }, + { + "cell_type": "markdown", + "id": "77304493", + "metadata": {}, + "source": [ + "# Lets try with a more complex code\n", + "\n", + "Now we define three functions that together implements the calculation of the \"total maximum subarray sum\"\n", + "by finding the largest sum of a contiguous subarray within a given array of numbers.\n", + "\n", + "* [Maximum subarray problem](https://en.wikipedia.org/wiki/Maximum_subarray_problem)\n", + "\n", + "This algorithm requires much more computation and steps than the previous one, we may expect a heavy\n", + "improvement by coding and compiling it into a Python C extension. \n", + "\n", + "> NOTE:\n", + ">\n", + "> We are creating an importable module not an executable program so the code to be optimized must contain only declarations such as DEF or CLASS." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the Python function to be converted to a C extension and its module name.\n", + "\n", + "module_name = \"python_hard\"\n", + "\n", + "python_hard_code = \"\"\"\n", + "# Be careful to support large number sizes\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " value = seed\n", + " while True:\n", + " value = (a * value + c) % m\n", + " yield value\n", + "\n", + "def max_subarray_sum(n, seed, min_val, max_val):\n", + " lcg_gen = lcg(seed)\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + " max_sum = float('-inf')\n", + " for i in range(n):\n", + " current_sum = 0\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed)\n", + " for _ in range(20):\n", + " seed = next(lcg_gen)\n", + " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\"\"\"\n", + "\n", + "# Define a function to test the performance of the calculus function.\n", + "\n", + "def test_subarray_sum(calculus_function ,n=1000, initial_seed=42, min_val=-10, max_val=10):\n", + " \"\"\"Test the performance of the given calculus function.\"\"\"\n", + " start_time = perf_counter()\n", + " result = calculus_function(n, initial_seed, min_val, max_val)\n", + " end_time = perf_counter()\n", + " print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", + " print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n", + "\n", + "\n", + "# Execute function declarations.\n", + "exec(python_hard_code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"dab5e4bc-276c-4555-bd4c-12c699d5e899", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Maximum Subarray Sum (20 runs): 10980\n", + "Execution Time: 61.362418 seconds\n" + ] + } + ], + "source": [ + "# Run original python code and time it.\n", + "\n", + "test_subarray_sum(total_max_subarray_sum, 10000, 42, -10, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8", + "metadata": {}, + "outputs": [], + "source": [ + "# Request code optimization using GPT.\n", + "\n", + "optimization = optimize_gpt(python_hard_code, module_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b888d5af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C CODE:\n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "\n", + "// LCG step with 32-bit wrap-around\n", + "static inline uint32_t lcg_next(uint32_t *state) {\n", + " *state = (uint32_t)(1664525u * (*state) + 1013904223u);\n", + " return *state;\n", + "}\n", + "\n", + "static inline int add_overflow_int64(int64_t a, int64_t b, int64_t *res) {\n", + " if ((b > 0 && a > INT64_MAX - b) || (b < 0 && a < INT64_MIN - b)) return 1;\n", + " *res = a + b;\n", + " return 0;\n", + "}\n", + "\n", + "// Kadane for int64 array with overflow detection; returns PyLong or NULL (on overflow -> signal via *overflowed)\n", + "static PyObject* kadane_int64(const int64_t *arr, Py_ssize_t n, int *overflowed) {\n", + " if (n <= 0) {\n", + " return PyFloat_FromDouble(-INFINITY);\n", + " }\n", + " int64_t meh = arr[0];\n", + " int64_t msf = arr[0];\n", + " for (Py_ssize_t i = 1; i < n; ++i) {\n", + " int64_t x = arr[i];\n", + " if (meh > 0) {\n", + " int64_t tmp;\n", + " if (add_overflow_int64(meh, x, &tmp)) { *overflowed = 1; return NULL; }\n", + " meh = tmp;\n", + " } else {\n", + " meh = x;\n", + " }\n", + " if (meh > msf) msf = 
meh;\n", + " }\n", + " return PyLong_FromLongLong(msf);\n", + "}\n", + "\n", + "// Kadane for PyObject* integer array\n", + "static PyObject* kadane_big(PyObject **arr, Py_ssize_t n) {\n", + " if (n <= 0) {\n", + " return PyFloat_FromDouble(-INFINITY);\n", + " }\n", + " PyObject *meh = arr[0]; Py_INCREF(meh);\n", + " PyObject *msf = arr[0]; Py_INCREF(msf);\n", + " PyObject *zero = PyLong_FromLong(0);\n", + " if (!zero) { Py_DECREF(meh); Py_DECREF(msf); return NULL; }\n", + "\n", + " for (Py_ssize_t i = 1; i < n; ++i) {\n", + " int cmp = PyObject_RichCompareBool(meh, zero, Py_GT);\n", + " if (cmp < 0) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; }\n", + " if (cmp == 1) {\n", + " PyObject *t = PyNumber_Add(meh, arr[i]);\n", + " if (!t) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; }\n", + " Py_DECREF(meh);\n", + " meh = t;\n", + " } else {\n", + " Py_DECREF(meh);\n", + " meh = arr[i]; Py_INCREF(meh);\n", + " }\n", + " int cmp2 = PyObject_RichCompareBool(meh, msf, Py_GT);\n", + " if (cmp2 < 0) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; }\n", + " if (cmp2 == 1) {\n", + " Py_DECREF(msf);\n", + " msf = meh; Py_INCREF(msf);\n", + " }\n", + " }\n", + " Py_DECREF(meh);\n", + " Py_DECREF(zero);\n", + " return msf; // new reference\n", + "}\n", + "\n", + "// Generate int64 array fast path; returns 0 on success\n", + "static int gen_array_int64(Py_ssize_t n, uint32_t seed, int64_t min_v, int64_t max_v, int64_t *out) {\n", + " uint32_t state = seed;\n", + " uint64_t umax = (uint64_t)max_v;\n", + " uint64_t umin = (uint64_t)min_v;\n", + " uint64_t range = (umax - umin) + 1ULL; // max>=min guaranteed by caller\n", + " for (Py_ssize_t i = 0; i < n; ++i) {\n", + " state = lcg_next(&state);\n", + " uint32_t r32 = state;\n", + " uint64_t r = (range > 0x100000000ULL) ? 
(uint64_t)r32 : ((uint64_t)r32 % range);\n", + " int64_t val = (int64_t)(min_v + (int64_t)r);\n", + " out[i] = val;\n", + " }\n", + " return 0;\n", + "}\n", + "\n", + "// Generate PyObject* int array general path using Python arithmetic\n", + "static PyObject** gen_array_big(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) {\n", + " PyObject **arr = (PyObject**)PyMem_Malloc((n > 0 ? n : 1) * sizeof(PyObject*));\n", + " if (!arr) {\n", + " PyErr_NoMemory();\n", + " return NULL;\n", + " }\n", + " PyObject *one = PyLong_FromLong(1);\n", + " if (!one) { PyMem_Free(arr); return NULL; }\n", + " PyObject *diff = PyNumber_Subtract(max_val, min_val);\n", + " if (!diff) { Py_DECREF(one); PyMem_Free(arr); return NULL; }\n", + " PyObject *range_obj = PyNumber_Add(diff, one);\n", + " Py_DECREF(diff);\n", + " Py_DECREF(one);\n", + " if (!range_obj) { PyMem_Free(arr); return NULL; }\n", + "\n", + " uint32_t state = seed;\n", + " for (Py_ssize_t i = 0; i < n; ++i) {\n", + " state = lcg_next(&state);\n", + " PyObject *v = PyLong_FromUnsignedLong((unsigned long)state);\n", + " if (!v) {\n", + " Py_DECREF(range_obj);\n", + " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);\n", + " PyMem_Free(arr);\n", + " return NULL;\n", + " }\n", + " PyObject *r = PyNumber_Remainder(v, range_obj);\n", + " Py_DECREF(v);\n", + " if (!r) {\n", + " Py_DECREF(range_obj);\n", + " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);\n", + " PyMem_Free(arr);\n", + " return NULL;\n", + " }\n", + " PyObject *val = PyNumber_Add(r, min_val);\n", + " Py_DECREF(r);\n", + " if (!val) {\n", + " Py_DECREF(range_obj);\n", + " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);\n", + " PyMem_Free(arr);\n", + " return NULL;\n", + " }\n", + " arr[i] = val;\n", + " }\n", + " Py_DECREF(range_obj);\n", + " return arr;\n", + "}\n", + "\n", + "static PyObject* max_subarray_sum_internal(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) {\n", + " if (n <= 0) {\n", + " return 
PyFloat_FromDouble(-INFINITY);\n", + " }\n", + "\n", + " if (PyLong_Check(min_val) && PyLong_Check(max_val)) {\n", + " int overflow1 = 0, overflow2 = 0;\n", + " long long min64 = PyLong_AsLongLongAndOverflow(min_val, &overflow1);\n", + " if (overflow1) goto BIGINT_PATH;\n", + " long long max64 = PyLong_AsLongLongAndOverflow(max_val, &overflow2);\n", + " if (overflow2) goto BIGINT_PATH;\n", + " if (max64 >= min64) {\n", + " int64_t *arr = (int64_t*)PyMem_Malloc((size_t)n * sizeof(int64_t));\n", + " if (!arr) { PyErr_NoMemory(); return NULL; }\n", + " if (gen_array_int64(n, seed, (int64_t)min64, (int64_t)max64, arr) != 0) {\n", + " PyMem_Free(arr);\n", + " return NULL;\n", + " }\n", + " int overflowed = 0;\n", + " PyObject *res = kadane_int64(arr, n, &overflowed);\n", + " if (!res && overflowed) {\n", + " // fallback to big-int Kadane\n", + " PyObject **arr_obj = (PyObject**)PyMem_Malloc((size_t)n * sizeof(PyObject*));\n", + " if (!arr_obj) { PyMem_Free(arr); PyErr_NoMemory(); return NULL; }\n", + " for (Py_ssize_t i = 0; i < n; ++i) {\n", + " arr_obj[i] = PyLong_FromLongLong(arr[i]);\n", + " if (!arr_obj[i]) {\n", + " for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr_obj[k]);\n", + " PyMem_Free(arr_obj);\n", + " PyMem_Free(arr);\n", + " return NULL;\n", + " }\n", + " }\n", + " PyObject *bires = kadane_big(arr_obj, n);\n", + " for (Py_ssize_t i = 0; i < n; ++i) Py_DECREF(arr_obj[i]);\n", + " PyMem_Free(arr_obj);\n", + " PyMem_Free(arr);\n", + " return bires;\n", + " }\n", + " PyMem_Free(arr);\n", + " return res;\n", + " }\n", + " }\n", + "BIGINT_PATH: ;\n", + " PyObject **arr_obj = gen_array_big(n, seed, min_val, max_val);\n", + " if (!arr_obj) return NULL;\n", + " PyObject *res = kadane_big(arr_obj, n);\n", + " for (Py_ssize_t i = 0; i < n; ++i) Py_DECREF(arr_obj[i]);\n", + " PyMem_Free(arr_obj);\n", + " return res;\n", + "}\n", + "\n", + "static PyObject* py_max_subarray_sum(PyObject *self, PyObject *args) {\n", + " Py_ssize_t n;\n", + " PyObject *seed_obj, 
*min_val, *max_val;\n", + " if (!PyArg_ParseTuple(args, \"nOOO\", &n, &seed_obj, &min_val, &max_val)) return NULL;\n", + " if (n < 0) n = 0;\n", + " uint32_t seed = (uint32_t)(PyLong_AsUnsignedLongLongMask(seed_obj) & 0xFFFFFFFFULL);\n", + " if (PyErr_Occurred()) return NULL;\n", + " return max_subarray_sum_internal(n, seed, min_val, max_val);\n", + "}\n", + "\n", + "static PyObject* py_total_max_subarray_sum(PyObject *self, PyObject *args) {\n", + " Py_ssize_t n;\n", + " PyObject *init_seed_obj, *min_val, *max_val;\n", + " if (!PyArg_ParseTuple(args, \"nOOO\", &n, &init_seed_obj, &min_val, &max_val)) return NULL;\n", + " if (n < 0) n = 0;\n", + " uint32_t state = (uint32_t)(PyLong_AsUnsignedLongLongMask(init_seed_obj) & 0xFFFFFFFFULL);\n", + " if (PyErr_Occurred()) return NULL;\n", + "\n", + " PyObject *total = PyLong_FromLong(0);\n", + " if (!total) return NULL;\n", + "\n", + " for (int i = 0; i < 20; ++i) {\n", + " uint32_t seed = lcg_next(&state);\n", + " PyObject *part = max_subarray_sum_internal(n, seed, min_val, max_val);\n", + " if (!part) { Py_DECREF(total); return NULL; }\n", + " PyObject *new_total = PyNumber_Add(total, part);\n", + " Py_DECREF(part);\n", + " if (!new_total) { Py_DECREF(total); return NULL; }\n", + " Py_DECREF(total);\n", + " total = new_total;\n", + " }\n", + " return total;\n", + "}\n", + "\n", + "static PyMethodDef module_methods[] = {\n", + " {\"max_subarray_sum\", (PyCFunction)py_max_subarray_sum, METH_VARARGS, \"Compute maximum subarray sum using LCG-generated array.\"},\n", + " {\"total_max_subarray_sum\", (PyCFunction)py_total_max_subarray_sum, METH_VARARGS, \"Compute total of maximum subarray sums over 20 LCG seeds.\"},\n", + " {NULL, NULL, 0, NULL}\n", + "};\n", + "\n", + "static struct PyModuleDef moduledef = {\n", + " PyModuleDef_HEAD_INIT,\n", + " \"python_hard\",\n", + " NULL,\n", + " -1,\n", + " module_methods,\n", + " NULL,\n", + " NULL,\n", + " NULL,\n", + " NULL\n", + "};\n", + "\n", + "PyMODINIT_FUNC 
PyInit_python_hard(void) {\n", + " return PyModule_Create(&moduledef);\n", + "}\n", + "\n", + "---------------------------\n", + "setup.py:\n", + "from setuptools import setup, Extension\n", + "import sys\n", + "\n", + "extra_compile_args = []\n", + "extra_link_args = []\n", + "if sys.platform == 'win32':\n", + " extra_compile_args = ['/O2', '/Ot', '/GL', '/fp:fast']\n", + " extra_link_args = ['/LTCG']\n", + "else:\n", + " extra_compile_args = ['-O3', '-march=native']\n", + "\n", + "module = Extension(\n", + " name='python_hard',\n", + " sources=['python_hard.c'],\n", + " extra_compile_args=extra_compile_args,\n", + " extra_link_args=extra_link_args,\n", + " language='c'\n", + ")\n", + "\n", + "setup(\n", + " name='python_hard',\n", + " version='1.0.0',\n", + " description='High-performance C extension reimplementation',\n", + " ext_modules=[module]\n", + ")\n", + "\n", + "---------------------------\n", + "USAGE:\n", + "import time\n", + "\n", + "# Original Python code\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " value = seed\n", + " while True:\n", + " value = (a * value + c) % m\n", + " yield value\n", + "\n", + "def max_subarray_sum_py(n, seed, min_val, max_val):\n", + " lcg_gen = lcg(seed)\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + " max_sum = float('-inf')\n", + " for i in range(n):\n", + " current_sum = 0\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum_py(n, initial_seed, min_val, max_val):\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed)\n", + " for _ in range(20):\n", + " seed = next(lcg_gen)\n", + " total_sum += max_subarray_sum_py(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\n", + "# Build and import extension (after running: python setup.py build && install or develop)\n", + "import python_hard as 
ext\n", + "\n", + "# Example parameters\n", + "n = 600\n", + "initial_seed = 12345678901234567890\n", + "min_val = -1000\n", + "max_val = 1000\n", + "\n", + "# Time Python\n", + "t0 = time.perf_counter()\n", + "py_res1 = max_subarray_sum_py(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)\n", + "t1 = time.perf_counter()\n", + "py_time1 = t1 - t0\n", + "\n", + "# Time C extension\n", + "t0 = time.perf_counter()\n", + "ext_res1 = ext.max_subarray_sum(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)\n", + "t1 = time.perf_counter()\n", + "ext_time1 = t1 - t0\n", + "\n", + "print('max_subarray_sum equality:', py_res1 == ext_res1)\n", + "print('Python time:', py_time1)\n", + "print('C ext time:', ext_time1)\n", + "\n", + "# Total over 20 seeds\n", + "t0 = time.perf_counter()\n", + "py_res2 = total_max_subarray_sum_py(n, initial_seed, min_val, max_val)\n", + "t1 = time.perf_counter()\n", + "py_time2 = t1 - t0\n", + "\n", + "t0 = time.perf_counter()\n", + "ext_res2 = ext.total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", + "t1 = time.perf_counter()\n", + "ext_time2 = t1 - t0\n", + "\n", + "print('total_max_subarray_sum equality:', py_res2 == ext_res2)\n", + "print('Python total time:', py_time2)\n", + "print('C ext total time:', ext_time2)\n", + "\n" + ] + } + ], + "source": [ + "# Print generated extension code.\n", + "\n", + "print_optimization(optimization)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "356969b8", + "metadata": {}, + "outputs": [], + "source": [ + "# Write the generated extension code to files.\n", + "# (Will overwrite existing files)\n", + "\n", + "write_optimization(optimization, module_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "e0b3d073-88a2-40b2-831c-6f0c345c256f", + "metadata": {}, + "outputs": [], + "source": [ + "# Clean previous builds.\n", + "# (Make sure to run this cell before running the compile cell a second time only)\n", + "# 
(May cast errors if no previous build exists)\n", + "\n", + "!rm -r build/" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "29ae1993", + "metadata": {}, + "outputs": [], + "source": [ + "# [WINDOWS ONLY]\n", + "# Set COMSPEC to cmd.exe to avoid issues with some C compilers on Windows.\n", + "# (Remember to restore original COMSPEC after compilation and testing)\n", + "preset_comspec = os.environ.get(\"COMSPEC\")\n", + "os.environ[\"COMSPEC\"] = \"C:\\\\Windows\\\\System32\\\\cmd.exe\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "772328a7", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the C extension.\n", + "# (Will fail no C compiler is installed)\n", + "# (In case of errors, try directly on the command line)\n", + "\n", + "!python setup.py build_ext --inplace" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "4186a19e", + "metadata": {}, + "outputs": [], + "source": [ + "# [WINDOWS ONLY]\n", + "# Restore original COMSPEC.\n", + "\n", + "os.environ[\"COMSPEC\"] = preset_comspec" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "64899bb1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "max_subarray_sum equality: True\n", + "Python time: 0.010335999992094003\n", + "C ext time: 1.4399993233382702e-05\n", + "total_max_subarray_sum equality: True\n", + "Python total time: 0.21065390000876505\n", + "C ext total time: 0.00012310000602155924\n" + ] + } + ], + "source": [ + "# Run the usage example to test the compiled C extension.\n", + "exec(optimization.usage)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ee4f8f62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing original Python code:\n", + "Total Maximum Subarray Sum (20 runs): 10980\n", + "Execution Time: 57.275276 seconds\n", + "Testing C extension code:\n", + "Total Maximum 
# Define a function to call the optimization process and return the generated codes.

def optimize(python_code, module_name, model):
    """Run the optimization request and unpack its result for the Gradio outputs.

    Args:
        python_code: Python source to be converted.
        module_name: Name of the extension module to generate.
        model: Identifier of the model forwarded to ``optimize_gpt``.

    Returns:
        A ``(c_code, setup, usage)`` tuple read from the object returned by
        ``optimize_gpt``.
    """
    optimization = optimize_gpt(python_code, module_name, model)
    return optimization.c_code, optimization.setup, optimization.usage


# Define a function to execute Python code and capture its output.

def execute_python(code):
    """Execute *code* as a standalone script and return everything it printed.

    The snippet runs in its own fresh namespace, like a script would. Running
    it with a bare ``exec(code)`` inside this function would store its
    definitions in a throw-away locals mapping, so a function defined by the
    snippet could not see the other names the snippet defines.

    Args:
        code: Python source to execute.

    Returns:
        The text written to ``sys.stdout`` while *code* was running.

    Raises:
        Exception: Whatever *code* raises is propagated unchanged; the
            previous ``sys.stdout`` is restored first.
    """
    # NOTE(security): 'exec' runs arbitrary code with the privileges of this
    # process. The snippets come from a model / the UI, so review them first.
    captured = io.StringIO()
    namespace = {"__name__": "__main__"}
    # Remember the stream that is active *now*: inside a notebook (or any other
    # capturing host) it is not 'sys.__stdout__', and restoring that one would
    # disconnect every later 'print' from the host.
    previous_stdout = sys.stdout
    sys.stdout = captured
    try:
        exec(code, namespace)
    finally:
        sys.stdout = previous_stdout
    return captured.getvalue()
# Extension compilation function.

def build_extension():
    """Compile the C extension with ``setup.py build_ext --inplace``.

    The build runs with the interpreter that is executing this code, so the
    extension is compiled for the Python that will import it.

    Returns:
        The standard output of the build process.

    Raises:
        RuntimeError: If the build command exits with a non-zero status. The
            message contains the captured stdout and stderr.
    """
    # Give the child process its own copy of the environment instead of editing
    # 'os.environ' and restoring it afterwards: restoring failed with a
    # TypeError whenever COMSPEC was not defined beforehand (its saved value
    # was None), which is the normal situation outside Windows.
    build_env = os.environ.copy()
    if os.name == "nt":
        # Some C compilers need cmd.exe as the command interpreter on Windows.
        system_root = os.environ.get("SystemRoot", "C:\\Windows")
        build_env["COMSPEC"] = os.path.join(system_root, "System32", "cmd.exe")
    compile_cmd = [sys.executable, "setup.py", "build_ext", "--inplace"]
    try:
        compile_result = subprocess.run(compile_cmd, env=build_env,
                                        check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as ex:
        raise RuntimeError(
            f"An error occurred while building:\n{ex.stdout}\n{ex.stderr}") from ex
    return compile_result.stdout


# Extension generation function.

def generate_extension(c_code, setup_code, usage_code, module_name):
    """Write the extension sources to disk, build them and report the outcome.

    Args:
        c_code: C source, written to ``<module_name>.c``.
        setup_code: Build script, written to ``setup.py``.
        usage_code: Not used here; accepted because the Gradio click handler
            passes it along with the other inputs.
        module_name: Base name of the C source file.

    Returns:
        The build output on success, otherwise a text describing the failure.
        Errors are returned rather than raised so the UI can display them.
    """
    try:  # Write the provided codes to their respective files.
        write_file(c_code, f"{module_name}.c")
        write_file(setup_code, "setup.py")
    except Exception as ex:
        return f"An error occurred while writing files:\n{ex}"
    try:  # Build the extension and hand back its output.
        return build_extension()
    except Exception as ex:  # If the build fails, return the error message.
        return str(ex)
# Extension testing function.

def test_extension(usage_code):
    """Save the usage example to ``usage_example.py``, run it and return its output.

    Args:
        usage_code: Python source exercising the compiled extension.

    Returns:
        The text captured by ``execute_python`` on success, otherwise a message
        describing what failed (writing the file or running the code).
    """
    # Keep a copy of the example on disk next to the generated sources.
    try:
        write_file(usage_code, "usage_example.py")
    except Exception as write_error:
        return f"An error occurred while writing test file:\n{write_error}"

    # Run the example; failures are reported as text so the UI can show them.
    try:
        return execute_python(usage_code)
    except Exception as run_error:
        return f"An error occurred while testing the extension:\n{run_error}"
initialization function\n", + "PyMODINIT_FUNC PyInit_zz_my_module(void) {\n", + " return PyModule_Create(&zz_my_module);\n", + "}\n", + "\"\"\"\n", + "\n", + "default_setup = \"\"\"\n", + "from setuptools import setup, Extension\n", + "\n", + "module = Extension(\n", + " 'zz_my_module',\n", + " sources=['zz_my_module.c'],\n", + ")\n", + "\n", + "setup(\n", + " name='zz_my_module',\n", + " version='1.0',\n", + " description='This is a custom C extension module.',\n", + " ext_modules=[module]\n", + ")\n", + "\"\"\"\n", + "\n", + "default_test = \"\"\"\n", + "import time\n", + "import zz_my_module\n", + "\n", + "def python_hello_world():\n", + " print(\"Hello, World!\")\n", + "\n", + "start = time.time()\n", + "python_hello_world()\n", + "end = time.time()\n", + "print(f\"Python function execution time: {end - start:.6f} seconds\")\n", + "\n", + "start = time.time()\n", + "zz_my_module.hello_world()\n", + "end = time.time()\n", + "print(f\"C extension execution time: {end - start:.6f} seconds\")\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "fa64577a", + "metadata": {}, + "outputs": [], + "source": [ + "# We will use gradio auto reload feature, this way we do not need to restart the app to see changes in the code.\n", + "# * https://www.gradio.app/guides/developing-faster-with-reload-mode\n", + "\n", + "%load_ext gradio\n", + "\n", + "# This mandatory requires naming the 'gr.Blocks' interface as 'demo'.\n", + "# Now, each time we edit the code, we just need to re-run Gradio interface cell to see the changes in the app.\n", + "# The '.launch()' method is not needed anymore." 
# Gradio interface. The 'gr.Blocks' object has to be named 'demo' so the
# reload-mode cell magic can pick it up.
with gr.Blocks(css=css) as demo:
    # The app produces Python C extensions, so the heading says so.
    gr.Markdown("## Convert code from Python to a C extension")

    # Module name and model selection.
    with gr.Row():
        # NOTE(review): named '..._input' because the cell presumably runs in the
        # notebook namespace, where 'module_name' already holds the module name
        # string used by earlier cells -- confirm.
        module_name_input = gr.Textbox(label="Module name:", lines=1, value="zz_my_module")
        model = gr.Dropdown(["gpt-4o", "gpt-5"], label="Select model", value="gpt-4o")

    # Source code on the left, generated C code on the right.
    with gr.Row():
        python = gr.Textbox(label="Python code:", lines=30, value=default_p_code, elem_classes=["python"])
        c_code = gr.Textbox(label="C Extension code:", lines=30, value=default_c_code, elem_classes=["c_ext"])
    with gr.Row():
        get_extension = gr.Button("Generate extension code")

    # Build script and usage/benchmark example.
    with gr.Row():
        setup_code = gr.Textbox(label="Compilation code:", lines=10, value=default_setup, elem_classes=["python"])
        usage_code = gr.Textbox(label="Test compare code:", lines=10, value=default_test, elem_classes=["python"])
    with gr.Row():
        compile_ext = gr.Button("Compile extension")
    with gr.Row():
        c_ext_out = gr.TextArea(label="C Extension result:", elem_classes=["c_ext"])
    with gr.Row():
        test_run = gr.Button("Test code")
    with gr.Row():
        test_out = gr.TextArea(label="Test result:", elem_classes=["python"])

    # Wire the three buttons to their handlers.
    get_extension.click(optimize, inputs=[python, module_name_input, model],
                        outputs=[c_code, setup_code, usage_code])
    compile_ext.click(generate_extension, inputs=[c_code, setup_code, usage_code, module_name_input],
                      outputs=[c_ext_out])
    test_run.click(test_extension, inputs=[usage_code], outputs=[test_out])
}, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week4/community-contributions/c_extension_generator/python_hard.c b/week4/community-contributions/c_extension_generator/python_hard.c new file mode 100644 index 0000000..67968ea --- /dev/null +++ b/week4/community-contributions/c_extension_generator/python_hard.c @@ -0,0 +1,244 @@ +#include +#include +#include +#include +#include + +// LCG step with 32-bit wrap-around +static inline uint32_t lcg_next(uint32_t *state) { + *state = (uint32_t)(1664525u * (*state) + 1013904223u); + return *state; +} + +static inline int add_overflow_int64(int64_t a, int64_t b, int64_t *res) { + if ((b > 0 && a > INT64_MAX - b) || (b < 0 && a < INT64_MIN - b)) return 1; + *res = a + b; + return 0; +} + +// Kadane for int64 array with overflow detection; returns PyLong or NULL (on overflow -> signal via *overflowed) +static PyObject* kadane_int64(const int64_t *arr, Py_ssize_t n, int *overflowed) { + if (n <= 0) { + return PyFloat_FromDouble(-INFINITY); + } + int64_t meh = arr[0]; + int64_t msf = arr[0]; + for (Py_ssize_t i = 1; i < n; ++i) { + int64_t x = arr[i]; + if (meh > 0) { + int64_t tmp; + if (add_overflow_int64(meh, x, &tmp)) { *overflowed = 1; return NULL; } + meh = tmp; + } else { + meh = x; + } + if (meh > msf) msf = meh; + } + return PyLong_FromLongLong(msf); +} + +// Kadane for PyObject* integer array +static PyObject* kadane_big(PyObject **arr, Py_ssize_t n) { + if (n <= 0) { + return PyFloat_FromDouble(-INFINITY); + } + PyObject *meh = arr[0]; Py_INCREF(meh); + PyObject *msf = arr[0]; Py_INCREF(msf); + PyObject *zero = PyLong_FromLong(0); + if (!zero) { Py_DECREF(meh); Py_DECREF(msf); return NULL; } + + for (Py_ssize_t i = 1; i < n; ++i) { + int cmp = PyObject_RichCompareBool(meh, zero, Py_GT); + if (cmp < 0) { Py_DECREF(meh); Py_DECREF(msf); 
Py_DECREF(zero); return NULL; } + if (cmp == 1) { + PyObject *t = PyNumber_Add(meh, arr[i]); + if (!t) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; } + Py_DECREF(meh); + meh = t; + } else { + Py_DECREF(meh); + meh = arr[i]; Py_INCREF(meh); + } + int cmp2 = PyObject_RichCompareBool(meh, msf, Py_GT); + if (cmp2 < 0) { Py_DECREF(meh); Py_DECREF(msf); Py_DECREF(zero); return NULL; } + if (cmp2 == 1) { + Py_DECREF(msf); + msf = meh; Py_INCREF(msf); + } + } + Py_DECREF(meh); + Py_DECREF(zero); + return msf; // new reference +} + +// Generate int64 array fast path; returns 0 on success +static int gen_array_int64(Py_ssize_t n, uint32_t seed, int64_t min_v, int64_t max_v, int64_t *out) { + uint32_t state = seed; + uint64_t umax = (uint64_t)max_v; + uint64_t umin = (uint64_t)min_v; + uint64_t range = (umax - umin) + 1ULL; // max>=min guaranteed by caller + for (Py_ssize_t i = 0; i < n; ++i) { + state = lcg_next(&state); + uint32_t r32 = state; + uint64_t r = (range > 0x100000000ULL) ? (uint64_t)r32 : ((uint64_t)r32 % range); + int64_t val = (int64_t)(min_v + (int64_t)r); + out[i] = val; + } + return 0; +} + +// Generate PyObject* int array general path using Python arithmetic +static PyObject** gen_array_big(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) { + PyObject **arr = (PyObject**)PyMem_Malloc((n > 0 ? 
n : 1) * sizeof(PyObject*));
+    if (!arr) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    PyObject *one = PyLong_FromLong(1);
+    if (!one) { PyMem_Free(arr); return NULL; }
+    PyObject *diff = PyNumber_Subtract(max_val, min_val);
+    if (!diff) { Py_DECREF(one); PyMem_Free(arr); return NULL; }
+    PyObject *range_obj = PyNumber_Add(diff, one);
+    Py_DECREF(diff);
+    Py_DECREF(one);
+    if (!range_obj) { PyMem_Free(arr); return NULL; }
+
+    uint32_t state = seed;
+    for (Py_ssize_t i = 0; i < n; ++i) {
+        state = lcg_next(&state);
+        PyObject *v = PyLong_FromUnsignedLong((unsigned long)state);
+        if (!v) {
+            Py_DECREF(range_obj);
+            for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);
+            PyMem_Free(arr);
+            return NULL;
+        }
+        PyObject *r = PyNumber_Remainder(v, range_obj);
+        Py_DECREF(v);
+        if (!r) {
+            Py_DECREF(range_obj);
+            for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);
+            PyMem_Free(arr);
+            return NULL;
+        }
+        PyObject *val = PyNumber_Add(r, min_val);
+        Py_DECREF(r);
+        if (!val) {
+            Py_DECREF(range_obj);
+            for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr[k]);
+            PyMem_Free(arr);
+            return NULL;
+        }
+        arr[i] = val;
+    }
+    Py_DECREF(range_obj);
+    return arr;
+}
+
+static PyObject* max_subarray_sum_internal(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) {
+    if (n <= 0) {
+        return PyFloat_FromDouble(-INFINITY);
+    }
+
+    if (PyLong_Check(min_val) && PyLong_Check(max_val)) {
+        int overflow1 = 0, overflow2 = 0;
+        long long min64 = PyLong_AsLongLongAndOverflow(min_val, &overflow1);
+        if (overflow1) goto BIGINT_PATH;
+        long long max64 = PyLong_AsLongLongAndOverflow(max_val, &overflow2);
+        if (overflow2) goto BIGINT_PATH;
+        if (max64 >= min64) {
+            int64_t *arr = (int64_t*)PyMem_Malloc((size_t)n * sizeof(int64_t));
+            if (!arr) { PyErr_NoMemory(); return NULL; }
+            if (gen_array_int64(n, seed, (int64_t)min64, (int64_t)max64, arr) != 0) {
+                PyMem_Free(arr);
+                return NULL;
+            }
+            int overflowed = 0;
+            PyObject *res = kadane_int64(arr, n, &overflowed);
+            if (!res && overflowed) {
+                // fallback to big-int Kadane
+                PyObject **arr_obj = (PyObject**)PyMem_Malloc((size_t)n * sizeof(PyObject*));
+                if (!arr_obj) { PyMem_Free(arr); PyErr_NoMemory(); return NULL; }
+                for (Py_ssize_t i = 0; i < n; ++i) {
+                    arr_obj[i] = PyLong_FromLongLong(arr[i]);
+                    if (!arr_obj[i]) {
+                        for (Py_ssize_t k = 0; k < i; ++k) Py_DECREF(arr_obj[k]);
+                        PyMem_Free(arr_obj);
+                        PyMem_Free(arr);
+                        return NULL;
+                    }
+                }
+                PyObject *bires = kadane_big(arr_obj, n);
+                for (Py_ssize_t i = 0; i < n; ++i) Py_DECREF(arr_obj[i]);
+                PyMem_Free(arr_obj);
+                PyMem_Free(arr);
+                return bires;
+            }
+            PyMem_Free(arr);
+            return res;
+        }
+    }
+BIGINT_PATH: ;
+    PyObject **arr_obj = gen_array_big(n, seed, min_val, max_val);
+    if (!arr_obj) return NULL;
+    PyObject *res = kadane_big(arr_obj, n);
+    for (Py_ssize_t i = 0; i < n; ++i) Py_DECREF(arr_obj[i]);
+    PyMem_Free(arr_obj);
+    return res;
+}
+
+static PyObject* py_max_subarray_sum(PyObject *self, PyObject *args) {
+    Py_ssize_t n;
+    PyObject *seed_obj, *min_val, *max_val;
+    if (!PyArg_ParseTuple(args, "nOOO", &n, &seed_obj, &min_val, &max_val)) return NULL;
+    if (n < 0) n = 0;
+    uint32_t seed = (uint32_t)(PyLong_AsUnsignedLongLongMask(seed_obj) & 0xFFFFFFFFULL);
+    if (PyErr_Occurred()) return NULL;
+    return max_subarray_sum_internal(n, seed, min_val, max_val);
+}
+
+static PyObject* py_total_max_subarray_sum(PyObject *self, PyObject *args) {
+    Py_ssize_t n;
+    PyObject *init_seed_obj, *min_val, *max_val;
+    if (!PyArg_ParseTuple(args, "nOOO", &n, &init_seed_obj, &min_val, &max_val)) return NULL;
+    if (n < 0) n = 0;
+    uint32_t state = (uint32_t)(PyLong_AsUnsignedLongLongMask(init_seed_obj) & 0xFFFFFFFFULL);
+    if (PyErr_Occurred()) return NULL;
+
+    PyObject *total = PyLong_FromLong(0);
+    if (!total) return NULL;
+
+    for (int i = 0; i < 20; ++i) {
+        uint32_t seed = lcg_next(&state);
+        PyObject *part = max_subarray_sum_internal(n, seed, min_val, max_val);
+        if (!part) { Py_DECREF(total); return NULL; }
+        PyObject *new_total = PyNumber_Add(total, part);
+        Py_DECREF(part);
+        if (!new_total) { Py_DECREF(total); return NULL; }
+        Py_DECREF(total);
+        total = new_total;
+    }
+    return total;
+}
+
+static PyMethodDef module_methods[] = {
+    {"max_subarray_sum", (PyCFunction)py_max_subarray_sum, METH_VARARGS, "Compute maximum subarray sum using LCG-generated array."},
+    {"total_max_subarray_sum", (PyCFunction)py_total_max_subarray_sum, METH_VARARGS, "Compute total of maximum subarray sums over 20 LCG seeds."},
+    {NULL, NULL, 0, NULL}
+};
+
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "python_hard",
+    NULL,
+    -1,
+    module_methods,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+
+PyMODINIT_FUNC PyInit_python_hard(void) {
+    return PyModule_Create(&moduledef);
+}
diff --git a/week4/community-contributions/c_extension_generator/setup_calculate_pi.py b/week4/community-contributions/c_extension_generator/setup_calculate_pi.py
new file mode 100644
index 0000000..ecb4067
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/setup_calculate_pi.py
@@ -0,0 +1,25 @@
+from setuptools import setup, Extension
+import sys
+import os
+
+extra_compile_args = []
+extra_link_args = []
+
+if os.name == 'nt':
+    extra_compile_args.extend(['/O2', '/fp:precise'])
+else:
+    extra_compile_args.extend(['-O3', '-fno-strict-aliasing'])
+
+module = Extension(
+    'calculate_pi',
+    sources=['calculate_pi.c'],
+    extra_compile_args=extra_compile_args,
+    extra_link_args=extra_link_args,
+)
+
+setup(
+    name='calculate_pi',
+    version='1.0.0',
+    description='High-performance C extension for computing pi via the Leibniz series',
+    ext_modules=[module],
+)
diff --git a/week4/community-contributions/c_extension_generator/setup_python_hard.py b/week4/community-contributions/c_extension_generator/setup_python_hard.py
new file mode 100644
index 0000000..20d3d3f
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/setup_python_hard.py
@@ -0,0 +1,25 @@
+from setuptools import setup, Extension
+import sys
+
+extra_compile_args = []
+extra_link_args = []
+if sys.platform == 'win32':
+    extra_compile_args = ['/O2', '/Ot', '/GL', '/fp:fast']
+    extra_link_args = ['/LTCG']
+else:
+    extra_compile_args = ['-O3', '-march=native']
+
+module = Extension(
+    name='python_hard',
+    sources=['python_hard.c'],
+    extra_compile_args=extra_compile_args,
+    extra_link_args=extra_link_args,
+    language='c'
+)
+
+setup(
+    name='python_hard',
+    version='1.0.0',
+    description='High-performance C extension reimplementation',
+    ext_modules=[module]
+)
diff --git a/week4/community-contributions/c_extension_generator/setup_zz_my_module.py b/week4/community-contributions/c_extension_generator/setup_zz_my_module.py
new file mode 100644
index 0000000..bb0d27b
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/setup_zz_my_module.py
@@ -0,0 +1,14 @@
+
+from setuptools import setup, Extension
+
+module = Extension(
+    'zz_my_module',
+    sources=['zz_my_module.c'],
+)
+
+setup(
+    name='zz_my_module',
+    version='1.0',
+    description='This is a custom C extension module.',
+    ext_modules=[module]
+)
diff --git a/week4/community-contributions/c_extension_generator/usage_example_calculate_pi.py b/week4/community-contributions/c_extension_generator/usage_example_calculate_pi.py
new file mode 100644
index 0000000..c3cc418
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/usage_example_calculate_pi.py
@@ -0,0 +1,39 @@
+# Build first: python setup_calculate_pi.py build_ext --inplace
+import time
+import math
+import calculate_pi
+
+# Original Python implementation
+def py_leibniz_pi(iterations):
+    """Approximate pi by summing `iterations` term pairs of the Leibniz series."""
+    result = 1.0
+    for i in range(1, iterations + 1):
+        j = i * 4 - 1
+        result -= (1 / j)
+        j = i * 4 + 1
+        result += (1 / j)
+    return result * 4
+
+iters = 5_000_000
+
+# Warm-up
+calculate_pi.leibniz_pi(10)
+py_leibniz_pi(10)
+
+start = time.perf_counter()
+res_c = calculate_pi.leibniz_pi(iters)
+end = time.perf_counter()
+ctime = end - start
+
+start = time.perf_counter()
+res_py = py_leibniz_pi(iters)
+end = time.perf_counter()
+pytime = end - start
+
+print(f"Iterations: {iters}")
+print(f"C extension result: {res_c}")
+print(f"Python result: {res_py}")
+print(f"Absolute difference: {abs(res_c - res_py)}")
+print(f"C extension time: {ctime:.6f} s")
+print(f"Python time: {pytime:.6f} s")
+print(f"Speedup: {pytime/ctime if ctime > 0 else float('inf'):.2f}x")
diff --git a/week4/community-contributions/c_extension_generator/usage_example_python_hard.py b/week4/community-contributions/c_extension_generator/usage_example_python_hard.py
new file mode 100644
index 0000000..552b0b5
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/usage_example_python_hard.py
@@ -0,0 +1,72 @@
+import time
+
+# Original Python code
+
+def lcg(seed, a=1664525, c=1013904223, m=2**32):
+    """Yield successive states value = (a * value + c) % m, forever."""
+    value = seed
+    while True:
+        value = (a * value + c) % m
+        yield value
+
+def max_subarray_sum_py(n, seed, min_val, max_val):
+    """Brute-force the largest contiguous-run sum over n LCG-derived numbers."""
+    lcg_gen = lcg(seed)
+    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
+    max_sum = float('-inf')
+    for i in range(n):
+        current_sum = 0
+        for j in range(i, n):
+            current_sum += random_numbers[j]
+            if current_sum > max_sum:
+                max_sum = current_sum
+    return max_sum
+
+def total_max_subarray_sum_py(n, initial_seed, min_val, max_val):
+    """Add up max_subarray_sum_py for 20 seeds drawn from lcg(initial_seed)."""
+    total_sum = 0
+    lcg_gen = lcg(initial_seed)
+    for _ in range(20):
+        seed = next(lcg_gen)
+        total_sum += max_subarray_sum_py(n, seed, min_val, max_val)
+    return total_sum
+
+# Build and import extension (after running: python setup_python_hard.py build_ext --inplace)
+import python_hard as ext
+
+# Example parameters
+n = 600
+initial_seed = 12345678901234567890
+min_val = -1000
+max_val = 1000
+
+# Time Python
+t0 = time.perf_counter()
+py_res1 = max_subarray_sum_py(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)
+t1 = time.perf_counter()
+py_time1 = t1 - t0
+
+# Time C extension
+t0 = time.perf_counter()
+ext_res1 = ext.max_subarray_sum(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)
+t1 = time.perf_counter()
+ext_time1 = t1 - t0
+
+print('max_subarray_sum equality:', py_res1 == ext_res1)
+print('Python time:', py_time1)
+print('C ext time:', ext_time1)
+
+# Total over 20 seeds
+t0 = time.perf_counter()
+py_res2 = total_max_subarray_sum_py(n, initial_seed, min_val, max_val)
+t1 = time.perf_counter()
+py_time2 = t1 - t0
+
+t0 = time.perf_counter()
+ext_res2 = ext.total_max_subarray_sum(n, initial_seed, min_val, max_val)
+t1 = time.perf_counter()
+ext_time2 = t1 - t0
+
+print('total_max_subarray_sum equality:', py_res2 == ext_res2)
+print('Python total time:', py_time2)
+print('C ext total time:', ext_time2)
diff --git a/week4/community-contributions/c_extension_generator/usage_example_zz_my_module.py b/week4/community-contributions/c_extension_generator/usage_example_zz_my_module.py
new file mode 100644
index 0000000..6404cd3
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/usage_example_zz_my_module.py
@@ -0,0 +1,17 @@
+
+import time
+import zz_my_module
+
+def python_hello_world():
+    """Pure-Python baseline that prints the same greeting as the extension."""
+    print("Hello, World!")
+
+start = time.perf_counter()
+python_hello_world()
+end = time.perf_counter()
+print(f"Python function execution time: {end - start:.6f} seconds")
+
+start = time.perf_counter()
+zz_my_module.hello_world()
+end = time.perf_counter()
+print(f"C extension execution time: {end - start:.6f} seconds")
diff --git a/week4/community-contributions/c_extension_generator/zz_my_module.c b/week4/community-contributions/c_extension_generator/zz_my_module.c
new file mode 100644
index 0000000..2593233
--- /dev/null
+++ b/week4/community-contributions/c_extension_generator/zz_my_module.c
@@ -0,0 +1,28 @@
+
+#include <Python.h>
+
+// Function to be called from Python
+static PyObject* zz_hello_world(PyObject* self, PyObject* args) {
+    printf("Hello, World!\n");
+    Py_RETURN_NONE;
+}
+
+// Method definition structure
+static PyMethodDef zz_my_methods[] = {
+    {"hello_world", zz_hello_world, METH_VARARGS, "Print 'Hello, World!'"},
+    {NULL, NULL, 0, NULL} // Sentinel
+};
+
+// Module definition
+static struct PyModuleDef zz_my_module = {
+    PyModuleDef_HEAD_INIT,
+    "zz_my_module",
+    "Extension module that prints Hello, World!",
+    -1,
+    zz_my_methods
+};
+
+// Module initialization function
+PyMODINIT_FUNC PyInit_zz_my_module(void) {
+    return PyModule_Create(&zz_my_module);
+}