Added sample files generated for the three different scenarios.

Includes the C code, setup.py, and an example run script for:
* Pi calculation with Leibniz formula.
* Max sub-array calculation. (Aka: Python Hard)
* Default for Gradio: Hello world. (Aka: _zz_my_module)
This commit is contained in:
Carlos Bazaga
2025-09-01 01:37:59 +02:00
parent e31e5c8f72
commit 78a3ee10b2
9 changed files with 542 additions and 0 deletions

View File

@@ -0,0 +1,83 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <stdint.h>
/*
 * leibniz_pi(iterations) -> float
 *
 * Approximates pi as 4 * (1 - 1/3 + 1/5 - 1/7 + ...), consuming two series
 * terms per iteration: -1/(4i - 1) and +1/(4i + 1).
 *
 * The single positional argument is converted with the PyLong APIs:
 *   - zero, negative, or too-negative-for-long-long values run no iterations,
 *     so the result is 4.0;
 *   - values above the long long range are re-read as unsigned long long; if
 *     that conversion fails, its exception propagates;
 *   - any other conversion failure returns NULL with the exception set.
 *
 * The GIL is released while the series is summed.
 */
static PyObject* leibniz_pi(PyObject* self, PyObject* args) {
    PyObject *count_arg;
    if (!PyArg_ParseTuple(args, "O", &count_arg)) {
        return NULL;
    }

    int ovf = 0;
    const long long as_signed = PyLong_AsLongLongAndOverflow(count_arg, &ovf);
    if (ovf == 0 && as_signed == -1 && PyErr_Occurred()) {
        return NULL;
    }

    /* Number of loop iterations; stays 0 for every non-positive input. */
    unsigned long long terms = 0ULL;
    if (ovf > 0) {
        /* Too large for long long: it may still fit the unsigned type. */
        terms = PyLong_AsUnsignedLongLong(count_arg);
        if (terms == (unsigned long long)-1 && PyErr_Occurred()) {
            return NULL;
        }
    } else if (ovf == 0 && as_signed > 0) {
        terms = (unsigned long long)as_signed;
    }

    double acc = 1.0;
    if (terms != 0ULL) {
        Py_BEGIN_ALLOW_THREADS
        for (unsigned long long k = 1ULL; k <= terms; ++k) {
            /* Build each denominator in exact integer arithmetic while 4k +/- 1
             * fits in unsigned long long; otherwise fall back to doubles. */
            const double below = (k <= ULLONG_MAX / 4ULL)
                ? (double)(k * 4ULL - 1ULL)
                : (double)k * 4.0 - 1.0;
            acc -= 1.0 / below;

            const double above = (k <= (ULLONG_MAX - 1ULL) / 4ULL)
                ? (double)(k * 4ULL + 1ULL)
                : (double)k * 4.0 + 1.0;
            acc += 1.0 / above;
        }
        Py_END_ALLOW_THREADS
    }
    return PyFloat_FromDouble(acc * 4.0);
}
static PyMethodDef CalculatePiMethods[] = {
{"leibniz_pi", leibniz_pi, METH_VARARGS, "Compute pi using the Leibniz series with the given number of iterations."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef calculate_pimodule = {
PyModuleDef_HEAD_INIT,
"calculate_pi",
"High-performance Leibniz pi calculation.",
-1,
CalculatePiMethods
};
PyMODINIT_FUNC PyInit_calculate_pi(void) {
return PyModule_Create(&calculate_pimodule);
}

View File

@@ -0,0 +1,244 @@
#include <Python.h>
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
#include <math.h>
// One step of the linear congruential generator x' = 1664525 * x + 1013904223,
// reduced modulo 2^32 by unsigned wrap-around. Stores the new value in *state
// and also returns it.
static inline uint32_t lcg_next(uint32_t *state) {
    const uint32_t previous = *state;
    const uint32_t advanced = (uint32_t)(1664525u * previous + 1013904223u);
    *state = advanced;
    return advanced;
}
// Checked signed addition. Returns 1 and leaves *res untouched when a + b does
// not fit in int64_t; otherwise stores the sum in *res and returns 0.
static inline int add_overflow_int64(int64_t a, int64_t b, int64_t *res) {
    if (b > 0) {
        if (a > INT64_MAX - b) {
            return 1;  // would exceed INT64_MAX
        }
    } else if (b < 0) {
        if (a < INT64_MIN - b) {
            return 1;  // would drop below INT64_MIN
        }
    }
    *res = a + b;
    return 0;
}
// Kadane's maximum-subarray scan over an int64 array.
//   n <= 0            -> returns a Python float -inf (no subarray exists).
//   running sum fits  -> returns the maximum sum as a Python int.
//   running sum would overflow int64 -> sets *overflowed to 1 and returns NULL
//                        WITHOUT setting a Python exception; the caller is
//                        expected to retry with arbitrary-precision integers.
// *overflowed is only ever written on overflow, so callers must pre-clear it.
static PyObject* kadane_int64(const int64_t *arr, Py_ssize_t n, int *overflowed) {
    if (n <= 0) {
        return PyFloat_FromDouble(-INFINITY);
    }

    int64_t ending_here = arr[0];  // best sum of a subarray ending at the current index
    int64_t best = ending_here;    // best sum seen anywhere so far

    for (Py_ssize_t idx = 1; idx < n; ++idx) {
        const int64_t value = arr[idx];
        if (ending_here <= 0) {
            // A non-positive prefix can only hurt: restart at this element.
            ending_here = value;
        } else {
            int64_t extended;
            if (add_overflow_int64(ending_here, value, &extended)) {
                *overflowed = 1;
                return NULL;
            }
            ending_here = extended;
        }
        if (best < ending_here) {
            best = ending_here;
        }
    }
    return PyLong_FromLongLong(best);
}
// Kadane's maximum-subarray scan over an array of Python objects, using Python
// arithmetic and comparisons so values of any magnitude work.
//   n <= 0 -> returns a Python float -inf.
//   Otherwise returns a new reference to the maximum sum, or NULL with an
//   exception set if any Python operation fails.
// The references held by arr[] are borrowed; this function never steals them.
static PyObject* kadane_big(PyObject **arr, Py_ssize_t n) {
    if (n <= 0) {
        return PyFloat_FromDouble(-INFINITY);
    }

    PyObject *ending_here = arr[0];  // owned: best sum ending at the current index
    PyObject *best = arr[0];         // owned: best sum seen so far
    Py_INCREF(ending_here);
    Py_INCREF(best);

    PyObject *zero = PyLong_FromLong(0);
    if (zero == NULL) {
        goto fail;
    }

    for (Py_ssize_t idx = 1; idx < n; ++idx) {
        const int positive = PyObject_RichCompareBool(ending_here, zero, Py_GT);
        if (positive < 0) {
            goto fail;
        }

        PyObject *next;
        if (positive == 1) {
            // Extend the current run.
            next = PyNumber_Add(ending_here, arr[idx]);
            if (next == NULL) {
                goto fail;
            }
        } else {
            // Restart the run at this element.
            next = arr[idx];
            Py_INCREF(next);
        }
        Py_DECREF(ending_here);
        ending_here = next;

        const int improved = PyObject_RichCompareBool(ending_here, best, Py_GT);
        if (improved < 0) {
            goto fail;
        }
        if (improved == 1) {
            Py_INCREF(ending_here);
            Py_DECREF(best);
            best = ending_here;
        }
    }

    Py_DECREF(ending_here);
    Py_DECREF(zero);
    return best;  // new reference

fail:
    Py_DECREF(ending_here);
    Py_DECREF(best);
    Py_XDECREF(zero);  // NULL when the failure was creating zero itself
    return NULL;
}
// Generate int64 array fast path; returns 0 on success.
//
// Fills out[0..n-1] with min_v + (draw % (max_v - min_v + 1)), where each draw
// is the next 32-bit value of the LCG seeded with `seed`. The caller must
// guarantee max_v >= min_v and that `out` has room for n elements.
static int gen_array_int64(Py_ssize_t n, uint32_t seed, int64_t min_v, int64_t max_v, int64_t *out) {
    uint32_t state = seed;

    // Width of [min_v, max_v], computed in unsigned arithmetic so it cannot
    // overflow. It wraps to 0 only when the interval covers every int64 value
    // (true width 2^64).
    const uint64_t range = ((uint64_t)max_v - (uint64_t)min_v) + 1ULL;

    // A 32-bit draw is already smaller than any range above 0xFFFFFFFF, so the
    // modulo is the identity there. Treating the wrapped range == 0 case the
    // same way avoids a division by zero.
    const int draw_is_offset = (range == 0ULL) || (range > 0xFFFFFFFFULL);

    for (Py_ssize_t i = 0; i < n; ++i) {
        const uint32_t draw = lcg_next(&state);
        const uint64_t offset = draw_is_offset ? (uint64_t)draw : ((uint64_t)draw % range);
        // offset <= max_v - min_v and offset < 2^32, so this sum cannot overflow.
        out[i] = min_v + (int64_t)offset;
    }
    return 0;
}
// Generate PyObject* int array general path using Python arithmetic.
//
// Returns a PyMem-allocated array of n owned references, each equal to
// (draw % (max_val - min_val + 1)) + min_val evaluated with Python semantics,
// where draw is the next 32-bit LCG value. The caller must Py_DECREF every
// element and PyMem_Free the array. Returns NULL with an exception set on
// failure (allocation failure, or any Python arithmetic error such as a zero
// range).
static PyObject** gen_array_big(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) {
    // Always allocate at least one slot so n <= 0 still yields a valid pointer.
    const size_t slots = (n > 0) ? (size_t)n : (size_t)1;
    // Reject element counts whose byte size would wrap around size_t; an
    // unchecked multiplication could otherwise produce an undersized buffer.
    if (slots > SIZE_MAX / sizeof(PyObject*)) {
        PyErr_NoMemory();
        return NULL;
    }
    PyObject **arr = (PyObject**)PyMem_Malloc(slots * sizeof(PyObject*));
    if (!arr) {
        PyErr_NoMemory();
        return NULL;
    }

    Py_ssize_t filled = 0;       // number of arr[] entries that hold a reference
    uint32_t state = seed;
    PyObject *range_obj = NULL;  // max_val - min_val + 1

    {
        PyObject *one = PyLong_FromLong(1);
        if (one != NULL) {
            PyObject *span = PyNumber_Subtract(max_val, min_val);
            if (span != NULL) {
                range_obj = PyNumber_Add(span, one);
                Py_DECREF(span);
            }
            Py_DECREF(one);
        }
    }
    if (range_obj == NULL) {
        goto fail;
    }

    for (; filled < n; ++filled) {
        PyObject *draw = PyLong_FromUnsignedLong((unsigned long)lcg_next(&state));
        if (!draw) {
            goto fail;
        }
        PyObject *offset = PyNumber_Remainder(draw, range_obj);
        Py_DECREF(draw);
        if (!offset) {
            goto fail;
        }
        PyObject *val = PyNumber_Add(offset, min_val);
        Py_DECREF(offset);
        if (!val) {
            goto fail;
        }
        arr[filled] = val;
    }

    Py_DECREF(range_obj);
    return arr;

fail:
    // Release only the elements created so far, then the array itself.
    for (Py_ssize_t k = 0; k < filled; ++k) {
        Py_DECREF(arr[k]);
    }
    Py_XDECREF(range_obj);
    PyMem_Free(arr);
    return NULL;
}
// Box an int64 array into Python ints and run the arbitrary-precision Kadane
// scan on it. Returns a new reference, or NULL with an exception set.
static PyObject* kadane_boxed_int64(const int64_t *values, Py_ssize_t n) {
    // Guard the byte-size multiplication against size_t wrap-around.
    if (n < 0 || (size_t)n > SIZE_MAX / sizeof(PyObject*)) {
        PyErr_NoMemory();
        return NULL;
    }
    PyObject **boxed = (PyObject**)PyMem_Malloc((size_t)n * sizeof(PyObject*));
    if (!boxed) {
        PyErr_NoMemory();
        return NULL;
    }

    PyObject *result = NULL;
    Py_ssize_t made = 0;  // number of boxed[] entries that hold a reference
    for (; made < n; ++made) {
        boxed[made] = PyLong_FromLongLong(values[made]);
        if (!boxed[made]) {
            goto done;
        }
    }
    result = kadane_big(boxed, n);

done:
    for (Py_ssize_t k = 0; k < made; ++k) {
        Py_DECREF(boxed[k]);
    }
    PyMem_Free(boxed);
    return result;
}

// Generate n LCG values mapped into [min_val, max_val] and return their
// maximum subarray sum.
//   n <= 0 -> Python float -inf.
//   Both bounds are Python ints that fit in long long with max >= min ->
//     native int64 path; if the running sum overflows int64 the same data is
//     re-scanned with Python integers.
//   Anything else (huge ints, max < min, non-int bounds) -> the array is built
//     and scanned entirely with Python arithmetic.
// Returns a new reference, or NULL with an exception set.
static PyObject* max_subarray_sum_internal(Py_ssize_t n, uint32_t seed, PyObject *min_val, PyObject *max_val) {
    if (n <= 0) {
        return PyFloat_FromDouble(-INFINITY);
    }

    if (PyLong_Check(min_val) && PyLong_Check(max_val)) {
        int min_ovf = 0;
        int max_ovf = 0;
        long long min64 = PyLong_AsLongLongAndOverflow(min_val, &min_ovf);
        long long max64 = 0;
        if (!min_ovf) {
            max64 = PyLong_AsLongLongAndOverflow(max_val, &max_ovf);
        }

        if (!min_ovf && !max_ovf && max64 >= min64) {
            // Reject lengths whose byte size would wrap around size_t; without
            // this check a huge n could yield a tiny buffer that the generator
            // then overruns.
            if ((size_t)n > SIZE_MAX / sizeof(int64_t)) {
                PyErr_NoMemory();
                return NULL;
            }
            int64_t *arr = (int64_t*)PyMem_Malloc((size_t)n * sizeof(int64_t));
            if (!arr) {
                PyErr_NoMemory();
                return NULL;
            }
            if (gen_array_int64(n, seed, (int64_t)min64, (int64_t)max64, arr) != 0) {
                PyMem_Free(arr);
                return NULL;
            }

            int overflowed = 0;
            PyObject *res = kadane_int64(arr, n, &overflowed);
            if (!res && overflowed) {
                // int64 running sum overflowed: redo the scan with Python ints.
                res = kadane_boxed_int64(arr, n);
            }
            PyMem_Free(arr);
            return res;
        }
    }

    // General path: everything is done with Python objects.
    PyObject **arr_obj = gen_array_big(n, seed, min_val, max_val);
    if (!arr_obj) {
        return NULL;
    }
    PyObject *res = kadane_big(arr_obj, n);
    for (Py_ssize_t i = 0; i < n; ++i) {
        Py_DECREF(arr_obj[i]);
    }
    PyMem_Free(arr_obj);
    return res;
}
// Python entry point: max_subarray_sum(n, seed, min_val, max_val).
// A negative n is treated as 0; the seed is reduced to its low 32 bits.
// Returns NULL with an exception set if the arguments cannot be parsed or the
// seed cannot be converted.
static PyObject* py_max_subarray_sum(PyObject *self, PyObject *args) {
    Py_ssize_t length;
    PyObject *seed_arg;
    PyObject *lower;
    PyObject *upper;
    if (!PyArg_ParseTuple(args, "nOOO", &length, &seed_arg, &lower, &upper)) {
        return NULL;
    }
    if (length < 0) {
        length = 0;
    }

    const unsigned long long raw_seed = PyLong_AsUnsignedLongLongMask(seed_arg);
    const uint32_t seed = (uint32_t)(raw_seed & 0xFFFFFFFFULL);
    if (PyErr_Occurred()) {
        return NULL;
    }
    return max_subarray_sum_internal(length, seed, lower, upper);
}
// Python entry point: total_max_subarray_sum(n, initial_seed, min_val, max_val).
// Draws 20 consecutive seeds from the LCG started at initial_seed (low 32 bits)
// and returns the sum of the 20 maximum-subarray results, accumulated with
// Python addition. A negative n is treated as 0.
static PyObject* py_total_max_subarray_sum(PyObject *self, PyObject *args) {
    Py_ssize_t length;
    PyObject *seed_arg;
    PyObject *lower;
    PyObject *upper;
    if (!PyArg_ParseTuple(args, "nOOO", &length, &seed_arg, &lower, &upper)) {
        return NULL;
    }
    if (length < 0) {
        length = 0;
    }

    const unsigned long long raw_seed = PyLong_AsUnsignedLongLongMask(seed_arg);
    uint32_t state = (uint32_t)(raw_seed & 0xFFFFFFFFULL);
    if (PyErr_Occurred()) {
        return NULL;
    }

    PyObject *total = PyLong_FromLong(0);
    if (total == NULL) {
        return NULL;
    }

    for (int round = 0; round < 20; ++round) {
        const uint32_t seed = lcg_next(&state);
        PyObject *part = max_subarray_sum_internal(length, seed, lower, upper);
        PyObject *sum = NULL;
        if (part != NULL) {
            sum = PyNumber_Add(total, part);
            Py_DECREF(part);
        }
        // The old total is released on both the success and the failure path.
        Py_DECREF(total);
        total = sum;
        if (total == NULL) {
            return NULL;
        }
    }
    return total;
}
static PyMethodDef module_methods[] = {
{"max_subarray_sum", (PyCFunction)py_max_subarray_sum, METH_VARARGS, "Compute maximum subarray sum using LCG-generated array."},
{"total_max_subarray_sum", (PyCFunction)py_total_max_subarray_sum, METH_VARARGS, "Compute total of maximum subarray sums over 20 LCG seeds."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"python_hard",
NULL,
-1,
module_methods,
NULL,
NULL,
NULL,
NULL
};
PyMODINIT_FUNC PyInit_python_hard(void) {
return PyModule_Create(&moduledef);
}

View File

@@ -0,0 +1,25 @@
from setuptools import setup, Extension
import sys
import os
# Build configuration for the calculate_pi C extension.
# Compiler flags are chosen per platform: on Windows (os.name == 'nt') the
# MSVC-style switches are used, everywhere else the GCC/Clang-style ones.
extra_compile_args = []
extra_link_args = []
if os.name == 'nt':
    extra_compile_args.extend(['/O2', '/fp:precise'])
else:
    extra_compile_args.extend(['-O3', '-fno-strict-aliasing'])

module = Extension(
    'calculate_pi',
    sources=['calculate_pi.c'],
    extra_compile_args=extra_compile_args,
    extra_link_args=extra_link_args,
)

setup(
    name='calculate_pi',
    version='1.0.0',
    description='High-performance C extension for computing pi via the Leibniz series',
    ext_modules=[module],
)

View File

@@ -0,0 +1,25 @@
from setuptools import setup, Extension
import sys
# Build configuration for the python_hard C extension.
# Windows builds use MSVC-style optimization switches plus link-time code
# generation; other platforms use GCC/Clang-style switches.
# NOTE(review): '-march=native' presumably ties the binary to the build
# machine's CPU -- confirm this is acceptable before distributing builds.
extra_compile_args = []
extra_link_args = []
if sys.platform == 'win32':
    extra_compile_args = ['/O2', '/Ot', '/GL', '/fp:fast']
    extra_link_args = ['/LTCG']
else:
    extra_compile_args = ['-O3', '-march=native']

module = Extension(
    name='python_hard',
    sources=['python_hard.c'],
    extra_compile_args=extra_compile_args,
    extra_link_args=extra_link_args,
    language='c'
)

setup(
    name='python_hard',
    version='1.0.0',
    description='High-performance C extension reimplementation',
    ext_modules=[module]
)

View File

@@ -0,0 +1,14 @@
from setuptools import setup, Extension
# Build configuration for the zz_my_module C extension (default compiler flags).
zz_extension = Extension(name='zz_my_module', sources=['zz_my_module.c'])

setup(
    name='zz_my_module',
    version='1.0',
    description='This is a custom C extension module.',
    ext_modules=[zz_extension],
)

View File

@@ -0,0 +1,38 @@
# Build first: python setup.py build_ext --inplace
import time
import math
import calculate_pi
# Original Python implementation
def py_leibniz_pi(iterations):
    """Approximate pi with the Leibniz series.

    Starts from 1.0 and, for each i in 1..iterations, subtracts 1/(4i - 1) and
    adds 1/(4i + 1); the sum is multiplied by 4 at the end. With
    iterations <= 0 the loop does not run and the result is 4.0.
    """
    result = 1.0
    for i in range(1, iterations + 1):
        j = i * 4 - 1
        result -= (1 / j)
        j = i * 4 + 1
        result += (1 / j)
    return result * 4
# Number of series iterations used for both timed runs.
iters = 5_000_000
# Warm-up: call each implementation once with a tiny input before timing.
calculate_pi.leibniz_pi(10)
py_leibniz_pi(10)
# Time the C extension.
start = time.perf_counter()
res_c = calculate_pi.leibniz_pi(iters)
end = time.perf_counter()
ctime = end - start
# Time the pure-Python implementation on the same input.
start = time.perf_counter()
res_py = py_leibniz_pi(iters)
end = time.perf_counter()
pytime = end - start
# Report both results, how far apart they are, and the timings.
print(f"Iterations: {iters}")
print(f"C extension result: {res_c}")
print(f"Python result: {res_py}")
print(f"Absolute difference: {abs(res_c - res_py)}")
print(f"C extension time: {ctime:.6f} s")
print(f"Python time: {pytime:.6f} s")
# Guard against a zero C timing so the ratio never divides by zero.
print(f"Speedup: {pytime/ctime if ctime > 0 else float('inf'):.2f}x")

View File

@@ -0,0 +1,69 @@
import time
# Original Python code
def lcg(seed, a=1664525, c=1013904223, m=2**32):
    """Infinite linear congruential generator.

    Starting from `seed`, repeatedly yields value = (a * value + c) % m.
    The seed itself is never yielded; the first item is already one step on.
    """
    value = seed
    while True:
        value = (a * value + c) % m
        yield value
def max_subarray_sum_py(n, seed, min_val, max_val):
    """Return the largest sum over all non-empty contiguous slices of a generated list.

    The list holds n values, each computed as
    next(lcg) % (max_val - min_val + 1) + min_val from an LCG started at `seed`.
    Every (start, end) pair is scanned, so the cost is quadratic in n; this
    brute-force form is the baseline this script times against the extension.
    Returns float('-inf') when n <= 0, because no slice exists.
    """
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum
def total_max_subarray_sum_py(n, initial_seed, min_val, max_val):
    """Sum max_subarray_sum_py over 20 seeds drawn from an LCG started at initial_seed."""
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum_py(n, seed, min_val, max_val)
    return total_sum
# Build the extension first (e.g. python setup.py build_ext --inplace, or install it),
# then import it.
import python_hard as ext
# Example parameters
n = 600
# Wider than 32 bits on purpose; the LCG reduces every value modulo 2**32.
initial_seed = 12345678901234567890
min_val = -1000
max_val = 1000
# Time Python (single array). The seed passed is one LCG step on from initial_seed.
t0 = time.perf_counter()
py_res1 = max_subarray_sum_py(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)
t1 = time.perf_counter()
py_time1 = t1 - t0
# Time C extension with exactly the same arguments.
t0 = time.perf_counter()
ext_res1 = ext.max_subarray_sum(n, (initial_seed * 1664525 + 1013904223) % (2**32), min_val, max_val)
t1 = time.perf_counter()
ext_time1 = t1 - t0
# Both implementations are expected to agree exactly.
print('max_subarray_sum equality:', py_res1 == ext_res1)
print('Python time:', py_time1)
print('C ext time:', ext_time1)
# Total over 20 seeds: time Python first, then the extension.
t0 = time.perf_counter()
py_res2 = total_max_subarray_sum_py(n, initial_seed, min_val, max_val)
t1 = time.perf_counter()
py_time2 = t1 - t0
t0 = time.perf_counter()
ext_res2 = ext.total_max_subarray_sum(n, initial_seed, min_val, max_val)
t1 = time.perf_counter()
ext_time2 = t1 - t0
print('total_max_subarray_sum equality:', py_res2 == ext_res2)
print('Python total time:', py_time2)
print('C ext total time:', ext_time2)

View File

@@ -0,0 +1,16 @@
import time
import zz_my_module
def python_hello_world():
    """Pure-Python counterpart of the extension function: print the greeting."""
    print("Hello, World!")
# Time a single call of the pure-Python function.
start = time.time()
python_hello_world()
end = time.time()
print(f"Python function execution time: {end - start:.6f} seconds")
# Time a single call of the C extension function.
start = time.time()
zz_my_module.hello_world()
end = time.time()
print(f"C extension execution time: {end - start:.6f} seconds")

View File

@@ -0,0 +1,28 @@
#include <Python.h>
// Function to be called from Python: writes the greeting and returns None.
// The text goes through PySys_WriteStdout so it lands on Python's sys.stdout.
// A plain printf() writes to the C stdio stream instead, which Python-level
// redirection does not capture and whose separate buffer can reorder output
// relative to print().
static PyObject* zz_hello_world(PyObject* self, PyObject* args) {
    (void)self;
    (void)args;  // any arguments passed by the caller are ignored
    PySys_WriteStdout("Hello, World!\n");
    Py_RETURN_NONE;
}
// Method definition structure
static PyMethodDef zz_my_methods[] = {
{"hello_world", zz_hello_world, METH_VARARGS, "Print 'Hello, World!'"},
{NULL, NULL, 0, NULL} // Sentinel
};
// Module definition
static struct PyModuleDef zz_my_module = {
PyModuleDef_HEAD_INIT,
"zz_my_module",
"Extension module that prints Hello, World!",
-1,
zz_my_methods
};
// Module initialization function
PyMODINIT_FUNC PyInit_zz_my_module(void) {
return PyModule_Create(&zz_my_module);
}