Dalke Scientific Software: More science. Less time. Products

Writing Python/C extensions by hand

Writing a ctypes interface is quick but has some overhead. I'll compare calling cos through Python's math library vs. calling it though ctypes bindings to libc.

import math, time, ctypes

R = range(100000)

libc = ctypes.CDLL("libc.dylib", ctypes.RTLD_GLOBAL)
libc.cos.argtypes = [ctypes.c_double]
libc.cos.restype = ctypes.c_double

def do_timings(cos):
  t1 = time.time()
  for x in R:
    cos(0.0)
  return time.time()-t1

def do_math_cos():
  return do_timings(math.cos)

def do_libc_cos():
  return do_timings(libc.cos)

print "math.cos", do_math_cos()
print "libc.cos", do_libc_cos()
The result is
math.cos 0.179316997528
libc.cos 0.487237215042
showing that the overhead of using ctypes is about three times that of calling a normal Python/C extension.

Drawing a Mandelbrot set

Read the wikipedia article if you don't know about the Mandelbrot set. From that article I took the following code, which tests the number of iterations needed for a given initial point to diverge. The algorithm never finishes for points in the set so when it reaches the maximum iteration limit it returns that count instead.

int iterate_point(double x0, double y0, int max_iterations) {
	int iteration = 0;
	double x=x0, y=y0, x2=x*x, y2=y*y;
	
	while (x2+y2<4 && iteration<max_iterations) {
		y = 2*x*y + y0;
		x = x2-y2 + x0;
		x2 = x*x;
		y2 = y*y;
		iteration++;
	}
	return iteration;
}
This is the sort of calculation that Python is slow at doing. (Well, except through the Numeric library.) Rather than rewriting the above in Python and taking the performance hit, I'll call it through Python extension.

Python's C APIs are pretty reasonable. It's been around for 16 years and some of the history shows through, which makes some things confusing. For example, the preferred way to initialize a module has changed over time. (Python is good with backwards compatibility and these changes don't break existing code.)

The big problems come because the programming is C. There is no automatic garbage collection so you need to do the reference counting manually. Getting that correct in the face of error handling is hard, as is getting the order of adding and removing references to an object.

Luckily, this example doesn't need any explicit reference counting calls. It's pretty straight-forward so I'll embed some comments in the code and leave it at that. The following file is named "mandelbrot.c":

#include "Python.h"

/* make this static if you don't want other code to call this function */
/* I don't make it static because want to access this via ctypes */
/* static */
int iterate_point(double x0, double y0, int max_iterations) {
	int iteration = 0;
	double x=x0, y=y0, x2=x*x, y2=y*y;
	
	while (x2+y2<4 && iteration<max_iterations) {
		y = 2*x*y + y0;
		x = x2-y2 + x0;
		x2 = x*x;
		y2 = y*y;
		iteration++;
	}
	return iteration;
}


/* The module doc string */
PyDoc_STRVAR(mandelbrot__doc__,
"Mandelbrot point evalutation kernel");

/* The function doc string */
PyDoc_STRVAR(iterate_point__doc__,
"x,y,max_iterations -> iteration count at that point, up to max_iterations");

/* The wrapper to the underlying C function */
static PyObject *
py_iterate_point(PyObject *self, PyObject *args)
{
	double x=0, y=0;
	int iterations, max_iterations=1000;
	/* "args" must have two doubles and may have an integer */
	/* If not specified, "max_iterations" remains unchanged; defaults to 1000 */
	/* The ':iterate_point' is for error messages */
	if (!PyArg_ParseTuple(args, "dd|i:iterate_point", &x, &y, &max_iterations))
		return NULL;
	/* Verify the parameters are correct */
	if (max_iterations < 0) max_iterations = 0;
	
	/* Call the C function */
	iterations = iterate_point(x, y, max_iterations);
	
	/* Convert from a C integer value to a Python integer instance */
	return PyInt_FromLong((long) iterations);
}

/* A list of all the methods defined by this module. */
/* "iterate_point" is the name seen inside of Python */
/* "py_iterate_point" is the name of the C function handling the Python call */
/* "METH_VARGS" tells Python how to call the handler */
/* The {NULL, NULL} entry indicates the end of the method definitions */
static PyMethodDef mandelbrot_methods[] = {
	{"iterate_point",  py_iterate_point, METH_VARARGS, iterate_point__doc__},
	{NULL, NULL}      /* sentinel */
};

/* When Python imports a C module named 'X' it loads the module */
/* then looks for a method named "init"+X and calls it.  Hence */
/* for the module "mandelbrot" the initialization function is */
/* "initmandelbrot".  The PyMODINIT_FUNC helps with portability */
/* across operating systems and between C and C++ compilers */
PyMODINIT_FUNC
initmandelbrot(void)
{
	/* There have been several InitModule functions over time */
	Py_InitModule3("mandelbrot", mandelbrot_methods,
                   mandelbrot__doc__);
}
This is a C extension. Here's the setup.py I used to build it
from distutils.core import setup, Extension

setup(name="mandelbrot", version="0.0",
	ext_modules = [Extension("mandelbrot", ["mandelbrot.c"])])
Finally, here's the code I used for testing. I compare calling iterate_point through the Python/C extension and through ctypes. This file is named "mandel.py"
#!/usr/bin/env python

import ctypes
import mandelbrot

ctypes_iterate_point = ctypes.CDLL("mandelbrot.so").iterate_point
ctypes_iterate_point.restype = ctypes.c_int
ctypes_iterate_point.argtypes = [ctypes.c_double, ctypes.c_double, ctypes.c_int]

x = -2
y = -1
w = 2.5
h = 2.0

NY = 40
NX = 70
RANGE_Y = range(NY)
RANGE_X = range(NX)

def render(iterate_point):
	chars = []
	append = chars.append
	for j in RANGE_Y:
		for i in RANGE_X:
			it = iterate_point(x+w/NX*i, y+h/NY*j, 1000)
			if it == 1000:
				append("*")
			elif it > 5:
				append(",")
			elif it > 2:
				append(".")
			else:
				append(" ")
		append("\n")
	return "".join(chars)

import time
t1 = time.time()
s1 = render(mandelbrot.iterate_point)
t2 = time.time()
s2 = render(ctypes_iterate_point)
t3 = time.time()
assert s1 == s2
print s1
print "as C extension", t2-t1
print "with ctypes", t3-t2
which produces
                                        .............,,,,,,*.......      
                                   ..............,,,,,,,,.........    
                                  ..............,,,,,,,,,..........   
                                ...............,,,,,,*,,,,..........  
                              ................,,,,******,,,.......... 
                             ...............,,,,,,******,,,,..........
                           ............,,,,,,,,,,,*****,,,,,,,...,,...
                         .............,,,,,,,,,,,,,,,,*,,,,,,,,,,,,,..
                        .............,,,,**,,,*************,,,,,,,,,,.
                      ...............,,,,**,******************,**,,,..
                   .................,,,,,,***********************,,,..
                ..................,,,,,,*************************,,,,.
            ........,,,,,,,,,,,,,,,,,,,***************************,,,,
        ............,,,,,,,,,,,,,,,,,*****************************,*,,
     ..............,,,,,,,,,,,,,,,,,,******************************,,,
    ...............,,,,,*,,**,*,,,,,,******************************,,,
  ...............,,,,,,,*********,,,*******************************,,.
  ..............,,,,,,************,,*******************************,,.
 ............,,,,,,,,,*************,*******************************,..
 .....,,,,.,,,,,,,,**,*************,******************************,,..
 ***************************************************************,,,,..
 .....,,,,.,,,,,,,,**,*************,******************************,,..
 ............,,,,,,,,,*************,*******************************,..
  ..............,,,,,,************,,*******************************,,.
  ...............,,,,,,,*********,,,*******************************,,.
    ...............,,,,,*,,**,*,,,,,,******************************,,,
     ..............,,,,,,,,,,,,,,,,,,******************************,,,
        ............,,,,,,,,,,,,,,,,,*****************************,*,,
            ........,,,,,,,,,,,,,,,,,,,***************************,,,,
                ..................,,,,,,*************************,,,,.
                   .................,,,,,,***********************,,,..
                      ...............,,,,**,******************,**,,,..
                        .............,,,,**,,,*************,,,,,,,,,,.
                         .............,,,,,,,,,,,,,,,,*,,,,,,,,,,,,,..
                           ............,,,,,,,,,,,*****,,,,,,,...,,...
                             ...............,,,,,,******,,,,..........
                              ................,,,,******,,,.......... 
                                ...............,,,,,,*,,,,..........  
                                  ..............,,,,,,,,,..........   
                                   ..............,,,,,,,,.........    

as C extension 0.0400660037994
with ctypes 0.0578570365906
The performance here isn't as bad as the cos example from earlier but still shows that there's a 50% performance hit going through ctypes. Of course the longer the function runs the less percentage time is spent in overhead.



Copyright © 2001-2013 Andrew Dalke Scientific AB