I'm pretty green with Python so hopefully I can phrase this question correctly.
The overall problem involves calling a C routine from Python. I can get pretty close by kluging together a few related SO questions/answers, but I cannot seem to make things come together quite right. There are two aspects: first is calling the C routine with pointers and the second is the use of a callback function.
Background: Rubner provides an Earth Mover's Distance (EMD) routine written in C [EMD C code location]. He also provides two example C programs that call the EMD routine. I am trying to develop a Python routine, as an alternative to (for example) example2.c, that will call the EMD routine. (Yes, I'm familiar with the OpenCV implementation of EMD.)
For convenience, here is the header file for the emd.c code I'd like to call from Python:
/* DEFINITIONS */
/* Compile-time limits and numeric constants declared by this header. */
#define MAX_SIG_SIZE 100
#define MAX_ITERATIONS 500
#define INFINITY 1e20
#define EPSILON 1e-6
/*****************************************************************************/
/* feature_t SHOULD BE MODIFIED BY THE USER TO REFLECT THE FEATURE TYPE */
/* In this configuration a feature is a single int; the struct variants are
   left commented out as alternatives. */
typedef int feature_t;
/* typedef struct { int X,Y,Z; } feature_t;*/
/*typedef struct { int X; } feature_t; */
/*****************************************************************************/
/* A signature: n features, each paired with a weight. Both vectors are held
   by pointer, not stored inline. */
typedef struct
{
int n; /* Number of features in the signature */
feature_t *Features; /* Pointer to the features vector */
float *Weights; /* Pointer to the weights of the features */
} signature_t;
/* One flow entry: an amount moved between a feature of signature 1 and a
   feature of signature 2. */
typedef struct
{
int from; /* Feature number in signature 1 */
int to; /* Feature number in signature 2 */
float amount; /* Amount of flow from "from" to "to" */
} flow_t;
/* emd: takes two signatures, a callback that returns a float for a pair of
   feature pointers, a flow_t pointer and an int pointer; returns a float.
   NOTE(review): Flow and FlowSize presumably receive the resulting flows and
   their count -- confirm against emd.c. */
float emd(signature_t *Signature1, signature_t *Signature2,
float (*func)(feature_t *, feature_t *),
flow_t *Flow, int *FlowSize);
/* NOTE(review): no matching #ifndef/#if is visible in this excerpt; it
   presumably closes an include guard opened before the excerpt starts. */
#endif
Finally, here is the Python code that I've kluged together so far. I think (but am not sure) that I've gotten the structures set up correctly. (Note that this is a simplified version of the possible feature structures in the Rubner emd.c code. I would eventually like to get the whole thing working, but I'm starting out simple for now.) The first problem I'm having is somewhere in the argtypes for the calling function. I've tried a few variations, but the examples available on the web are pretty slim and I've hit a wall.
import ctypes
# Upper bound on features per signature; sizes the ctypes array types below.
MAX_FEATURE_SIZE = 30
# Fixed-length ctypes array types: int features and float weights.
ARRAYFE = ctypes.c_int*MAX_FEATURE_SIZE
ARRAYWE= ctypes.c_float*MAX_FEATURE_SIZE
# NOTE(review): the elements are c_float, while the C prototype takes a
# flow_t* for the flow buffer -- confirm the intended element type.
ARRAYFL = ctypes.c_float*(2*MAX_FEATURE_SIZE-1)
# NOTE(review): this binds the c_int *type* itself, not a c_int instance.
flowSize = ctypes.c_int
# Load the compiled EMD shared library by file name.
emdlib = ctypes.CDLL('emdlib.dylib')
# Callback prototype: returns c_float, takes two POINTER(c_int) arguments.
# It is stored as an attribute on the ctypes module itself.
ctypes.CMPFUNC = ctypes.CFUNCTYPE(ctypes.c_float, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int))
# Distance callback intended for emd(): prints both arguments and returns
# abs(f1-f2).
# NOTE(review): per the CMPFUNC prototype the arguments are POINTER(c_int)
# objects, so f1-f2 operates on the pointers rather than on the ints they
# point to -- confirm whether f1[0] and f2[0] were intended.
def py_dist_func(f1,f2):
print "dist: ", f1, f2
return(abs(f1-f2))
# Wrap the Python function in the callback prototype so it can be passed to C.
dist = ctypes.CMPFUNC(py_dist_func)
# NOTE(review): both names are bound to the c_int type itself; flowSize
# repeats an identical assignment made earlier in this script.
n = ctypes.c_int
flowSize = ctypes.c_int
# ctypes counterpart of the C flow_t struct: two ints and a float.
class flow_t(ctypes.Structure):
_fields_ = [("from", ctypes.c_int),
("to", ctypes.c_int),
("amount", ctypes.c_float)]
# ctypes counterpart of the C signature_t struct.
# NOTE(review): the C header declares Features and Weights as pointers, while
# these fields embed fixed-size arrays inline -- the layouts presumably do not
# match; confirm.
class signature_t(ctypes.Structure):
_fields_ = [("N", n),("feature", ARRAYFE),
("weight", ARRAYWE)]
# Two earlier argtypes attempts, kept commented out for reference.
# emdlib.emd.argtypes = [ctypes.POINTER(signature_t), ctypes.POINTER(signature_t), ctypes.POINTER(ctypes.c_float), ctypes.POINTER(flow_t), ctypes.POINTER(ctypes.c_int)]
# emdlib.emd.argtypes = [ctypes.POINTER(signature_t), ctypes.POINTER(signature_t), ctypes.CMPFUNC(py_dist_func), ctypes.POINTER(flow_t), ctypes.POINTER(ctypes.c_int)]
# NOTE(review): the third entry is c_float, but the C header declares the
# third parameter as a function pointer,
# float (*func)(feature_t *, feature_t *) -- confirm.
emdlib.emd.argtypes = [ctypes.POINTER(signature_t), ctypes.POINTER(signature_t), ctypes.c_float, ctypes.POINTER(flow_t), ctypes.POINTER(ctypes.c_int)]
# emd.restype = ctypes.c_float
# NOTE(review): the C header declares emd() as returning float, not flow_t --
# confirm.
emdlib.emd.restype = flow_t
# Create the two (initially empty) signature structures.
signature1=signature_t()
signature2=signature_t()
# NOTE(review): these four names are bound to array *types* here and are then
# immediately rebound to plain Python lists just below.
feature1 = ARRAYFE
feature2 = ARRAYFE
weight1 =ARRAYWE
weight2 = ARRAYWE
# Sample data: 4 features/weights for signature 1, 2 for signature 2.
feature1 = [0,1,2,3]
feature2 = [0,3]
weight1 = [1,1,1,1]
weight2 = [1,1]
#signature1= [4,feature1, weight1]
#signature2 = [2, feature2, weight2]
# sample: arr = (ctypes.c_int * len(pyarr))(*pyarr)
# NOTE(review): only signature1.N is assigned; signature2.N is never set in
# this script.
signature1.N = len(feature1)
# Copy the Python lists into fixed-size ctypes arrays of MAX_FEATURE_SIZE
# elements and store them in the structures.
signature1.feature = (ctypes.c_int * MAX_FEATURE_SIZE)(*feature1)
signature2.feature = (ctypes.c_int * MAX_FEATURE_SIZE)(*feature2)
signature1.weight = (ctypes.c_float * MAX_FEATURE_SIZE)(*weight1)
signature2.weight = (ctypes.c_float * MAX_FEATURE_SIZE)(*weight2)
# NOTE(review): the last two arguments are types (ctypes.POINTER(flow_t) and
# flowSize, which is c_int), not instances of a flow buffer and a counter.
e = emdlib.emd(ctypes.byref(signature1), ctypes.byref(signature2), dist, ctypes.POINTER(flow_t), flowSize)
print "EMD= ", e
print "flowSize", flowSize
Any suggestions on where I've gone wrong would be greatly appreciated.
The second problem I'm sure I'll run into is with the argtypes for the returning pointers; any suggestions here would be appreciated as well.
Thanks in advance.
-------------- Updated (working) CODE
import ctypes
import math
import itertools
# Largest number of features per signature that this script sizes its
# buffers for.
MAX_FEATURE_SIZE = 25

# Scalar C types used by the EMD interface: features are ints, weights are
# floats, and counts are ints.
FEATURE_t, WEIGHT_t, COUNT_t = ctypes.c_int, ctypes.c_float, ctypes.c_int

# Pointer type matching each scalar type above, in the same order.
FEATURE_ptr, WEIGHT_ptr, COUNT_ptr = map(
    ctypes.POINTER, (FEATURE_t, WEIGHT_t, COUNT_t)
)
class FLOW_t(ctypes.Structure):
    """One flow entry: an amount moved from one feature to another.

    Holds two ints followed by a float. The first field is spelled ``frm``
    because ``from`` is a reserved word in Python and could not be read back
    as an attribute.
    """

    _fields_ = [
        ("frm", ctypes.c_int),
        ("to", ctypes.c_int),
        ("amount", ctypes.c_float),
    ]
class SIGNATURE_t(ctypes.Structure):
    """A signature: a feature count plus pointers to features and weights.

    The ``feature`` and ``weight`` fields are declared as pointers, yet a
    ctypes array built as ``TYPE * array_len`` can be assigned to them
    directly. This is the same idea as writing ``char *foo = "ABCDEF"`` in C.
    """

    _fields_ = [
        ("N", COUNT_t),
        ("feature", FEATURE_ptr),
        ("weight", WEIGHT_ptr),
    ]
# Pointer types for the two structures passed to the C routine.
SIGNATURE_ptr = ctypes.POINTER(SIGNATURE_t)
FLOW_ptr = ctypes.POINTER(FLOW_t)

# Callback prototype: takes two feature pointers and returns a C float.
CMPFUNC_t = ctypes.CFUNCTYPE(ctypes.c_float, FEATURE_ptr, FEATURE_ptr)

# Flow buffer type with room for 2 * MAX_FEATURE_SIZE - 1 entries.
FLOW_ARRAY_t = FLOW_t * (2 * MAX_FEATURE_SIZE - 1)
# Convenience function - keeps us from having to remember all the types and
# parameters later on.
def make_signature(features, weights):
    """Build a SIGNATURE_t from parallel sequences of features and weights.

    Args:
        features: iterable of integer features.
        weights: iterable of numeric weights, one per feature.

    Returns:
        A SIGNATURE_t whose N is the number of features and whose ``feature``
        and ``weight`` fields refer to newly created ctypes arrays holding
        copies of the given values.

    Raises:
        ValueError: if the two sequences differ in length, or if there are
            more than MAX_FEATURE_SIZE features.
    """
    features = list(features)
    weights = list(weights)
    count = len(features)

    # N is taken from the features alone, so a shorter weights array would
    # leave the C side reading past the end of the weights buffer.
    if len(weights) != count:
        raise ValueError(
            "features and weights must have the same length (got %d and %d)"
            % (count, len(weights))
        )
    # FLOW_ARRAY_t is sized from MAX_FEATURE_SIZE, so larger signatures could
    # produce more flows than that buffer can hold.
    if count > MAX_FEATURE_SIZE:
        raise ValueError(
            "a signature may hold at most %d features (got %d)"
            % (MAX_FEATURE_SIZE, count)
        )

    sig = SIGNATURE_t()
    sig.N = count
    sig.feature = (FEATURE_t * count)(*features)
    sig.weight = (WEIGHT_t * count)(*weights)
    return sig
# Custom distance function, written in Python, that gets handed to the C code.
def py_dist_func(f1, f2):
    """Return the distance between the two features that f1 and f2 point to.

    Both arguments are indexed at position 0 to obtain the feature values,
    which are then passed on to distance().
    """
    return distance(f1[0], f2[0])
# Placeholder for the distance between any two n-D points; currently it
# handles the one-dimensional case only.
def distance(p0, p1):
    """Return the absolute difference between p0 and p1 as a float."""
    gap = p0 - p1
    return math.fabs(gap)
# Wrap the Python distance function in the callback prototype. The wrapper
# is bound to a module-level name, so it stays referenced while the library
# is in use.
dist_callback = CMPFUNC_t(py_dist_func)

# Load the compiled EMD shared library.
emdlib = ctypes.CDLL('emdlib.dylib')

# Describe the C prototype to ctypes: emd() returns a float and takes two
# signature pointers, the distance callback, a flow buffer and a pointer to
# the flow count.
emdlib.emd.restype = ctypes.c_float
emdlib.emd.argtypes = [
    SIGNATURE_ptr,
    SIGNATURE_ptr,
    CMPFUNC_t,
    FLOW_ptr,
    COUNT_ptr,
]
# Sample input: two signatures over the same nine features, each with its
# own weights.
feature1 = [0, 1, 2, 3, 4, 5, 6, 7, 8]
weight1 = [0.275, 0.296, 0.002, 0.131, 0.208, 0.048, 0.058, 0.098, 0.455]
feature2 = [0, 1, 2, 3, 4, 5, 6, 7, 8]
weight2 = [0.285, 0.421, 0.028, 0.021, 0.240, 0.166, 0.023, 0.054, 0.469]

signature1 = make_signature(feature1, weight1)
signature2 = make_signature(feature2, weight2)

# Buffers passed to emd() alongside the signatures: the flow array and the
# flow count that are read back once the call returns.
flow_array = FLOW_ARRAY_t()
flow_size = COUNT_t()

e = emdlib.emd(
    ctypes.byref(signature1),
    ctypes.byref(signature2),
    dist_callback,
    flow_array,
    ctypes.byref(flow_size),
)
# Report the EMD and the individual flows, accumulating the total flow.
# print() is called with a single pre-formatted string, so the script runs
# unchanged on both Python 2 and Python 3.
print("EMD=  %s" % e)
print("Number of FlowS %d" % flow_size.value)
print("Flow")
print("from to amount")
totalFlow = 0.0
# Only the first flow_size.value entries of the buffer are reported.
for flow in flow_array[:flow_size.value]:
    print(" %d %d %f" % (flow.frm, flow.to, flow.amount))
    totalFlow = totalFlow + flow.amount
# Adjust the EMD to account for signatures of different total mass and make
# it a metric:
#     emdHat = e * totalFlow + |mass1 - mass2| * alpha * max_distance
alpha = 1.0
mass1 = sum(weight1)
mass2 = sum(weight2)
fList = [feature1, feature2]

# Largest distance over every (feature1, feature2) pair; it stays 0.0 when
# there are no pairs. The product is consumed lazily instead of being copied
# into a list first.
max_distance = 0.0
for p0, p1 in itertools.product(*fList):
    max_distance = max(max_distance, distance(p0, p1))

# print() is called with a single pre-formatted string, so the script runs
# unchanged on both Python 2 and Python 3.
print("\nMax distance= %f" % max_distance)
print("Total Source = %f" % mass1)
print("Total Demand = %f" % mass2)
print("Total Flow= %f\n " % totalFlow)
print("Alpha = %f\n" % alpha)

emdHat = e * totalFlow + math.fabs(mass1 - mass2) * alpha * max_distance
print("Corrected Earth Movers Distance \n")
print("emdHat = %f\n" % emdHat)
Through various arcane methods and the valuable comments, I finally got a piece of code working. As I mentioned in the comments, I'm not sure what the etiquette is, but I've seen enough similar questions asked that I thought it would be useful to post the final bit of code. It's not pretty, and if you find it useful enough to clean it up, I'd appreciate a link to a more elegant implementation.
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With