I'm trying to port a knn (k nearest neighbor search ) on a kd-tree that I wrote in Java to C.
The Java output, as expected:
Nearest to Key: 6.0,5.0,4.0
Key:6.0,5.0,4.0,min distance:0.0
Key:5.0,4.0,3.0,min distance:3.0
Key:7.0,6.0,5.0,min distance:3.0
Key:4.0,3.0,2.0,min distance:12.0
Key:3.0,2.0,1.0,min distance:27.0
Java code, class (Its a quick implementation just to get the algorithm working before I start my port):
class kd_tree {
public int DEFAULT_NUMBER_OF_DIMENSIONS = 1;
static int current_number_of_data_points = 0;
static int current_global_median = 0;
kd_tree left;
kd_tree right;
float[] data;
private int k_dimensions;
float distance_to_neighbor;
}
Java knn method:
/*===========================================================================
Function knn, knn algorithm using kd-tree & minimumNeighbor function.
Description:
==========================================================*/
public static kd_tree[] knn( kd_tree root
, float[] data_point
, int depth
, int k_dimension
, int number_of_nearest_neighbors) {
kd_tree[] all_nearests = new kd_tree[current_number_of_data_points];
kd_tree[] n_nearests = new kd_tree[current_number_of_data_points];
if (root != null) {
int nearests_counter = 0;
/* now lets traverse the tree inorder & calculate distances from the
query point based on Morris Traversal algorithm that does not
use any stacks or no recursion */
kd_tree cur = root;
kd_tree pre;
while (cur != null) {
if (cur.left == null) {
/* debugging System.out.println(cur.data[0]);
calculate distance */
cur.distance_to_neighbor = n_dimensional_euclidean(data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right; // move to next right node
} else { // has a left subtree
pre = cur.left;
while (pre.right != null && pre.right != cur) { // find rightmost
pre = pre.right;
}
/* Make current as the right child of its inorder
predecessor */
if (pre.right == null) {
pre.right = cur;
cur = cur.left;
} else {
pre.right = null;
// debugging printf("%d ", current->data);
// calculate distance
cur.distance_to_neighbor = n_dimensional_euclidean( data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right;
}
}
}//end while
// base cases
// sort from least to greatest
insertion_sort_based_on_distance(all_nearests);
// return on specified number_of_nearest_neighbors
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i]=all_nearests[i];
}
}
return n_nearests;
}
The relevant C code snippets:
#include <stdlib.h>
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree_ {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
static tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
static tree* processing_space [KD_TREE_HEAP_SIZE];
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, int total_number_of_elements
, tree* n_nearests);
Relevant knn implementation, kdtree.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "kdtree.h"
#include "sorting_utility.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, tree* n_nearests)
{
tree all_nearests[current_number_of_kd_tree_nodes];
tree* source_data_end_ptr = NULL;
tree* source_data_start_ptr = NULL;
tree* destination_data_start_ptr = NULL;
tree* destination_data_end_ptr = NULL;
if(NULL != root && NULL != n_nearests)
{
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while(NULL != cur)
{
if(NULL == cur->left)
{
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right; // move to next right node
}
else
{
pre = cur->left;
while(pre->right != NULL && pre->right != cur)
{ // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if(pre->right == NULL)
{
pre->right = cur;
cur = cur->left;
}
else
{
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right;
}
}
} // end while
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for(int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for(int c = 0; c < k_dimensions; c++)
{
if(NULL != processing_space[i].info)
{
printf("%f,", processing_space[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", processing_space[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy relevant range up current_number_of_kd_tree_nodes before sort,
* in order to avoid sorting beyond range that does not have any data */
source_data_start_ptr = &processing_space[0];
destination_data_start_ptr = &all_nearests[0];
destination_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
while(destination_data_start_ptr <= destination_data_end_ptr)
{
*destination_data_start_ptr = *source_data_start_ptr;
source_data_start_ptr++;
destination_data_start_ptr++;
}
// sort based on distance from query point
quick_sort(all_nearests, 0, current_number_of_kd_tree_nodes - 1);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for (int c = 0; c < k_dimensions; c++)
{
if (NULL != all_nearests[i].info)
{
printf ("%f,", all_nearests[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", all_nearests[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy only the n_nearest & ignore/ do NOT copy any empty tree nodes */
// reuse pointers
destination_data_end_ptr = &n_nearests[number_of_nearest_neighbors - 1];
source_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
source_data_start_ptr = all_nearests;
int counter = 0;
while(counter < number_of_nearest_neighbors && source_data_start_ptr < source_data_end_ptr)
{
// do NOT copy any empty tree nodes
if(source_data_start_ptr != NULL && source_data_start_ptr->info != NULL)
{
/* ATTENTION: i checked with debugger & values for
(distance_to_neighbor,info,left,right were not zeroed or empty */
float distance_to_neighbor = source_data_start_ptr->distance_to_neighbor;
float* info = source_data_start_ptr->info;
tree* left = source_data_start_ptr->left;
tree* right = source_data_start_ptr->right;
n_nearests[counter].distance_to_neighbor = distance_to_neighbor;
n_nearests[counter].info = info;
n_nearests[counter].left = left;
n_nearests[counter].right = right;
counter++;
}
source_data_start_ptr++;
}
}
else
{
printf("Error, knn input parameter error");
}
return n_nearests;
} // end function
Relevant snippet of main.c:
#include<kdtree.h>
int main()
{
const int query_size = 10;
const int k_dimensions = 3;
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%Nearest to Key: {%f,%f,%f}\n", query_size,query_point[0], query_point[1], query_point[2]);
knn(root, query_point, 0, k_dimensions, query_size, KD_TREE_HEAP_SIZE, n_nearests);
// print n nearest neighbors
tree* tree_ptr = &n_nearests[0];
for(int i = 0; i <query_size; i++)
{
if(NULL != tree_ptr->info)
{
printf("Key={");
for(int c=0; c < k_dimensions; c++)
{
printf("%d,", tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
} // end main
Output by the C version:
knn (k nearest neighboor)
:5 Nearest neighbors to Key: {6.000000,5.000000,4.000000} are:
***For debugging before sort:
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
***For debugging after sort
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
**After function return:**
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
I have already tested insertion, inorder traversal, searching, deletion, and finding a minimum neighbor in both Java & C version they are working as expected.
In the C version the knn function returns zeroed values after the function return?
Why are values zero ine the struct , after the call in main function (refer to output area titled "After function return" ?
Hoping someone else can spot something obvious, really desperate pulling out my hair.
There are multiple problems:
n_nearest
is defined in main
as an array of pointers to tree
objects. But you define the argument to knn
as a pointer to an array of actual tree
objects, which is incompatible. The argument should be defined as tree **n_nearest
or tree* n_nearest[]
, which is a pointer to an array of pointers. You should then just store references to the tree elements, not copy values. Similarly, all_nearests
should be defined as an array of pointers, just like in java, not an array of objects to store copies of the tree nodes.gcc -Wall -Werror
or clang -Weverything -Werror
are a good start to save countless hours of frustration.kd_tree
could be defined in C as typedef tree *kd_tree;
but C programmers find it confusing to hide pointers behind typedefs, where java programmers manipulate pointers for everything but scalar values without problems because arrays and classes never contain actual structures, only pointers to objects.processing_space
, you can avoid dynamic allocation of the temporary array all_nearests
, which is allocated from the heap in java and on the stack in C. Note that you do not need to pass the total_number_of_elements
in this case because processing_space
is assumed to be large enough to hold references to all tree nodes.n_nearest
and all_nearests
as arrays of kd_tree
objects with size current_number_of_data_points
, many of which will be null
.main
does not increment tree_ptr
, so it always prints the first element of n_nearests
.Here is a modified version using arrays of pointers to structures, much closer to the java version:
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
extern tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
extern tree* processing_space[KD_TREE_HEAP_SIZE];
// return the number of closest neighbors, <= number_of_nearest_neighbors
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
int total_number_of_elements,
tree* n_nearests);
Implementation of knn
:
#include <stdio.h>
#include <stdlib.h>
#include "kdtree.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
static int tree_compare_distance(const void *a, const void *b) {
tree* ap = *(tree* const *)a;
tree* bp = *(tree* const *)b;
return ((ap->distance_to_neighbor > bp->distance_to_neighbor) -
(ap->distance_to_neighbor < bp->distance_to_neighbor));
}
static void quick_sort_based_on_distance(tree** array, size_t size) {
qsort(array, size, sizeof(*array), tree_compare_distance);
}
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
tree** n_nearests)
{
/* use the static processing space to avoid defining a potentially huge
array on the stack */
tree** all_nearests = processing_space;
if (root != NULL && n_nearests != NULL) {
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while (cur != NULL) {
if (cur->left == NULL) {
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right; // move to next right node
} else { // has a left subtree
pre = cur->left;
while (pre->right != NULL && pre->right != cur) { // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if (pre->right == NULL) {
pre->right = cur;
cur = cur->left;
} else {
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right;
}
}
}
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// sort based on distance from query point
quick_sort_based_on_distance(all_nearests, nearests_counter);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// return only specified number_of_nearest_neighbors
// yet do not return elements beyond nearest_counter
if (number_of_nearest_neighbors > nearest_counter)
number_of_nearest_neighbors = nearest_counter;
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i] = all_nearests[i];
}
return number_of_nearest_neighbors;
} else {
printf("Error, knn input parameter error");
return -1;
}
}
The main code is modified too:
```c
#include <stdio.h>
#include "kdtree.h"
int main() {
const int query_size = 10;
const int k_dimensions = 3;
// ...
// get the values and the query point
// ...
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%d nearest neighbors to Key: {%f,%f,%f}\n", query_size,
query_point[0], query_point[1], query_point[2]);
int result_size = knn(root, query_point, 0, k_dimensions, query_size, n_nearests);
// print the computed nearest neighbors
for (int i = 0; i < result_size; i++) {
tree* tree_ptr = n_nearests[i];
if (tree_ptr->info != NULL) {
printf("Key={");
for (int c = 0; c < k_dimensions; c++) {
printf("%s%d", "," + !c, tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With