Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

C - values is struct pointer are not set after pointer is returned from function

Tags:

c

knn

I'm trying to port a knn (k nearest neighbor search ) on a kd-tree that I wrote in Java to C.

The Java output, as expected:

Nearest to Key: 6.0,5.0,4.0
Key:6.0,5.0,4.0,min distance:0.0
Key:5.0,4.0,3.0,min distance:3.0
Key:7.0,6.0,5.0,min distance:3.0
Key:4.0,3.0,2.0,min distance:12.0
Key:3.0,2.0,1.0,min distance:27.0

Java code, class (Its a quick implementation just to get the algorithm working before I start my port):

class kd_tree {
  public int DEFAULT_NUMBER_OF_DIMENSIONS = 1;
  static int current_number_of_data_points = 0;
  static int current_global_median = 0;

  kd_tree left;
  kd_tree right;
  float[] data;
  private int k_dimensions;
  float distance_to_neighbor;
}

Java knn method:

/*===========================================================================
Function        knn, knn algorithm  using kd-tree & minimumNeighbor function.
Description:    
==========================================================*/
public static kd_tree[] knn( kd_tree  root
                           , float[] data_point
                           , int     depth
                           , int     k_dimension
                           , int     number_of_nearest_neighbors) {

  kd_tree[] all_nearests = new kd_tree[current_number_of_data_points];
  kd_tree[] n_nearests = new kd_tree[current_number_of_data_points];

  if (root != null) {
    int nearests_counter = 0;

    /* now lets traverse the tree inorder & calculate distances  from the 
    query point based on Morris Traversal algorithm that does not 
    use any stacks or no recursion */
    kd_tree cur = root;
    kd_tree pre;
    while (cur != null) {
      if (cur.left == null) {
        /* debugging System.out.println(cur.data[0]);
        calculate distance */

        cur.distance_to_neighbor = n_dimensional_euclidean(data_point, cur.data);
        all_nearests[nearests_counter] = cur;
        nearests_counter++;

        cur = cur.right; // move to next right node
      } else { // has a left subtree
        pre = cur.left;
        while (pre.right != null && pre.right != cur) { // find rightmost
          pre = pre.right;
        }

        /* Make current as the right child of its inorder  
        predecessor */
        if (pre.right == null) {
            pre.right = cur;
            cur = cur.left;
        } else {
            pre.right = null;
            // debugging printf("%d ", current->data);
            // calculate distance
            cur.distance_to_neighbor = n_dimensional_euclidean( data_point, cur.data);
            all_nearests[nearests_counter] = cur;
            nearests_counter++;

            cur = cur.right;
        }
      }
    }//end while

    // base cases
    // sort from least to greatest
    insertion_sort_based_on_distance(all_nearests);

    // return on specified number_of_nearest_neighbors
    for (int i = 0; i < number_of_nearest_neighbors; i++) {
      n_nearests[i]=all_nearests[i];
    }
  }

  return n_nearests;
}

The relevant C code snippets:

#include <stdlib.h>
#ifndef KDTREE_H
#define KDTREE_H

/*
 * Representation of a kd tree
 */
typedef struct tree_ {
    struct tree*  left;
    struct tree*  right;
    float*        info;
    float         distance_to_neighbor;
} tree;

// pre-allocated tree nodes array
static tree tree_space[KD_TREE_HEAP_SIZE];

// the knn algorithm will require memory space upto tree size 
static tree* processing_space [KD_TREE_HEAP_SIZE];

tree* knn( tree*      root
         , float      data_point[]
         , int        depth
         , const int  k_dimensions
         , int        number_of_nearest_neighbors
         , int        total_number_of_elements
         , tree*      n_nearests);

Relevant knn implementation, kdtree.c:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "kdtree.h"
#include "sorting_utility.h"

static int current_number_of_kd_tree_nodes  = 0;
static int current_number_of_data_points    = 0;

/*===========================================================================
Function        knn, knn algorithm  using kd-tree.
Description:    
==========================================================*/
tree* knn( tree*      root
         , float      data_point[]
         , int        depth
         , const int  k_dimensions
         , int        number_of_nearest_neighbors
         , tree*      n_nearests)
{
  tree  all_nearests[current_number_of_kd_tree_nodes];
  tree* source_data_end_ptr         = NULL;
  tree* source_data_start_ptr       = NULL;
  tree* destination_data_start_ptr  = NULL;
  tree* destination_data_end_ptr    = NULL;

  if(NULL != root && NULL != n_nearests)
  {
    int nearests_counter = 0;

    // now lets traverse the tree inorder & calculate distances
    // from the query point
    tree* cur = root;

    tree* pre;
    while(NULL != cur)
    {
      if(NULL == cur->left)
      {
        cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
        processing_space[nearests_counter] = *cur;
        nearests_counter++;
        cur = cur->right; // move to next right node
      }
      else
      {
        pre = cur->left;
        while(pre->right != NULL && pre->right != cur)
        { // find rightmost
          pre = pre->right;
        }
        /* Make current as the right child of its inorder predecessor */
        if(pre->right == NULL)
        {
          pre->right = cur;
          cur = cur->left;
        }
        else
        {
          pre->right = NULL;
          // calculate distance
          cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
          processing_space[nearests_counter] = *cur;

          nearests_counter++;
          cur = cur->right;
        }
      }
    } // end while

    // JUST FOR DEBUGGING START
    printf ("***For debugging before sort:\n");

    for(int i = 0; i < current_number_of_kd_tree_nodes; i++)
    {
      printf("{");
      for(int c = 0; c < k_dimensions; c++)
      {
        if(NULL != processing_space[i].info)
        {
          printf("%f,", processing_space[i].info[c]);
        }
        else
        {
          break;
        }
      } // end for
      printf("} ");
      printf("min_distance=%f\n", processing_space[i].distance_to_neighbor);
    } // end for
    // JUST FOR DEBUGGING END

    /* copy relevant range up current_number_of_kd_tree_nodes before sort,
     * in  order to avoid sorting beyond range that does not have any data */

    source_data_start_ptr       = &processing_space[0];
    destination_data_start_ptr  = &all_nearests[0];
    destination_data_end_ptr    = &all_nearests[current_number_of_kd_tree_nodes - 1];

    while(destination_data_start_ptr <= destination_data_end_ptr)
    {
      *destination_data_start_ptr = *source_data_start_ptr;
      source_data_start_ptr++;
      destination_data_start_ptr++;
    }

    // sort based on distance from query point
    quick_sort(all_nearests, 0, current_number_of_kd_tree_nodes - 1);

    // JUST FOR DEBUGGING START
    printf("***For debugging after sort\n");

    for (int i = 0; i < current_number_of_kd_tree_nodes; i++)
    {
      printf("{");
      for (int c = 0; c < k_dimensions; c++)
      {
        if (NULL != all_nearests[i].info)
        {
          printf ("%f,", all_nearests[i].info[c]);
        }
        else
        {
          break;
        }
      } // end for
      printf("} ");
      printf("min_distance=%f\n", all_nearests[i].distance_to_neighbor);
    } // end for

    // JUST FOR DEBUGGING END

    /* copy only the n_nearest & ignore/ do NOT copy any empty tree nodes */
    // reuse pointers
    destination_data_end_ptr  = &n_nearests[number_of_nearest_neighbors - 1];
    source_data_end_ptr       = &all_nearests[current_number_of_kd_tree_nodes - 1];
    source_data_start_ptr     = all_nearests;

    int counter = 0;
    while(counter < number_of_nearest_neighbors && source_data_start_ptr < source_data_end_ptr)
    {
      // do NOT copy any empty tree nodes
      if(source_data_start_ptr != NULL && source_data_start_ptr->info != NULL)
      {
          /* ATTENTION: i checked with debugger & values for 
          (distance_to_neighbor,info,left,right were not zeroed or empty */
          float distance_to_neighbor  = source_data_start_ptr->distance_to_neighbor;
          float* info                 = source_data_start_ptr->info;
          tree* left                  = source_data_start_ptr->left;
          tree* right                 = source_data_start_ptr->right;

          n_nearests[counter].distance_to_neighbor  = distance_to_neighbor;
          n_nearests[counter].info                  = info;
          n_nearests[counter].left                  = left;
          n_nearests[counter].right                 = right;

          counter++;
      }
      source_data_start_ptr++;
    }
  }
  else
  {
    printf("Error, knn input parameter error");
  }

  return n_nearests;
} // end function

Relevant snippet of main.c:

#include<kdtree.h>

int main()
{
  const int query_size    = 10;
  const int k_dimensions  = 3;

  printf("knn (k nearest neighboor)\n:");
  tree* n_nearests[query_size];
  printf("%Nearest to Key: {%f,%f,%f}\n", query_size,query_point[0], query_point[1], query_point[2]); 
  knn(root, query_point, 0, k_dimensions, query_size, KD_TREE_HEAP_SIZE, n_nearests);

  // print n nearest neighbors
  tree* tree_ptr = &n_nearests[0];

  for(int i = 0; i <query_size; i++)
  {
    if(NULL != tree_ptr->info)
    {
      printf("Key={");
      for(int c=0; c < k_dimensions; c++)
      {
        printf("%d,", tree_ptr->info[c]);
      }
      printf("} ");
      printf("%f min distance\n", tree_ptr->distance_to_neighbor);
    }
  }

  return 0;
} // end main

Output by the C version:

knn (k nearest  neighboor)
:5 Nearest neighbors to Key: {6.000000,5.000000,4.000000} are: 
***For debugging before sort:
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
***For debugging after sort
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000

**After function return:**
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance

I have already tested insertion, inorder traversal, searching, deletion, and finding a minimum neighbor in both Java & C version they are working as expected.

In the C version the knn function returns zeroed values after the function return?

Why are values zero ine the struct , after the call in main function (refer to output area titled "After function return" ?

Hoping someone else can spot something obvious, really desperate pulling out my hair.

like image 567
cyber101 Avatar asked Jan 25 '23 15:01

cyber101


1 Answers

There are multiple problems:

  • n_nearest is defined in main as an array of pointers to tree objects. But you define the argument to knn as a pointer to an array of actual tree objects, which is incompatible. The argument should be defined as tree **n_nearest or tree* n_nearest[], which is a pointer to an array of pointers. You should then just store references to the tree elements, not copy values. Similarly, all_nearests should be defined as an array of pointers, just like in java, not an array of objects to store copies of the tree nodes.
  • The compiler should issue a diagnostic for this type mismatch. It is not a benign error, the code really has undefined behavior and this probably explains the incorrect output. Always enable extra warnings for the C compiler to detect potentially incorrect code: gcc -Wall -Werror or clang -Weverything -Werror are a good start to save countless hours of frustration.
  • The java type kd_tree could be defined in C as typedef tree *kd_tree; but C programmers find it confusing to hide pointers behind typedefs, where java programmers manipulate pointers for everything but scalar values without problems because arrays and classes never contain actual structures, only pointers to objects.
  • if you use the global processing_space, you can avoid dynamic allocation of the temporary array all_nearests, which is allocated from the heap in java and on the stack in C. Note that you do not need to pass the total_number_of_elements in this case because processing_space is assumed to be large enough to hold references to all tree nodes.
  • you do not return the number of elements stored into the destination array. There can be fewer than the requested number. This problem is present in java too where you allocate n_nearest and all_nearests as arrays of kd_tree objects with size current_number_of_data_points, many of which will be null.
  • indeed you must test for these null pointers in the sorting function you did not post, which is unnecessary if you pass the correct number of elements to sort.
  • the loop in main does not increment tree_ptr, so it always prints the first element of n_nearests.
  • using pointers in the C version is less readable than using array notation and does not ensure better performance. C compilers are very effective at removing common subexpressions arising from array offset computations. The C code would be much closer to the java code.

Here is a modified version using arrays of pointers to structures, much closer to the java version:

#ifndef KDTREE_H
#define KDTREE_H
/*
 * Representation of a kd tree
 */
typedef struct tree {
    struct tree*  left;
    struct tree*  right;
    float*        info;
    float         distance_to_neighbor;
} tree;

// pre-allocated tree nodes array
extern tree tree_space[KD_TREE_HEAP_SIZE];

// the knn algorithm will require memory space upto tree size 
extern tree* processing_space[KD_TREE_HEAP_SIZE];

// return the number of closest neighbors, <= number_of_nearest_neighbors
int knn(tree*      root,
        float      data_point[],
        int        depth,
        const int  k_dimensions,
        int        number_of_nearest_neighbors,
        int        total_number_of_elements,
        tree*      n_nearests);

Implementation of knn:

#include <stdio.h>
#include <stdlib.h>
#include "kdtree.h"

static int current_number_of_kd_tree_nodes  = 0;
static int current_number_of_data_points    = 0;

static int tree_compare_distance(const void *a, const void *b) {
    tree* ap = *(tree* const *)a;
    tree* bp = *(tree* const *)b;
    return ((ap->distance_to_neighbor > bp->distance_to_neighbor) -
            (ap->distance_to_neighbor < bp->distance_to_neighbor));
}

static void quick_sort_based_on_distance(tree** array, size_t size) {
    qsort(array, size, sizeof(*array), tree_compare_distance);
}

/*===========================================================================
   Function        knn, knn algorithm  using kd-tree.
   Description:
   ==========================================================*/
int knn(tree*      root,
        float      data_point[],
        int        depth,
        const int  k_dimensions,
        int        number_of_nearest_neighbors,
        tree**     n_nearests)
{
    /* use the static processing space to avoid defining a potentially huge
       array on the stack */
    tree** all_nearests = processing_space;

    if (root != NULL && n_nearests != NULL) {
        int nearests_counter = 0;

        // now lets traverse the tree inorder & calculate distances
        // from the query point
        tree* cur = root;
        tree* pre;
        while (cur != NULL) {
            if (cur->left == NULL) {
                // calculate distance
                cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
                all_nearests[nearests_counter] = cur;
                nearests_counter++;
                cur = cur->right; // move to next right node
            } else { // has a left subtree
                pre = cur->left;
                while (pre->right != NULL && pre->right != cur) { // find rightmost
                    pre = pre->right;
                }
                /* Make current as the right child of its inorder predecessor */
                if (pre->right == NULL) {
                    pre->right = cur;
                    cur = cur->left;
                } else {
                    pre->right = NULL;
                    // calculate distance
                    cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
                    all_nearests[nearests_counter] = cur;
                    nearests_counter++;
                    cur = cur->right;
                }
            }
        }

        // JUST FOR DEBUGGING START
        printf ("***For debugging before sort:\n");
        for (int i = 0; i < nearests_counter; i++) {
            printf("{");
            for (int c = 0; c < k_dimensions; c++) {
                printf("%f,", all_nearests[i]->info[c]);
            }
            printf("} ");
            printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
        }
        // JUST FOR DEBUGGING END

        // sort based on distance from query point
        quick_sort_based_on_distance(all_nearests, nearests_counter);

        // JUST FOR DEBUGGING START
        printf("***For debugging after sort\n");
        for (int i = 0; i < nearests_counter; i++) {
            printf("{");
            for (int c = 0; c < k_dimensions; c++) {
                printf("%f,", all_nearests[i]->info[c]);
            }
            printf("} ");
            printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
        }
        // JUST FOR DEBUGGING END

        // return only specified number_of_nearest_neighbors
        // yet do not return elements beyond nearest_counter
        if (number_of_nearest_neighbors > nearest_counter)
            number_of_nearest_neighbors = nearest_counter;
        for (int i = 0; i < number_of_nearest_neighbors; i++) {
            n_nearests[i] = all_nearests[i];
        }
        return number_of_nearest_neighbors;
    } else {
        printf("Error, knn input parameter error");
        return -1;
    }
}

The main code is modified too:

```c
#include <stdio.h>
#include "kdtree.h"

int main() {
    const int query_size    = 10;
    const int k_dimensions  = 3;

    // ...
    // get the values and the query point
    // ...

    printf("knn (k nearest neighboor)\n:");
    tree* n_nearests[query_size];
    printf("%d nearest neighbors to Key: {%f,%f,%f}\n", query_size,
           query_point[0], query_point[1], query_point[2]);
    int result_size = knn(root, query_point, 0, k_dimensions, query_size, n_nearests);

    // print the computed nearest neighbors
    for (int i = 0; i < result_size; i++) {
        tree* tree_ptr = n_nearests[i];
        if (tree_ptr->info != NULL) {
            printf("Key={");
            for (int c = 0; c < k_dimensions; c++) {
                printf("%s%d", "," + !c, tree_ptr->info[c]);
            }
            printf("} ");
            printf("%f min distance\n", tree_ptr->distance_to_neighbor);
        }
    }
    return 0;
}
like image 155
chqrlie Avatar answered Jan 30 '23 08:01

chqrlie