(First-time poster and rather new in programming, so be patient, please!)
I'm interested in both an efficient general algorithm for printing formatted binary trees (in a CLI environment) and a C implementation. Here is some code that I wrote myself for fun (this is a much simplified version of the original and part of a larger program supporting many BST operations, but it should compile just fine):
#include <stdbool.h> // C99, boolean type support
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define DATATYPE_IS_DOUBLE
#define NDEBUG // disable assertions
#include <assert.h>
#define WCHARBUF_LINES 20 // def: 20
#define WCHARBUF_COLMS 800 // def: 80 (using a huge number, like 500, is a good idea,
// in order to prevent a buffer overflow :)
#define RECOMMENDED_CONS_WIDTH 150
#define RECOMMENDED_CONS_WIDTHQ "150" // use the same value, quoted
/* Preprocessor directives depending on DATATYPE_IS_* : */
#if defined DATATYPE_IS_INT || defined DATATYPE_IS_LONG
#define DTYPE long int
#define DTYPE_STRING "INTEGER"
#define DTYPE_PRINTF "%*.*ld"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_FLOAT
#define DTYPE float
#define DTYPE_STRING "FLOAT"
#define DTYPE_PRINTF "%*.*f"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_DOUBLE
#define DTYPE double
#define DTYPE_STRING "DOUBLE"
#define DTYPE_PRINTF "%*.*lf"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_CHAR
#define DTYPE char
#define DTYPE_STRING "CHARACTER"
#define DTYPE_PRINTF "%*.*c" /* using the "precision" sub-specifier ( .* ) with a */
/* character will produce a harmless compiler warning */
#else
#error "DATATYPE_IS_* preprocessor directive undefined!"
#endif
typedef struct node_struct {
DTYPE data;
struct node_struct *left;
struct node_struct *right;
/* int height; // useful for AVL trees */
} node;
typedef struct {
node *root;
bool IsAVL; // useful for AVL trees
long size;
} tree;
static inline
DTYPE get_largest(node *n){
if (n == NULL)
return (DTYPE)0;
for(; n->right != NULL; n=n->right);
return n->data;
}
static
int subtreeheight(node *ST){
if (ST == NULL)
return -1;
int height_left = subtreeheight(ST->left);
int height_right = subtreeheight(ST->right);
return (height_left > height_right) ? (height_left + 1) : (height_right + 1);
}
void prettyprint_tree(tree *T){
if (T == NULL) // if T empty, abort
return;
#ifndef DATATYPE_IS_CHAR /* then DTYPE is a numeric type */
/* compute spaces, find width: */
int width, i, j;
DTYPE max = get_largest(T->root);
width = (max < 10) ? 1 :
(max < 100) ? 2 :
(max < 1000) ? 3 :
(max < 10000) ? 4 :
(max < 100000) ? 5 :
(max < 1000000) ? 6 :
(max < 10000000) ? 7 :
(max < 100000000) ? 8 :
(max < 1000000000) ? 9 : 10;
assert (max < 10000000000);
width += 2; // needed for prettier results
#if defined DATATYPE_IS_FLOAT || defined DATATYPE_IS_DOUBLE
width += 2; // because of the decimals! (1 decimal is printed by default...)
#endif // float or double
int spacesafter = width / 2;
int spacesbefore = spacesafter + 1;
//int spacesbefore = ceil(width / 2.0);
#else /* character input */
int i, j, width = 3, spacesbefore = 2, spacesafter = 1;
#endif // #ifndef DATATYPE_IS_CHAR
/* start wchar_t printing, using a 2D character array with swprintf() : */
struct columninfo{ // auxiliary structure
bool visited;
int col;
};
wchar_t wcharbuf[WCHARBUF_LINES][WCHARBUF_COLMS];
int line=0;
struct columninfo eachline[WCHARBUF_LINES];
for (i=0; i<WCHARBUF_LINES; ++i){ // initialization
for (j=0; j<WCHARBUF_COLMS; ++j)
wcharbuf[i][j] = (wchar_t)' ';
eachline[i].visited = false;
eachline[i].col = 0;
}
int height = subtreeheight(T->root);
void recur_swprintf(node *ST, int cur_line, const wchar_t *nullstr){ // nested function,
// GCC extension!
float offset = width * pow(2, height - cur_line);
++cur_line;
if (eachline[cur_line].visited == false) {
eachline[cur_line].col = (int) (offset / 2);
eachline[cur_line].visited = true;
}
else{
eachline[cur_line].col += (int) offset;
if (eachline[cur_line].col + width > WCHARBUF_COLMS)
swprintf(wcharbuf[cur_line], L" BUFFER OVERFLOW DETECTED! ");
}
if (ST == NULL){
swprintf(wcharbuf[cur_line] + eachline[cur_line].col, L"%*.*s", 0, width, nullstr);
if (cur_line <= height){
/* use spaces instead of the nullstr for all the "children" of a NULL node */
recur_swprintf(NULL, cur_line, L" ");
recur_swprintf(NULL, cur_line, L" ");
}
else
return;
}
else{
recur_swprintf(ST->left, cur_line, nullstr);
recur_swprintf(ST->right, cur_line, nullstr);
swprintf(wcharbuf[cur_line] + eachline[cur_line].col - 1, L"("DTYPE_PRINTF"",
spacesbefore, 1, ST->data);
//swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 1, L")");
swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 2, L")");
}
}
void call_recur(tree *tr){ // nested function, GCC extension! (wraps recur_swprintf())
recur_swprintf(tr->root, -1, L"NULL");
}
call_recur(T);
/* Omit empty columns: */
int omit_cols(void){ // nested function, GCC extension!
int col;
for (col=0; col<RECOMMENDED_CONS_WIDTH; ++col)
for (line=0; line <= height+1; ++line)
if (wcharbuf[line][col] != ' ' && wcharbuf[line][col] != '\0')
return col;
return 0;
}
/* Use fputwc to transfer the character array to the screen: */
j = omit_cols() - 2;
j = (j < 0) ? 0 : j;
for (line=0; line <= height+1; ++line){ // assumes RECOMMENDED_CONS_WIDTH console window!
fputwc('\n', stdout); // optional blanc line
for (i=j; i<j+RECOMMENDED_CONS_WIDTH && i<WCHARBUF_COLMS; ++i)
fputwc(wcharbuf[line][i], stdout);
fputwc('\n', stdout);
}
}
(also uploaded to a pastebin service, in order to preserve syntax highlighting)
It works quite well, although the automatic width setting could be better. The preprocessor magic is a bit silly (or even ugly) and not really related to the algorithm, but it allows using various data types in the tree nodes (I saw it as a chance to experiment a bit with the preprocessor - remember, I am a newbie!).
The main program is supposed to call
system("mode con:cols="RECOMMENDED_CONS_WIDTHQ" lines=2000");
before calling prettyprint_tree(), when running inside cmd.exe .
Sample output:
(106.0) (102.0) (109.0) (101.5) NULL (107.0) (115.0) NULL NULL (106.1) NULL (113.0) NULL NULL NULL NULL NULL
Ideally, the output would be like this (the reason I'm using the wprintf() family of functions is being able to print Unicode characters anyway):
(107.0) ┌─────┴─────┐ (106.1) NULL ┌───┴───┐ NULL NULL
So, my questions:
Thank you in advance for your responses!
PS. The question is not a duplicate of this one.
edit: Jonathan Leffler wrote an excellent answer that will most probably become the "accepted answer" after a few days (unless someone posts something equally awesome!). I decided to respond here instead of commenting because of the space constraints.
bool IsAVL
struct member is anything but unused; just not used in this particular function. I had to copy/paste code from various files and make a lot of changes in order to present the code cited above. That's a problem that I don't know how to solve. I would gladly post the whole program, but it is too large and commented in my mother-tongue (not in English!).-Wall
and -Wextra
enabled. Assertions and debug messages were enabled/disabled automatically depending on the build target. Also I thought that function prototypes weren't needed for nested functions, after all nested functions do not implement any external interface by definition - GCC certainly didn't complain here. I don't know why there are so many warnings on OSX :(vim
and I use nano
(inside GNU screen) or gedit
instead (shoot me)! In any case, I prefer the K&R brace style :)L' '
advice!This is my first "large-ish" (and non-trivial) program and I'm really thankful for your advice.
edit #2:
Here is an implementation of the "quick and dirty" method mentioned here.
(edit #3: I decided to split it to a separate answer, since it is a valid answer to the OP.)
Many responses mentioned Graphviz. I already knew about it (many Linux apps are linked against it) but I thought it would be overkill for a 10KB CLI executable. However, I'll keep it in mind for the future. It seems great.
You need to decide on whether your code needs to be portable. If you might ever need to use a compiler other than GCC, the nested functions are lethal to your portability goal. I would not use them - but my portability goals may not be the same as yours.
Your code is missing <wchar.h>
; it compiles fairly cleanly without it - GCC complained about missing prototypes for your non-static functions and for swprintf()
and fputwc()
), but adding <wchar.h>
generates a lot of serious warnings related to swprintf()
; they are actually diagnosing a bug.
gcc -O -I/Users/jleffler/inc -std=c99 -Wall -Wextra -Wmissing-prototypes \
-Wstrict-prototypes -Wold-style-definition -c tree.c
tree.c:88:6: warning: no previous prototype for ‘prettyprint_tree’
tree.c: In function ‘prettyprint_tree’:
tree.c:143:10: warning: no previous prototype for ‘recur_swprintf’
tree.c: In function ‘recur_swprintf’:
tree.c:156:17: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:156:17: error: too few arguments to function ‘swprintf’
/usr/include/wchar.h:135:5: note: declared here
tree.c:160:13: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:174:22: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:174:22: warning: passing argument 3 of ‘swprintf’ makes pointer from integer without a cast
/usr/include/wchar.h:135:5: note: expected ‘const wchar_t * restrict’ but argument is of type ‘int’
tree.c:177:13: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:177:13: error: too few arguments to function ‘swprintf’
/usr/include/wchar.h:135:5: note: declared here
tree.c: In function ‘prettyprint_tree’:
tree.c:181:10: warning: no previous prototype for ‘call_recur’
tree.c:188:9: warning: no previous prototype for ‘omit_cols’
(This is GCC 4.5.2 on MacOS X 10.6.5.)
swprintf()
; it is more like snprintf()
than sprintf()
(which is A Good Thing™!).The overall idea is interesting. I suggest choosing one representation when submitting your code for analysis, and cleaning up anything that is not relevant to the code analysis. For example, the arraystr
type is defined but unused - you don't want to let people like me get cheap shots at your code. Similarly with the unused structure members; don't even leave them as comments, even if you might want to keep them in the code in your VCS (though why?). You are using a version control system (VCS), aren't you? And that's a rhetorical question - if you aren't using a VCS, start using one now, before you lose something you value.
Design-wise, you want to avoid doing things like requiring the main program to run an obscure system()
command - your code should take care of such issues (maybe with an initializer function, and perhaps a finalizer function to undo the changes made to the terminal settings).
One more reason not to like nested functions: I can't work out how to get a declaration of the function in place. What seemed like plausible alternatives did not work - but I didn't go and read the GCC manual on them.
Minor nit: you can tell people who do not use 'vi' or 'vim' to edit - they don't put the opening brace of a function in column 1. In 'vi', the opening brace in column 1 gives you an easy way to the start of a function from anywhere inside it ('[[' to jump backwards; ']]' to jump to the start of the next function).
Don't disable assertions.
Do include a main program and the relevant test data - it means people can test your code, instead of just compiling it.
Use wide-character constants instead of casts:
wcharbuf[i][j] = (wchar_t)' ';
wcharbuf[i][j] = L' ';
Your code creates a big screen image (20 lines x 800 columns in the code) and fills in the data to be printed. That's a reasonable way to do it. With care, you could arrange to handle the line-drawing characters. However, I think you would need to rethink the core drawing algorithms. You would probably want to encapsulate the whole of the drawing code so that the screen image and related information is in a single structure, which can be passed by reference (pointer) to functions. You'd have a set of functions to draw various bits at positions your tree-searching code designates. You would have a function to draw the data value at an appropriate position; you would have a function to draw lines at appropriate positions. You would probably not have nested functions - it is, to my eyes, far harder to read the code when there's a function nested inside another. Making functions static is good; make the nested functions into static (non-nested) functions. Give them the context they need - hence the encapsulation of the screen image.
Request for information on encapsulation...
You could use a structure such as:
typedef struct columninfo Colinfo;
typedef struct Image
{
wchar_t image[WCHARBUF_LINES][WCHARBUF_COLUMNS];
Colinfo eachline[WCHARBUF_LINES];
} Image;
Image image;
You might find it convenient and/or sensible to add some extra members; that would show up during the implementation. You might then create a function:
void format_node(Image *image, int line, int column, DTYPE value)
{
...
}
You could also make some of the constants, such as spacesafter into enum values:
enum { spacesafter = 2 };
These can then be used by any of the functions.
Coding style: The prettyprint_tree()
function juggles too much computation and data to be comfortable to read. Initialization and printing of the image buffer can for example be placed in separate functions and the width
computation also. I am sure you can write a formula with log
to replace the
width = (max < 10) ? 1 :
(max < 100) ? 2 :
(max < 1000) ? 3 :
...
computation.
I am not used to reading nested functions and C, which makes it much harder for me to scan your code. Unless you don't share your code with others or have ideological reasons for tying the code to GCC, I wouldn't use those extensions.
Algorithm: For a quick and dirty pretty-printer, written in C, I would never use your style of layout. In comparison to your algorithm, it is a no-brainer to write an in-order traversal to print
a
/ \
b c
as
c
a
b
and I don't mind having to tilt my head. For anything prettier than that I would much rather emit
digraph g { a -> b; a -> c; }
and leave it to dot to do the formatting.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With