Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Grouping sets of 4 bits into nybbles in C printf

C23 added support for binary literals (0b10100110) and for grouping them for human readability using a separator character ('). This is great for input, but how does one change the printf grouping to sets of four bits (a hexadecimal "digit") instead of three bits (an octal digit)?

#include <stdio.h>
#include <locale.h>

int main() {
    setlocale(LC_ALL, "en_US");
    unsigned long long int i = 0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001;
    printf("binary: %'#0*llb\n", (int)sizeof(i) * 8, i); 
    return 0;
}

I would like the output to look like this:

0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001

not like this:

0b1,000,100,100,010,000,100,001,111,010,010,110,001,011,011,000,001,110,010,110,001

Update: From the answers given so far, it appears C (as of 2025) does not have a natural way to do what I'm asking. Most of the proposed answers require explicitly calling a special function to convert the number into a string. I'm hoping there exists a more transparent solution, even if that means using a GNU extension.

like image 855
hackerb9 Avatar asked Oct 22 '25 05:10

hackerb9


2 Answers

Digit grouping is not a standard feature in the printf family of functions in Standard C. It is a POSIX extension supported by the GNU libc and other unix libraries. It applies to decimal integer conversions (%i, %d and %u) and the integral portion of floating point conversions (%f, %g and %G).

The number of digits in each group cannot be specified on a per-conversion basis, nor depending on the base or the group number; it is specified in the locale.

The thousands separator is specified in the locale as well as the decimal separator that will be used for floating point conversions. The purpose is the conversion of currency amounts consistent with the local culture. Note that in some cases, the number of digits is not the same for all groups (eg: the Indian numbering system), so this feature is only half-baked.

This feature does not meet your goal as it does not apply to hexadecimal or binary conversions and modifying the locale definition is risky anyway.

Here is a simple function to format integers in different bases where you can specify both the grouping number and the separator.

#include <limits.h>
#include <stdio.h>

/* Format an unsigned integer as text in a chosen radix, optionally
 * inserting a separator between fixed-size groups of digits. Groups are
 * counted from the least significant digit, so only the leading group
 * may be shorter than `grouping`.
 *
 * dest      destination array (written only when size > 0)
 * size      length of the destination array, terminator included
 * n         value to convert
 * base      output radix: 2..36, or 0 as an alias for 10
 * mindigits minimum number of digits; shorter results are padded with
 *           leading '0' (which take part in the grouping)
 * grouping  digits per group, <= 0 for no grouping
 * sep       separator character placed between groups
 *
 * Returns -1 if base is invalid. Otherwise returns the length of the
 * complete output, not counting the terminator, even when the text
 * stored in dest had to be truncated to fit.
 */
int format_ull(char *dest,              /* destination array */
               size_t size,             /* array length */
               unsigned long long n,    /* value to convert */
               int base,                /* output radix, 0 or 2..36 */
               int mindigits,           /* minimum number of digits */
               int grouping,            /* group length, 0 for no groups */
               int sep)                 /* separator character */
{
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";
    char scratch[sizeof(n) * CHAR_BIT];     /* worst case: one digit per bit */
    char *const scratch_end = scratch + sizeof(scratch);
    char *cur = scratch_end;

    if (base == 0) {
        base = 10;
    } else if (base < 2 || base > 36) {
        return -1;
    }

    /* Produce the significant digits right to left; zero yields none. */
    const unsigned radix = (unsigned)base;
    for (; n != 0; n /= radix) {
        *--cur = alphabet[n % radix];
    }

    const int significant = (int)(scratch_end - cur);
    int padding = 0;
    if (mindigits > significant) {
        padding = mindigits - significant;
    }
    const int total = significant + padding;

    int run;            /* digits still to emit before the next separator */
    size_t full_len;    /* length of the untruncated output */
    if (grouping <= 0 || total <= grouping) {
        run = total;
        full_len = (size_t)total;
    } else {
        /* Size of the leading group, in 1..grouping. */
        run = (total + grouping - 1) % grouping + 1;
        full_len = (size_t)(total + (total - 1) / grouping);
    }

    if (size != 0) {
        /* Keep one byte for the terminator and never emit past the end. */
        size_t limit = size - 1;
        if (limit > full_len) {
            limit = full_len;
        }
        char *out = dest;
        char *const stop = dest + limit;
        while (out < stop) {
            if (run > 0) {
                run--;
                if (padding > 0) {
                    padding--;
                    *out = '0';
                } else {
                    *out = *cur++;
                }
            } else {
                run = grouping;
                *out = (char)sep;
            }
            out++;
        }
        *out = '\0';
    }
    return (int)full_len;
}

/* Run test() on an expression, passing along its source spelling. */
#define TEST(n)  test(n, #n)

/* Print n in several radix/grouping combinations, preceded by the
 * source text it was written as.
 */
void test(unsigned long long n, const char *source) {
    char text[100];
    const int bit_count = (int)(sizeof(n) * CHAR_BIT);

    printf("source:    %s\n", source);
    printf("normal:    %llu\n", n);

    format_ull(text, sizeof text, n, 10, 1, 3, ',');
    printf("base 10/3: %s\n", text);

    /* Octal: print 0, 1 or 2 characters of "0'" in front of the digits.
     * Nothing when the text already starts with '0'; "0'" when the
     * length is 3 modulo 4; a bare "0" otherwise.
     */
    const int octal_len = format_ull(text, sizeof text, n, 8, 1, 3, '\'');
    int prefix_chars = 0;
    if (text[0] != '0') {
        prefix_chars = (octal_len % 4 == 3) ? 2 : 1;
    }
    printf("base 8/3:  %.*s%s\n", prefix_chars, "0'", text);

    /* Hexadecimal: one digit per 4 bits, padded to the full width. */
    format_ull(text, sizeof text, n, 16, bit_count / 4, 4, '\'');
    printf("base 16/4: 0x%s\n", text);

    /* Binary: every bit shown, grouped by 8. */
    format_ull(text, sizeof text, n, 2, bit_count, 8, '\'');
    printf("base 2/8:  0b%s\n", text);

    printf("\n");
}

/* Run the formatting demo on zero, the largest value, and a sample
 * bit pattern.
 */
int main(void) {
    /* TEST stringifies its argument, so the exact spelling of each
     * argument below is what appears on the "source:" output line.
     */
    TEST(0);
    TEST(ULLONG_MAX);
    TEST(0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001);
    return 0;
}

Output:

source:    0
normal:    0
base 10/3: 0
base 8/3:  0
base 16/4: 0x0000'0000'0000'0000
base 2/8:  0b00000000'00000000'00000000'00000000'00000000'00000000'00000000'00000000

source:    ULLONG_MAX
normal:    18446744073709551615
base 10/3: 18,446,744,073,709,551,615
base 8/3:  01'777'777'777'777'777'777'777
base 16/4: 0xffff'ffff'ffff'ffff
base 2/8:  0b11111111'11111111'11111111'11111111'11111111'11111111'11111111'11111111

source:    0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001
normal:    1234567890987654321
base 10/3: 1,234,567,890,987,654,321
base 8/3:  0'104'420'417'226'133'016'261
base 16/4: 0x1122'10f4'b16c'1cb1
base 2/8:  0b00010001'00100010'00010000'11110100'10110001'01101100'00011100'10110001

For your specific purpose, here is a simpler version for binary output grouped in sets of 4 bits. It uses a static buffer so it is not reentrant and can only be used once per printf call:

#include <limits.h>
#include <stdio.h>

/* Convert an integer to binary text, grouping digits in sets of 4
 * separated by a ' character (groups are counted from the least
 * significant bit).
 *
 * prefix     if > 0, "0b" is prepended; a zero value consumes one level
 *            of prefix, so prefix == 1 prints no "0b" for n == 0
 * min_digits minimum number of binary digits; shorter values are padded
 *            with leading zeroes. Values above the bit width of n are
 *            clamped to that width.
 * n          value to convert
 *
 * Returns a pointer into a static buffer holding the NUL-terminated
 * text. The buffer is overwritten by the next call, so the function is
 * not reentrant and the result must be consumed before calling again.
 */
const char *format_bin4(int prefix, int min_digits, unsigned long long n) {
    enum { GROUP_SIZE = 4 };
    /* "0b" + digits + one separator slot per 4 digits + terminator. */
    static char buf[2 + sizeof(n) * CHAR_BIT * 5 / 4 + 1];
    const int max_digits = (int)(sizeof(n) * CHAR_BIT);
    char *p = buf + sizeof(buf);
    int in_group = 0;   /* digits already written in the current group */
    int i;

    /* Never emit more digits than n has bits. Without this bound an
     * oversized min_digits fills the buffer down to its start and the
     * prefix below would then be written before buf[0].
     */
    if (min_digits > max_digits)
        min_digits = max_digits;

    if (n == 0)
        prefix--;

    /* Build the text backwards, least significant bit first. */
    *--p = '\0';
    for (i = 0; i < min_digits || n != 0; i++) {
        if (in_group == GROUP_SIZE) {
            *--p = '\'';
            in_group = 0;
        }
        *--p = (char)('0' + (n & 1));
        n >>= 1;
        in_group++;
    }
    if (prefix > 0) {
        *--p = 'b';
        *--p = '0';
    }
    return p;
}

int main(void) {
    /* Source spelling of the sample, printed next to its formatted form
     * so the two lines can be compared.
     */
    const char *const sample_text = "0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001";
    const unsigned long long sample = 0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001;

    /* Zero with growing minimum digit counts. */
    printf("         0,  0: %s\n", format_bin4(1, 0, 0));
    printf("         0,  1: %s\n", format_bin4(1, 1, 0));
    printf("         0,  2: %s\n", format_bin4(1, 2, 0));
    printf("         0, 64: %s\n", format_bin4(1, 64, 0));

    /* All bits set. */
    printf("ULLONG_MAX, 64: %s\n", format_bin4(1, 64, ULLONG_MAX));

    /* Sample pattern: source text, then the formatted value. */
    printf("  x source, 64: %s\n", sample_text);
    printf("  x format, 64: %s\n", format_bin4(1, 64, sample));
    return 0;
}

Output:

         0,  0:
         0,  1: 0
         0,  2: 00
         0, 64: 0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000'0000
ULLONG_MAX, 64: 0b1111'1111'1111'1111'1111'1111'1111'1111'1111'1111'1111'1111'1111'1111'1111'1111
  x source, 64: 0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001
  x format, 64: 0b0001'0001'0010'0010'0001'0000'1111'0100'1011'0001'0110'1100'0001'1100'1011'0001
like image 160
chqrlie Avatar answered Oct 23 '25 21:10

chqrlie


#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbit.h>
/**
nibbles(str, u)
writes u into str as binary digits, most significant set bit first,
with a ' between groups of four bits (nibbles) counted from the
least significant bit; leading zero bits are not written
str : where to store the text; no length is passed, so the caller
      must provide room for every digit, every quote and the final '\0'
u : number to convert to binary string
returns str
*/
char *
nibbles(char *str, unsigned long u)
{
    char *out = str;

    // zero has no set bit: write a single '0'
    if (u == 0) {
        *out++ = '0';
        *out = '\0';
        return str;
    }

    // position of the most significant set bit
    unsigned int top = stdc_bit_width(u) - 1;

    // walk the bit positions from top down to 0
    for (unsigned int bit = top + 1; bit-- > 0; )
    {
        *out++ = ((u >> bit) & 1UL) ? '1' : '0';
        // a nibble ends at each multiple of 4, except at the last bit
        if (bit != 0 && bit % 4 == 0)
            *out++ = '\'';
    }
    *out = '\0';
    return str;
}

/* Interactive demo: read unsigned long values from standard input and
 * print each one with the plain %b conversion and with nibbles().
 * The loop ends at the first input scanf cannot convert (such as "q").
 */
int main() 
{
    unsigned long x;
    char s[81];  // NOTE(review): enough for a 64-bit unsigned long (64 digits + 15 quotes + '\0'); confirm if wider
    puts("input the unsigned long to convert\nq to quit");
    // %lu is the conversion that matches an unsigned long * argument
    while(printf("> ") && scanf("%lu", &x) == 1) 
    {
        printf("standard C23 output:%32lb\n", x);
        printf("grouping nibbles:   %32s\n", nibbles(s, x)); 
    }
}

Outputs :

input the unsigned long to convert
q to quit
> 123
standard C23 output:                         1111011
grouping nibbles:                           111'1011
> 65231
standard C23 output:                1111111011001111
grouping nibbles:                1111'1110'1100'1111
> q

Thank you chux for the comment. I hope I improved my source.

like image 36
marnout Avatar answered Oct 23 '25 21:10

marnout



Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!