You can use the strtok()
function to split a string (and specify the delimiter to use). Note that strtok()
will modify the string passed into it. If the original string is required elsewhere make a copy of it and pass the copy to strtok()
.
EDIT:
Example (note it does not handle consecutive delimiters, "JAN,,,FEB,MAR" for example):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
char** str_split(char* a_str, const char a_delim)
{
char** result = 0;
size_t count = 0;
char* tmp = a_str;
char* last_comma = 0;
char delim[2];
delim[0] = a_delim;
delim[1] = 0;
/* Count how many elements will be extracted. */
while (*tmp)
{
if (a_delim == *tmp)
{
count++;
last_comma = tmp;
}
tmp++;
}
/* Add space for trailing token. */
count += last_comma < (a_str + strlen(a_str) - 1);
/* Add space for terminating null string so caller
knows where the list of returned strings ends. */
count++;
result = malloc(sizeof(char*) * count);
if (result)
{
size_t idx = 0;
char* token = strtok(a_str, delim);
while (token)
{
assert(idx < count);
*(result + idx++) = strdup(token);
token = strtok(0, delim);
}
assert(idx == count - 1);
*(result + idx) = 0;
}
return result;
}
int main()
{
char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
char** tokens;
printf("months=[%s]\n\n", months);
tokens = str_split(months, ',');
if (tokens)
{
int i;
for (i = 0; *(tokens + i); i++)
{
printf("month=[%s]\n", *(tokens + i));
free(*(tokens + i));
}
printf("\n");
free(tokens);
}
return 0;
}
Output:
$ ./main.exe
months=[JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC]
month=[JAN]
month=[FEB]
month=[MAR]
month=[APR]
month=[MAY]
month=[JUN]
month=[JUL]
month=[AUG]
month=[SEP]
month=[OCT]
month=[NOV]
month=[DEC]
I think strsep
is still the best tool for this:
while ((token = strsep(&str, ","))) my_fn(token);
That is literally one line that splits a string.
The extra parentheses are a stylistic element to indicate that we're intentionally testing the result of an assignment, not an equality operator ==
.
For that pattern to work, token
and str
both have type char *
. If you started with a string literal, then you'd want to make a copy of it first:
// More general pattern:
const char *my_str_literal = "JAN,FEB,MAR";
char *token, *str, *tofree;
tofree = str = strdup(my_str_literal); // We own str's memory now.
while ((token = strsep(&str, ","))) my_fn(token);
free(tofree);
If two delimiters appear together in str
, you'll get a token
value that's the empty string. The value of str
is modified in that each delimiter encountered is overwritten with a zero byte - another good reason to copy the string being parsed first.
In a comment, someone suggested that strtok
is better than strsep
because strtok
is more portable. Ubuntu and Mac OS X have strsep
; it's safe to guess that other unixy systems do as well. Windows lacks strsep
, but it has strbrk
which enables this short and sweet strsep
replacement:
char *strsep(char **stringp, const char *delim) {
if (*stringp == NULL) { return NULL; }
char *token_start = *stringp;
*stringp = strpbrk(token_start, delim);
if (*stringp) {
**stringp = '\0';
(*stringp)++;
}
return token_start;
}
Here is a good explanation of strsep
vs strtok
. The pros and cons may be judged subjectively; however, I think it's a telling sign that strsep
was designed as a replacement for strtok
.
String tokenizer this code should put you in the right direction.
int main(void) {
char st[] ="Where there is will, there is a way.";
char *ch;
ch = strtok(st, " ");
while (ch != NULL) {
printf("%s\n", ch);
ch = strtok(NULL, " ,");
}
getch();
return 0;
}
Method below will do all the job (memory allocation, counting the length) for you. More information and description can be found here - Implementation of Java String.split() method to split C string
int split (const char *str, char c, char ***arr)
{
int count = 1;
int token_len = 1;
int i = 0;
char *p;
char *t;
p = str;
while (*p != '\0')
{
if (*p == c)
count++;
p++;
}
*arr = (char**) malloc(sizeof(char*) * count);
if (*arr == NULL)
exit(1);
p = str;
while (*p != '\0')
{
if (*p == c)
{
(*arr)[i] = (char*) malloc( sizeof(char) * token_len );
if ((*arr)[i] == NULL)
exit(1);
token_len = 0;
i++;
}
p++;
token_len++;
}
(*arr)[i] = (char*) malloc( sizeof(char) * token_len );
if ((*arr)[i] == NULL)
exit(1);
i = 0;
p = str;
t = ((*arr)[i]);
while (*p != '\0')
{
if (*p != c && *p != '\0')
{
*t = *p;
t++;
}
else
{
*t = '\0';
i++;
t = ((*arr)[i]);
}
p++;
}
return count;
}
How to use it:
int main (int argc, char ** argv)
{
int i;
char *s = "Hello, this is a test module for the string splitting.";
int c = 0;
char **arr = NULL;
c = split(s, ' ', &arr);
printf("found %d tokens.\n", c);
for (i = 0; i < c; i++)
printf("string #%d: %s\n", i, arr[i]);
return 0;
}
Here is my two cents:
int split (const char *txt, char delim, char ***tokens)
{
int *tklen, *t, count = 1;
char **arr, *p = (char *) txt;
while (*p != '\0') if (*p++ == delim) count += 1;
t = tklen = calloc (count, sizeof (int));
for (p = (char *) txt; *p != '\0'; p++) *p == delim ? *t++ : (*t)++;
*tokens = arr = malloc (count * sizeof (char *));
t = tklen;
p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
while (*txt != '\0')
{
if (*txt == delim)
{
p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
txt++;
}
else *p++ = *txt++;
}
free (tklen);
return count;
}
Usage:
char **tokens;
int count, i;
const char *str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
count = split (str, ',', &tokens);
for (i = 0; i < count; i++) printf ("%s\n", tokens[i]);
/* freeing tokens */
for (i = 0; i < count; i++) free (tokens[i]);
free (tokens);
In the above example, there would be a way to return an array of null terminated strings (like you want) in place in the string. It would not make it possible to pass a literal string though, as it would have to be modified by the function:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
char** str_split( char* str, char delim, int* numSplits )
{
char** ret;
int retLen;
char* c;
if ( ( str == NULL ) ||
( delim == '\0' ) )
{
/* Either of those will cause problems */
ret = NULL;
retLen = -1;
}
else
{
retLen = 0;
c = str;
/* Pre-calculate number of elements */
do
{
if ( *c == delim )
{
retLen++;
}
c++;
} while ( *c != '\0' );
ret = malloc( ( retLen + 1 ) * sizeof( *ret ) );
ret[retLen] = NULL;
c = str;
retLen = 1;
ret[0] = str;
do
{
if ( *c == delim )
{
ret[retLen++] = &c[1];
*c = '\0';
}
c++;
} while ( *c != '\0' );
}
if ( numSplits != NULL )
{
*numSplits = retLen;
}
return ret;
}
int main( int argc, char* argv[] )
{
const char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
char* strCpy;
char** split;
int num;
int i;
strCpy = malloc( strlen( str ) * sizeof( *strCpy ) );
strcpy( strCpy, str );
split = str_split( strCpy, ',', &num );
if ( split == NULL )
{
puts( "str_split returned NULL" );
}
else
{
printf( "%i Results: \n", num );
for ( i = 0; i < num; i++ )
{
puts( split[i] );
}
}
free( split );
free( strCpy );
return 0;
}
There is probably a neater way to do it, but you get the idea.
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
/**
* splits str on delim and dynamically allocates an array of pointers.
*
* On error -1 is returned, check errno
* On success size of array is returned, which may be 0 on an empty string
* or 1 if no delim was found.
*
* You could rewrite this to return the char ** array instead and upon NULL
* know it's an allocation problem but I did the triple array here. Note that
* upon the hitting two delim's in a row "foo,,bar" the array would be:
* { "foo", NULL, "bar" }
*
* You need to define the semantics of a trailing delim Like "foo," is that a
* 2 count array or an array of one? I choose the two count with the second entry
* set to NULL since it's valueless.
* Modifies str so make a copy if this is a problem
*/
int split( char * str, char delim, char ***array, int *length ) {
char *p;
char **res;
int count=0;
int k=0;
p = str;
// Count occurance of delim in string
while( (p=strchr(p,delim)) != NULL ) {
*p = 0; // Null terminate the deliminator.
p++; // Skip past our new null
count++;
}
// allocate dynamic array
res = calloc( 1, count * sizeof(char *));
if( !res ) return -1;
p = str;
for( k=0; k<count; k++ ){
if( *p ) res[k] = p; // Copy start of string
p = strchr(p, 0 ); // Look for next null
p++; // Start of next string
}
*array = res;
*length = count;
return 0;
}
char str[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,";
int main() {
char **res;
int k=0;
int count =0;
int rc;
rc = split( str, ',', &res, &count );
if( rc ) {
printf("Error: %s errno: %d \n", strerror(errno), errno);
}
printf("count: %d\n", count );
for( k=0; k<count; k++ ) {
printf("str: %s\n", res[k]);
}
free(res );
return 0;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With