Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Read .csv file in C

Tags:

c

parsing

csv

I have a .csv file:

lp;imie;nazwisko;ulica;numer;kod;miejscowosc;telefon;email;data_ur
1;Jan;Kowalski;ul. Nowa;1a;11-234;Budry;123-123-456;[email protected];1980.05.13
2;Jerzy;Nowak;ul. Konopnicka;13a/3;00-900;Lichowice;(55)333-44-55;[email protected];1990.03.23

And I need to read this in C. I have some code, but only for the connection.

like image 273
Zesss Avatar asked Oct 16 '12 09:10

Zesss


People also ask

Can we read CSV file in C?

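Yes. To read a CSV file, open it with fopen() in read mode ("r"), read it line by line with fgets(), and split each line on the delimiter. To add records instead, open the CSV file using a file pointer in append mode, which places the pointer at the end of the file; take input from the user in temporary variables, then use fprintf() to write the variables in order, separated by commas.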

How do I read a CSV file in C sharp?

In C#, StreamReader class is used to deal with the files. It opens, reads and helps in performing other functions to different types of files. We can also perform different operations on a CSV file while using this class. OpenRead() method is used to open a CSV file and ReadLine() method is used to read its contents.

How do I open a CSV file?

Answer: You can open the CSV file on Google Sheet, Notepad, or OpenOffice Calc. Just right-click on the file, select Open With and pick either OpenOffice Calc or Notepad. To open in Google Sheets, go to the File option in Google Sheet, click import, select the CSV file you want to open, click import.


2 Answers

Hopefully this would get you started

See it live on http://ideone.com/l23He (using stdin)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return the num-th (1-based) ';'-separated field of `line`.
 *
 * The line terminator ("\n" or "\r\n") is stripped first. Empty fields are
 * kept, so "a;;c" has three fields and field 2 is "". (strtok() would merge
 * the two delimiters and shift every following field number.)
 *
 * `line` is modified in place: the terminator and the delimiter that ends the
 * requested field are overwritten with '\0'. The returned pointer points into
 * `line`, so it is valid only as long as that buffer is.
 *
 * Returns NULL when `line` is NULL, `num` is less than 1, or the line has
 * fewer than `num` fields.
 */
const char *getfield(char *line, int num)
{
    if (line == NULL || num < 1)
        return NULL;

    /* Cut the line at the first CR or LF so the last field is clean. */
    line[strcspn(line, "\r\n")] = '\0';

    char *field = line;
    for (;;) {
        char *end = field + strcspn(field, ";");
        int is_last = (*end == '\0');

        if (--num == 0) {
            *end = '\0';
            return field;
        }
        if (is_last)
            return NULL;    /* ran out of fields before reaching num */

        field = end + 1;
    }
}

/*
 * Print the third field of every line of the file "input".
 */
int main(void)
{
    FILE *stream = fopen("input", "r");
    if (stream == NULL) {
        perror("input");
        return EXIT_FAILURE;
    }

    char line[1024];
    while (fgets(line, sizeof line, stream)) {
        /*
         * getfield() clobbers its argument, so work on a copy; `line` stays
         * intact should more than one field be needed from it.
         */
        char tmp[sizeof line];
        memcpy(tmp, line, strlen(line) + 1);

        const char *field = getfield(tmp, 3);
        /* Never hand a NULL pointer to %s: that is undefined behaviour. */
        printf("Field 3 would be %s\n", field ? field : "(null)");
    }

    int status = ferror(stream) ? EXIT_FAILURE : EXIT_SUCCESS;
    fclose(stream);
    return status;
}

Output:

Field 3 would be nazwisko
Field 3 would be Kowalski
Field 3 would be Nowak
like image 73
sehe Avatar answered Sep 19 '22 21:09

sehe


The following code is in plain C and handles blank spaces. It allocates memory only once per processed line, so a single free() is needed for each line.

http://ideone.com/mSCgPM

/* Tiny CSV Reader */
/* Copyright (C) 2015, Deligiannidis Konstantinos

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Split `line` into tokens separated by the (possibly multi-character)
 * string `delim`.
 *
 * On success *out_storage receives ONE malloc()ed region laid out as
 *
 *   [ ptr1, ptr2, ..., ptrN, NULL, "token1\0...", "token2\0...", ... ]
 *
 * i.e. a NULL-terminated array of N token pointers followed by a private copy
 * of `line` in which the first character of every delimiter occurrence has
 * been replaced by '\0'. The input `line` itself is never modified. The
 * caller releases everything with a single free(*out_storage).
 *
 * Empty tokens are preserved, and a line without any delimiter yields one
 * token. There is no fixed limit on the number of tokens: they are counted in
 * a first pass, so no fixed-size index tables are needed.
 *
 * Returns the number of tokens (>= 1) on success, or a negative value:
 *   -1  out_storage, line or delim is NULL
 *   -2  delim is the empty string
 *   -3  the result is too large or malloc() failed
 *   -4  *out_storage is not NULL on entry (it must be, to signal "unallocated")
 * On failure *out_storage is left untouched.
 */
int getcols(const char *const line, const char *const delim, char ***out_storage)
{
    if (out_storage == NULL)
        return -1;
    if (*out_storage != NULL)
        return -4;
    if (line == NULL || delim == NULL)
        return -1;

    const size_t delim_len = strlen(delim);
    if (delim_len == 0)
        return -2;

    /* Pass 1: count the tokens so that the allocation can be sized exactly. */
    size_t tokens = 1;
    for (const char *scan = line; (scan = strstr(scan, delim)) != NULL; scan += delim_len)
        tokens++;

    const size_t line_len = strlen(line);

    /* The count is returned as int and feeds a size computation: guard both. */
    if (tokens > (size_t)INT_MAX || tokens > SIZE_MAX / sizeof(char *) - 1)
        return -3;
    const size_t ptr_bytes = (tokens + 1) * sizeof(char *);
    if (line_len > SIZE_MAX - ptr_bytes - 1)
        return -3;

    /* One contiguous region: pointer table first, string data right after. */
    char **out = malloc(ptr_bytes + line_len + 1);
    if (out == NULL)
        return -3;

    char *text = (char *)out + ptr_bytes;
    memcpy(text, line, line_len + 1);    /* includes the terminating '\0' */

    /*
     * Pass 2: walk the copy, record where each token starts and terminate it.
     * Only the first delimiter character is overwritten; the remaining ones
     * sit behind the '\0' and are never seen through the token pointers.
     */
    size_t idx = 0;
    char *field = text;
    char *hit;
    while ((hit = strstr(field, delim)) != NULL) {
        out[idx++] = field;
        *hit = '\0';
        field = hit + delim_len;
    }
    out[idx++] = field;     /* last token runs to the end of the line */
    out[idx] = NULL;        /* sentinel, so callers may iterate without a count */

    *out_storage = out;
    return (int)tokens;
}

/*
 * Demonstrates getcols() twice: on a single line with a two-character
 * delimiter, then nested (text -> rows on "\n", each row -> columns on ";").
 */
int main(void)
{
    char in_line[] = "Arg1;;Th;s is not Del;m;ter;;Arg3;;;;Final";
    char delim[] = ";;";
    char **columns = NULL;  /* must be NULL before the call, otherwise -4 is returned */

    printf("Example1:\n");

    int cols_found = getcols(in_line, delim, &columns);
    if (cols_found < 0) {
        fprintf(stderr, "getcols failed: %d\n", cols_found);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < cols_found; i++)
        printf("Column[ %d ] = %s\n", i, columns[i]);

    free(columns);          /* releases the single contiguous region */
    columns = NULL;         /* ready for the next getcols() call */

    printf("\n\nExample2, Nested:\n\n");

    char example_file[] = "ID;Day;Month;Year;Telephone;email;Date of registration\n"
        "1;Sunday;january;2009;123-124-456;[email protected];2015-05-13\n"
        "2;Monday;March;2011;(+30)333-22-55;[email protected];2009-05-23";

    char **rows = NULL;     /* getcols() requires it to be NULL */
    int status = getcols(example_file, "\n", &rows);
    if (status < 0) {
        fprintf(stderr, "getcols failed: %d\n", status);
        return EXIT_FAILURE;
    }

    for (int i = 0; rows[i]; i++) {
        printf("Line[ %d ] = %s\n", i, rows[i]);

        char **row_cols = NULL;
        status = getcols(rows[i], ";", &row_cols);
        if (status < 0) {
            fprintf(stderr, "getcols failed: %d\n", status);
            free(rows);
            return EXIT_FAILURE;
        }
        for (int j = 0; row_cols[j]; j++)
            printf("  Col[ %d ] = %s\n", j, row_cols[j]);
        free(row_cols);
    }

    free(rows);
    rows = NULL;

    return 0;
}
like image 41
user5175223 Avatar answered Sep 19 '22 21:09

user5175223