Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Read line by line from a socket buffer

I want to write a function that reads a socket line by line, using the buffer filled by the read() function from the unistd.h header.

I have written this:

/*
 * Read one line from the descriptor fd, one byte at a time.
 *
 * Bytes are appended to a heap buffer (grown in 128-byte steps) until a
 * '\n' has been stored or read() stops returning data (end of stream or
 * error).  The terminating '\n' -- and any '\r' in front of it -- is kept
 * in the returned text, and the buffer is always NUL-terminated.
 *
 * fd   descriptor to read from
 * out  receives the malloc'ed line buffer; it is set on every return,
 *      including a return of 0, and the caller must free() it
 *
 * Returns the number of bytes stored, not counting the NUL; 0 means that
 * nothing could be read.  If memory cannot be allocated, a message is
 * printed and the process exits with status 1.
 */
int sgetline(int fd, char ** out)
{
    int buf_size = 128;
    int bytesloaded = 0;
    char ch;
    char * buffer = malloc(buf_size);
    char * newbuf;

    /* Checked explicitly: an assert() would vanish when NDEBUG is defined. */
    if(NULL == buffer)
    {
        printf("sgetline() allocation failed!\n");
        exit(1);
    }

    while( read(fd, &ch, 1) > 0 )
    {
        /* Keep room for this byte plus the terminating NUL. */
        if(bytesloaded + 1 >= buf_size)
        {
            buf_size += 128;
            newbuf = realloc(buffer, buf_size);

            if(NULL == newbuf)
            {
                free(buffer);
                printf("sgetline() allocation failed!\n");
                exit(1);
            }

            buffer = newbuf;
        }

        /* Store the byte directly; no strcat()/strlen() round trip needed. */
        buffer[bytesloaded++] = ch;

        if(ch == '\n')
        {
            break;
        }
    }

    buffer[bytesloaded] = '\0';
    *out = buffer;
    return bytesloaded;
}

but I have some problems with this function, for example, if the input is something like:

HTTP/1.1 301 Moved Permanently\r\n
Cache-Control:no-cache\r\n
Content-Length:0\r\n
Location\r\nhttp://bing.com/\r\n
\r\n\r\n

and I do

int sockfd = socket( ... );
//....
char* tbuf;
// Each sgetline() call hands back one line in tbuf; the loop stops once
// sgetline() reports that no bytes were read.
while(sgetline(sockfd, &tbuf) > 0)
{
    // NOTE(review): sgetline() returns as soon as it has read a '\n', so tbuf
    // holds at most one line terminator and can never equal "\r\n\r\n".
    if(strcmp(tbuf,"\r\n\r\n") == 0)
    {
       printf("End of Headers detected.\n");
    }
    // NOTE(review): tbuf is allocated inside sgetline() and is not freed here.
}

The above C application does not output "End of Headers detected.". Why is this, and how can I fix it?

like image 520
Jack Avatar asked Mar 22 '12 18:03

Jack


3 Answers

It's not OK to read one byte at a time, because you are making too many system calls. It is better to use a buffer: read a chunk and check whether you got a \n. After a line has been returned, the rest of the bytes already read remain in the buffer, so you cannot mix plain read/recv calls with read_line on the same socket. A "read n bytes" function that uses the same kind of buffer can be written in a similar way...

My version to read a line, and a little example to use it.

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <unistd.h>
#include <netinet/in.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <string.h>

/* Capacity, in bytes, of the receive buffer embedded in cbuf_t. */
#define CBSIZE 2048

/*
 * Buffered reader state for one socket.  Zero-initialise it and set fd
 * before the first read_line() call.  The buffer is empty whenever
 * rpos == wpos.
 */
typedef struct cbuf {
    char buf[CBSIZE];         /* bytes received but not yet handed out */
    int fd;                   /* socket the data is received from */
    unsigned int rpos, wpos;  /* next index to consume / end of valid data */
} cbuf_t;


/*
 * Copy the next line from the buffered socket into dst.
 *
 * The line terminator ('\n', and any '\r' before it) is kept, and dst is
 * NUL-terminated on every non-negative return.  Bytes received beyond the
 * end of the line stay in cbuf for the next call, so do not mix direct
 * read()/recv() calls on the same socket with read_line().
 *
 * cbuf  reader state; cbuf->fd must be a socket, because recv() is used
 *       (read() would be needed for other descriptor types)
 * dst   destination buffer
 * size  capacity of dst in bytes, including room for the NUL
 *
 * Returns the number of bytes stored (excluding the NUL), 0 when the peer
 * has closed the connection and no data is left, or -1 on a receive error
 * or when the line does not fit in size - 1 bytes.  A final line that is
 * not newline-terminated is returned when the connection closes instead
 * of being dropped.
 */
int read_line(cbuf_t *cbuf, char *dst, unsigned int size)
{
    unsigned int i = 0;
    ssize_t n;

    while (i < size) {
        if (cbuf->rpos == cbuf->wpos) {
            /* Buffer drained: refill from the front so that every recv()
             * can use the whole array, whatever the old offsets were. */
            n = recv(cbuf->fd, cbuf->buf, CBSIZE, 0);
            if (n < 0) {
                if (errno == EINTR)
                    continue;
                return -1;
            }
            if (n == 0) {
                if (i > 0)
                    break;      /* hand back the unterminated last line */
                dst[0] = 0;
                return 0;
            }
            cbuf->rpos = 0;
            cbuf->wpos = (unsigned int)n;
        }
        dst[i++] = cbuf->buf[cbuf->rpos++ % CBSIZE];
        if (dst[i - 1] == '\n')
            break;
    }
    /* i == size leaves no room for the terminating NUL. */
    if (i == size) {
        fprintf(stderr, "line too large: %u %u\n", i, size);
        return -1;
    }

    dst[i] = 0;
    return (int)i;
}

/*
 * Demo client: resolves www.google.com, sends a plain HTTP/1.1 GET for "/"
 * on port 80 and prints the response one line at a time through
 * read_line(), with a separator after every empty "\r\n" line.
 *
 * Returns 0 once the response has been read, 1 if resolving, allocating,
 * connecting or sending fails.
 */
int main()
{
    cbuf_t *cbuf;
    char buf[512];
    struct sockaddr_in saddr;
    struct hostent *h;
    char host[] = "www.google.com";
    int len;
    int sent;
    int rc = 1;

    /* gethostbyname() reports failures through h_errno rather than errno,
     * so perror() would print an unrelated message here. */
    h = gethostbyname(host);
    if(!h || h->h_addrtype != AF_INET || !h->h_addr_list[0]) {
        fprintf(stderr, "gethostbyname: cannot resolve %s\n", host);
        return 1;
    }

    /* Copy the resolved address directly; no text round trip is needed. */
    memset(&saddr, 0, sizeof(saddr));
    saddr.sin_family = AF_INET;
    saddr.sin_port = htons(80);
    memcpy(&saddr.sin_addr, h->h_addr_list[0], sizeof(saddr.sin_addr));

    cbuf = calloc(1, sizeof(*cbuf));
    if(!cbuf) {
        perror("calloc");
        return 1;
    }

    fprintf(stdout, "Connecting to ip: %s\n", inet_ntoa(saddr.sin_addr));
    if((cbuf->fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        perror("socket");
        goto out_free;
    }
    if(connect(cbuf->fd, (struct sockaddr*)&saddr, sizeof(saddr)) < 0) {
        perror("connect");
        goto out_close;
    }

    len = snprintf(buf, sizeof(buf), "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n", host);
    if(len < 0 || (size_t)len >= sizeof(buf)) {
        fprintf(stderr, "request does not fit in buffer\n");
        goto out_close;
    }

    /* write() may accept only part of the request, so loop until all of
     * it has been sent. */
    for(sent = 0; sent < len; ) {
        ssize_t w = write(cbuf->fd, buf + sent, (size_t)(len - sent));
        if(w < 0) {
            if(errno == EINTR)
                continue;
            perror("write");
            goto out_close;
        }
        sent += (int)w;
    }

    while(read_line(cbuf, buf, sizeof(buf)) > 0) {
        // if it's an empty \r\n on a line, header ends //
        if(buf[0]=='\r' && buf[1] == '\n') {
            printf("------------------------\n");
        }
        printf("[%s]", buf);
    }
    rc = 0;

out_close:
    close(cbuf->fd);
out_free:
    free(cbuf);
    return rc;
}
like image 101
Viorel Avatar answered Nov 20 '22 02:11

Viorel


Try this implementation instead:

/*
 * Read one line from fd into a freshly allocated, NUL-terminated string.
 *
 * Bytes are read one at a time until '\n'.  The '\n' is never stored, and
 * a '\r' directly in front of it is dropped as well.
 *
 * On success *out receives the buffer (the caller frees it) and the
 * return value is the line length, excluding the line break and the NUL.
 * If read() yields fewer than one byte before a '\n' is seen, or if
 * memory cannot be obtained, the partial buffer is freed, *out is left
 * untouched and -1 is returned.
 */
int sgetline(int fd, char ** out)
{
    char *line = NULL;
    char *grown;
    int capacity = 0;
    int length = 0;

    for (;;)
    {
        char c;

        /* one byte per call; anything short of that is error/disconnect */
        if (read(fd, &c, 1) < 1)
        {
            free(line);
            return -1;
        }

        if (c == '\n')
            break; /* end of line reached */

        /* buffer full (or not allocated yet): extend by 128 bytes */
        if (length == capacity)
        {
            capacity += 128;
            grown = realloc(line, capacity);
            if (grown == NULL)
            {
                free(line);
                return -1;
            }
            line = grown;
        }

        line[length++] = c;
    }

    /* "\r\n" terminator: the '\n' was never stored, so only drop the '\r' */
    if (length > 0 && line[length - 1] == '\r')
        length--;

    /* make sure there is a slot for the terminator */
    if (length == capacity)
    {
        capacity += 1;
        grown = realloc(line, capacity);
        if (grown == NULL)
        {
            free(line);
            return -1;
        }
        line = grown;
    }

    line[length] = '\0';
    *out = line;

    return length;
}

int sockfd = socket( ... );         
//....         
char* tbuf;         
int ret;

// keep reading until end of headers is detected.
// headers are terminated by a 0-length line
do
{
    // read a single line
    ret = sgetline(sockfd, &tbuf);
    // on -1 sgetline() has already freed its buffer and has not assigned
    // tbuf, so there is nothing to free on this path
    if (ret < 0)
        break; // error/disconnect

    // is it a 0-length line?
    // (sgetline() strips the "\r\n", so an empty header line has length 0)
    if (ret == 0)
    {
       printf("End of Headers detected.\n");         
       free(tbuf);
       break;
    }

    // tbuf contains a header line, use as needed...

    // every successful call returns a new allocation; release it before
    // reading the next line
    free(tbuf);
}
while (true);
like image 23
Remy Lebeau Avatar answered Nov 20 '22 01:11

Remy Lebeau


You are making things more difficult for yourself than they need to be. You really don't need to do strcats to get the single character you read on each read added at the current position.

But your bug is that the routine returns as soon as it sees a \n, so the string it returns can never contain anything following the first \n.

like image 2
DRVic Avatar answered Nov 20 '22 02:11

DRVic