Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

cURL: Handle multiple asynchronous requests

I've never really done anything multithreaded or asynchronous in C++; so far I have only used cURL for single synchronous requests.
To better illustrate what I'm trying to do, I wrote a simple JavaScript snippet that does what I want to do with cURL in C++.

/**
 * Starts one asynchronous HTTP request and hands the finished request object,
 * together with the caller's id, to ResponseCallback().
 *
 * method - HTTP verb, e.g. "GET"
 * url    - address to request
 * data   - request body passed to Send()
 * id     - caller-chosen value used to tell the responses apart
 */
function AddRequest( method, url, data, id ) {
    var request = new ActiveXObject("Msxml2.XMLHTTP.6.0"); // in a browser: new XMLHttpRequest()

    // Called on every state change; only the final state (4) is reported.
    var notifyWhenDone = function() {
        if (request.readyState != 4)
            return;
        ResponseCallback( request, id );
    };

    request.onreadystatechange = notifyWhenDone;
    request.Open( method, url, true ); // third argument true = asynchronous
    request.Send( data );
}

/**
 * Prints the id of a finished request followed by its response text.
 *
 * httpObj - the completed request object
 * id      - the value that was given to AddRequest() for this request
 */
function ResponseCallback( httpObj, id ) {
    // Single place to swap the output channel (in a browser: alert( text )).
    var show = function( text ) {
        WScript.Echo( text );
    };

    show( id );
    show( httpObj.ResponseText );
}

// Example usage: queue two requests, then continue immediately.

AddRequest("GET","http://example.com/","",1);
AddRequest("GET","https://www.facebook.com","",2);
WScript.Echo( "all requests sent" ); //alert( "all requests sent" );
// Both requests are opened asynchronously, so they are in flight at the same time.
// Whenever one of them finishes, ResponseCallback() is called with the id
// (1 or 2 here) that identifies which request it was.

cURL just seems to be COMPLETELY different from, and unnecessarily more complicated than, XMLHttpRequest, even though both just send HTTP requests...
Here is my first approach (based on the answer of hogren):

#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;

// NOTE(review): a plain bool gives no mutual exclusion between threads. In the code
// shown it is only ever written (by ResponseCallback); every `while (printing)` wait
// is commented out.
bool printing = false; //will allow us to prevent prints overlapping each other

// Bundles every input of one request so that it can be handed to a thread entry
// point through the single void* argument of pthread_create().
struct requestStruct { //will allow us to pass more than one argument to the threaded functions
    int id; // caller-chosen identifier, forwarded to ResponseCallback by AddRequest
    const char* url;
    const char* method;
    const char* body;
    // NOTE(review): the key type is const char*, so the map orders and compares the
    // pointer values, not the header-name text.
    map<const char*, const char*> headers;
    const char* proxy; // main() uses "" for "no proxy"
    int timeout; // forwarded to the `timeout` parameter of HttpRequest
};

// Result of one HttpRequest() call.
struct responseStruct { //will allow us to return more than one value from the Request function
    long statusCode; // HttpRequest fills this from CURLINFO_RESPONSE_CODE
    //map<const char*, const char*> headers;
    // Raw, non-owning pointer to the body text: whoever fills this in has to keep
    // the pointed-to characters alive for as long as the struct is used.
    const char* body;
};

// libcurl write callback: appends the received chunk to the std::string that was
// registered through CURLOPT_WRITEDATA.
//   ptr    - start of the received bytes (NOT NUL-terminated)
//   size   - size of one element
//   count  - number of elements; the chunk is size * count bytes long
//   stream - the std::string* given to CURLOPT_WRITEDATA
// Returns the number of bytes consumed. Returning anything other than
// size * count makes libcurl abort the transfer, which is what happens here when
// no destination string is available or the append fails.
size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
    const size_t byteCount = size * count;
    if (byteCount == 0)
        return 0; // nothing to store
    if (ptr == NULL || stream == NULL)
        return 0; // no source or destination: report an error to libcurl

    try {
        // Use the (pointer, length) overload. The old (ptr, 0, n) form first built a
        // temporary std::string from ptr, which needs a terminating NUL that the
        // buffer does not have and which cuts the data at any embedded NUL byte.
        static_cast<std::string*>(stream)->append(static_cast<const char*>(ptr), byteCount);
    } catch (...) {
        // Exceptions must not travel through libcurl's C code; signal failure instead.
        return 0;
    }
    return byteCount;
}

// Prints the outcome of one finished request: the request id, the HTTP status
// code and, on the following line, the response body.
// The `printing` flag is raised while the output is written and lowered afterwards.
// Always returns NULL.
static void *ResponseCallback(int id, struct responseStruct *response) {
    //map<const char*, const char*> headers = response -> headers;

    //while (printing) {} //wait for other threads to stop printing
    printing = true; // raised for the duration of the output
    cout << id << " response received! Code: " << response->statusCode << endl;
    cout << response->body << endl;
    printing = false; // lowered again once the output is done
    return NULL;
}

// Performs one blocking HTTP request with libcurl (curl_easy_perform) and returns
// the status code plus a pointer to the body text.
//   url, method - passed to CURLOPT_URL / CURLOPT_CUSTOMREQUEST
//   body        - sent through CURLOPT_POSTFIELDS, only for POST, PUT and DELETE
//   headers     - every entry becomes one "name: value" request header
//   proxy       - passed to CURLOPT_PROXY unless it is the empty string
//   timeout     - passed to CURLOPT_TIMEOUT unless it is 0
// NOTE(review): the results of curl_easy_init() and curl_easy_perform() are not checked.
struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
    CURL *curl;
    curl = curl_easy_init();

    long statusCode = 0;
    map<const char*, const char*> respHeaders; // only referenced by the commented-out header code below
    // Local buffer that writeToString fills; it is destroyed when this function returns.
    string respBody;

    // std::string copies used for the comparisons further down
    string _url(url);
    string _method(method);
    string _proxy(proxy);

    // NOTE(review): this list is never released; there is no curl_slist_free_all() call in this function.
    struct curl_slist *headerList = NULL;
    string headerString;

    curl_easy_setopt(curl, CURLOPT_URL, url); //set url
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
    for (auto header=headers.begin(); header!=headers.end(); ++header) { //make header list
        headerString = header->first;
        headerString.append(": ").append(header->second);
        headerList = curl_slist_append(headerList, headerString.c_str()); 
        //cout << headerString << '\n';
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
    if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    // NOTE(review): the value 0L switches peer certificate verification OFF for https URLs.
    if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    if (_proxy != "") //set proxy
        curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
    // NOTE(review): libcurl documents CURLOPT_TIMEOUT in seconds; confirm the unit the callers intend.
    if (timeout != 0) //set timeout
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects

    //curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
    //curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
    //??
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body

    curl_easy_perform(curl); //send the request

    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code

    struct responseStruct response;
    response.statusCode = statusCode;
    //response.headers;
    // NOTE(review): respBody is a local object, so the pointer stored here dangles as
    // soon as this function returns and the caller reads freed memory.
    response.body = respBody.c_str();

    curl_easy_cleanup(curl);

    return response;
}

// Thread entry point (signature required by pthread_create): unpacks the
// requestStruct passed as `arguments`, runs HttpRequest() with its fields and
// passes the result, together with the request id, to ResponseCallback().
static void *AddRequest( void *arguments ) {
    // get arguments:
      struct requestStruct *args = (struct requestStruct*)arguments;
      int id = args->id; 
      const char* url = args->url; 
      const char* method = args->method; 
      const char* body = args->body; 
      map<const char*, const char*> headers = args->headers; // copies the whole map
      const char* proxy = args->proxy; 
      int timeout = args->timeout;

    // print arguments:
      //while (printing) {} //wait for other threads to stop printing
      //printing = true; //tell other threads to not print anything
      //  cout << id << endl << url << endl << method << endl;
      //printing = false; //tell the other threads it's okay to print again now

    struct responseStruct response = HttpRequest(url, method, body, headers, proxy, timeout);

    ResponseCallback(id,&response);

    // NOTE(review): pthread_exit() ends the thread here, so the `return NULL` below is
    // never reached; whether the destructors of the locals above (e.g. `headers`) run
    // at this point depends on the pthread implementation - confirm for your platform.
    pthread_exit(0);
    return NULL;
}

// Demo driver: fills in three requestStruct entries (all for the same URL) and
// starts one thread per entry on AddRequest().
// NOTE(review): the threads are never joined; the getchar() at the end is the only
// thing that keeps the process alive while they run.
// NOTE(review): curl_global_init() is not called before the threads are started; the
// libcurl documentation asks for that in multi-threaded programs - confirm.
int main() {
    //map<const char*, const char*> headers;
    //headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    //struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
    //cout << response.body << endl;

    pthread_t threads[3];
    // The entries stay in scope until main() returns, so the pointers handed to the threads remain valid.
    struct requestStruct reqArguments[3];

    map<const char*, const char*> headers;
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    const char* proxy = ""; // empty string: HttpRequest does not set CURLOPT_PROXY

    reqArguments[0].id = 0;
    reqArguments[0].url = "https://www.facebook.com/";
    reqArguments[0].method = "GET";
    reqArguments[0].headers = headers;
    reqArguments[0].body = "";
    reqArguments[0].proxy = proxy;
    reqArguments[0].timeout = 6000;
    pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[1].id = 1;
    reqArguments[1].url = "https://www.facebook.com/";
    reqArguments[1].method = "GET";
    reqArguments[1].headers = headers;
    reqArguments[1].body = "";
    reqArguments[1].proxy = proxy;
    reqArguments[1].timeout = 6000;
    pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[2].id = 2;
    reqArguments[2].url = "https://www.facebook.com/";
    reqArguments[2].method = "GET";
    reqArguments[2].headers = headers;
    reqArguments[2].body = "";
    reqArguments[2].proxy = proxy;
    reqArguments[2].timeout = 6000;
    pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments

    getchar(); //prevent console from closing instantly
    return 0;
}

I'm not really sure whether I'm using pthreads correctly.
There are some issues:
1. For some reason only the first request succeeds; the others aren't even sent.
UNLESS I uncomment the first 4 lines of the main function, which do a direct request without a new thread, but I obviously don't want to use that code.
2. The HttpRequest() function doesn't return the response HTML properly; I only receive garbage.
I think issue 2 might be a pointer-related issue with the struct returned by HttpRequest(), but I wasn't able to fix it. :(
3. My last and less important problem is that I don't know how to receive the response headers and put them in a map.
Btw: I'm compiling with Visual C++ 2010 and I'm debugging the http traffic with Fiddler.

like image 273
Forivin Avatar asked Nov 01 '22 17:11

Forivin


1 Answers

EDIT : This is your code that I corrected.

There was not really an error in the logic, but after several tests I saw that launching several curl_easy_perform() calls at the same time causes issues, so I added a delay between them (5000 ms is generous; you can reduce it).

Also, pthread_exit() caused problems (the response came back with an error), so I removed the call.

#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl/curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;

// NOTE(review): a plain bool gives no mutual exclusion between threads. In the code
// shown it is only ever written (by ResponseCallback); every `while (printing)` wait
// is commented out.
bool printing = false; //will allow us to prevent prints overlapping each other
#if defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WINDOWS__) || defined(__TOS_WIN__)

  #include <windows.h>

  // Blocks the calling thread for at least `ms` milliseconds.
  inline void delay( unsigned long ms )
    {
    Sleep( ms );
    }

#else  /* presume POSIX */

  #include <errno.h>
  #include <time.h>
  #include <unistd.h>

  // Blocks the calling thread for at least `ms` milliseconds.
  // nanosleep() is used instead of usleep(): POSIX only requires usleep() to accept
  // values below 1,000,000 microseconds, and `ms * 1000` could overflow. The split
  // into seconds and nanoseconds below has neither problem.
  inline void delay( unsigned long ms )
    {
    struct timespec remaining;
    remaining.tv_sec  = static_cast<time_t>( ms / 1000UL );
    remaining.tv_nsec = static_cast<long>( ms % 1000UL ) * 1000000L;

    // When a signal interrupts the sleep, nanosleep() stores the time still left in
    // its second argument; keep sleeping until the full delay has passed.
    while ( nanosleep( &remaining, &remaining ) == -1 && errno == EINTR )
      {
      }
    }

#endif 


// Bundles every input of one request so that it can be handed to a thread entry
// point through the single void* argument of pthread_create().
struct requestStruct { //will allow us to pass more than one argument to the threaded functions
    int id; // caller-chosen identifier, forwarded to ResponseCallback by AddRequest
    const char* url;
    const char* method;
    const char* body;
    // NOTE(review): the key type is const char*, so the map orders and compares the
    // pointer values, not the header-name text.
    map<const char*, const char*> headers;
    const char* proxy; // main() uses "" for "no proxy"
    int timeout; // forwarded to the `timeout` parameter of HttpRequest
};

// Result of one HttpRequest() call.
struct responseStruct { //will allow us to return more than one value from the Request function
    long statusCode; // HttpRequest fills this from CURLINFO_RESPONSE_CODE
    //map<const char*, const char*> headers;
    // Raw, non-owning pointer to the body text: whoever fills this in has to keep
    // the pointed-to characters alive for as long as the struct is used.
    const char* body;
};

// libcurl write callback: appends the received chunk to the std::string that was
// registered through CURLOPT_WRITEDATA.
//   ptr    - start of the received bytes (NOT NUL-terminated)
//   size   - size of one element
//   count  - number of elements; the chunk is size * count bytes long
//   stream - the std::string* given to CURLOPT_WRITEDATA
// Returns the number of bytes consumed. Returning anything other than
// size * count makes libcurl abort the transfer, which is what happens here when
// no destination string is available or the append fails.
size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
    const size_t byteCount = size * count;
    if (byteCount == 0)
        return 0; // nothing to store
    if (ptr == NULL || stream == NULL)
        return 0; // no source or destination: report an error to libcurl

    try {
        // Use the (pointer, length) overload. The old (ptr, 0, n) form first built a
        // temporary std::string from ptr, which needs a terminating NUL that the
        // buffer does not have and which cuts the data at any embedded NUL byte.
        static_cast<std::string*>(stream)->append(static_cast<const char*>(ptr), byteCount);
    } catch (...) {
        // Exceptions must not travel through libcurl's C code; signal failure instead.
        return 0;
    }
    return byteCount;
}

// Prints the outcome of one finished request: the request id, the HTTP status
// code and, on the following line, the response body.
//   id       - identifier of the request, as chosen by the caller
//   response - result to print; a NULL pointer is ignored, a NULL body prints as empty
// Safe to call from several threads at once: a mutex makes each report come out
// as one uninterrupted piece. Always returns NULL.
static void *ResponseCallback(int id, struct responseStruct *response) {
    // One lock shared by every caller; the old bool flag could not keep two
    // threads from printing at the same time.
    static pthread_mutex_t printLock = PTHREAD_MUTEX_INITIALIZER;

    if (response == NULL)
        return NULL;

    const long statusCode = response->statusCode;
    // Streaming a NULL char pointer would put cout into an error state.
    const char* body = (response->body != NULL) ? response->body : "";

    pthread_mutex_lock(&printLock);
    printing = true; // kept for any code that still inspects the flag
    cout << id << " response received! Code: " << statusCode << endl << body << endl;
    printing = false;
    pthread_mutex_unlock(&printLock);
    return NULL;
}

struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
    CURL *curl;
    curl = curl_easy_init();

    long statusCode = 0;
    map<const char*, const char*> respHeaders;
    string respBody;

    string _url(url);
    string _method(method);
    string _proxy(proxy);

    struct curl_slist *headerList = NULL;
    string headerString;

    curl_easy_setopt(curl, CURLOPT_URL, url); //set url
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
    for (std::map<const char*, const char*>::iterator header=headers.begin(); header!=headers.end(); ++header) { //make header list
        headerString = header->first;
        headerString.append(": ").append(header->second);
        headerList = curl_slist_append(headerList, headerString.c_str()); 
        //cout << headerString << '\n';
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
    if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    if (_proxy != "") //set proxy
        curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
    if (timeout != 0) //set timeout
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects

    //curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
    //curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
    //??
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body
    static int i=0;
    delay(5000*(i++));
    std::cout << "url: " << _url << ";" << std::endl;

    curl_easy_perform(curl); //send the request

    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code

    struct responseStruct response;
    response.statusCode = statusCode;
    //response.headers;
    response.body = respBody.c_str();

    curl_easy_cleanup(curl);

    return response;
}

static void *AddRequest( void *arguments ) {
    // get arguments:
      struct requestStruct *args = (struct requestStruct*)arguments;
      int id = args->id; 
      const char* url = args->url; 
      const char* method = args->method; 
      const char* body = args->body; 
      map<const char*, const char*> headers = args->headers; 
      const char* proxy = args->proxy; 
      int timeout = args->timeout;

    // print arguments:
      //while (printing) {} //wait for other threads to stop printing
      //printing = true; //tell other threads to not print anything
      //  cout << id << endl << url << endl << method << endl;
      //printing = false; //tell the other threads it's okay to print again now

      struct responseStruct response = HttpRequest(url, method, body, headers, proxy, timeout);

    ResponseCallback(id,&response);

    /* this code cause trouble (no response code) */
    //pthread_exit(0);
    return NULL;
}

int main() {
    //map<const char*, const char*> headers;
    //headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    //struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
    //cout << response.body << endl;

    pthread_t threads[3];
    struct requestStruct reqArguments[3];

    map<const char*, const char*> headers;
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    const char* proxy = "";

    reqArguments[0].id = 0;
    reqArguments[0].url = "https://www.duckduckgo.com/";
    reqArguments[0].method = "GET";
    reqArguments[0].headers = headers;
    reqArguments[0].body = "";
    reqArguments[0].proxy = proxy;
    reqArguments[0].timeout = 6000;
    pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[1].id = 1;
    reqArguments[1].url = "https://www.google.com/";
    reqArguments[1].method = "GET";
    reqArguments[1].headers = headers;
    reqArguments[1].body = "";
    reqArguments[1].proxy = proxy;
    reqArguments[1].timeout = 6000;
    pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments

    reqArguments[2].id = 2;
    reqArguments[2].url = "https://www.facebook.com/";
    reqArguments[2].method = "GET";
    reqArguments[2].headers = headers;
    reqArguments[2].body = "";
    reqArguments[2].proxy = proxy;
    reqArguments[2].timeout = 6000;
    pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments

    //        getchar();
    // that is cleaner
    for (int i=0; i<3; ++i) {
      int rc = pthread_join(threads[i], NULL);
      printf("In main: thread %d is complete\n", i);
    }

    return 0;
}

For the last question about headers, please post another question on Stack Overflow, because there are already many subjects in this one (I think).

And a little advice: working with objects makes source code much easier to write and to read.

END EDIT

This is a copy of the official libcurl example for multi-threading: http://curl.haxx.se/libcurl/c/multithread.html

/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/ 
/* A multi-threaded example that uses pthreads extensively to fetch
 * X remote files at once */ 

#include <stdio.h>
#include <pthread.h>
#include <curl/curl.h>

#define NUMT 4

/*
  List of URLs to fetch.

  If you intend to use a SSL-based protocol here you MUST setup the OpenSSL
  callback functions as described here:

  http://www.openssl.org/docs/crypto/threads.html#DESCRIPTION

*/ 
/* One URL per worker thread: main() starts NUMT threads and hands entry i of
   this table to thread i as the argument of pull_one_url(). */
const char * const urls[NUMT]= {
  "http://curl.haxx.se/",
  "ftp://cool.haxx.se/",
  "http://www.contactor.se/",
  "www.haxx.se"
};

static void *pull_one_url(void *url)
{
  CURL *curl;

  curl = curl_easy_init();
  curl_easy_setopt(curl, CURLOPT_URL, url);
  curl_easy_perform(curl); /* ignores error */ 
  curl_easy_cleanup(curl);

  return NULL;
}


/*
   int pthread_create(pthread_t *new_thread_ID,
   const pthread_attr_t *attr,
   void * (*start_func)(void *), void *arg);
*/ 

int main(int argc, char **argv)
{
  pthread_t tid[NUMT];
  int i;
  int error;

  /* Must initialize libcurl before any threads are started */ 
  curl_global_init(CURL_GLOBAL_ALL);

  for(i=0; i< NUMT; i++) {
    error = pthread_create(&tid[i],
                           NULL, /* default attributes please */ 
                           pull_one_url,
                           (void *)urls[i]);
    if(0 != error)
      fprintf(stderr, "Couldn't run thread number %d, errno %d\n", i, error);
    else
      fprintf(stderr, "Thread %d, gets %s\n", i, urls[i]);
  }

  /* now wait for all threads to terminate */ 
  for(i=0; i< NUMT; i++) {
    error = pthread_join(tid[i], NULL);
    fprintf(stderr, "Thread %d terminated\n", i);
  }

  return 0;
}

For interactive use, you can turn the urls array into a vector.

I hope that it will help you !

like image 89
hogren Avatar answered Nov 15 '22 03:11

hogren