Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

libcurl: How to download url with original filename? (equivalent for "-O/--remote-name" )

Question 1: When a URL is downloaded using libcurl, how can the original name of the downloaded file be preserved? libcurl asks the programmer to supply the file name. That is easy when the URL itself contains the name; e.g. for the URL below it is easy to figure out that the target name is vimqrc.pdf.

 http://tnerual.eriogerg.free.fr/vimqrc.pdf

But sometimes the server generates the target name dynamically; e.g. the URL below is saved as AdbeRdr1010_eu_ES.exe both by wget (no arguments except the URL) and by curl (with the -O argument):

http://get.adobe.com/reader/download/?installer=Reader_10.1_Basque_for_Windows&standalone=1%22

How do curl (-O) and wget figure out the name of the file? My current code is below:

//invoked as ./a.out <URL>

#include <stdio.h>
#include <curl/curl.h>

/* Fixed path the downloaded body is written to. The pointer targets a string
 * literal, so it is const-qualified to prevent writes through it. */
const char *location = "/tmp/test/out";

/*
 * Write callback handed to libcurl: writes the received chunk (nmemb items of
 * `size` bytes each, starting at ptr) to `stream`.
 * Returns whatever fwrite() reports, i.e. the number of items written.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fwrite(ptr, size, nmemb, stream);
}

int main(int argc, char *argv[])
{
    CURL        *curl;
    CURLcode    res;
    int         ret = -1;


    if (argc!= 2) {
        //invoked as ./a.out <URL>
        return -1;
    } 

    curl = curl_easy_init();
    if (!curl) {
        goto bail;
    }

    FILE *fp = fopen(location, "wb");
    curl_easy_setopt(curl, CURLOPT_URL, argv[1]); //invoked as ./a.out <URL>
    /* example.com is redirected, so we tell libcurl to follow redirection */
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);

    /* Perform the request, res will get the return code */
    res = curl_easy_perform(curl);
    /* Check for errors */
    if(res != CURLE_OK)
        fprintf(stderr, "curl_easy_perform() failed: %s\n",
                curl_easy_strerror(res));

    /* always cleanup */
    curl_easy_cleanup(curl);
    ret = 0;
    fclose(fp);

bail:
    return ret;
}
like image 742
bladeWalker Avatar asked Aug 29 '14 21:08

bladeWalker


1 Answers

I found the answer in the libcurl source code. The "remote name" comes from the "Content-Disposition" response header: curl parses the headers and looks for "filename=" in the Content-Disposition header. This parsing is done in the callback provided through the CURLOPT_HEADERFUNCTION option. Finally, in the callback for writing data (provided through CURLOPT_WRITEFUNCTION), this remote name is used to create the output file.

If the file name is missing from the header, it is simply derived from the URL itself. The code below is pretty much copied from the curl source, with a few modifications of my own to make it simpler and match my requirements.

#define _GNU_SOURCE 
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <curl/curl.h>

/*
 * State for one download, passed as user data to both the header callback
 * and the write callback.
 *
 * uint64_t is taken from <stdint.h>; a local `typedef unsigned long uint64_t`
 * is only 64 bits wide on LP64 targets and clashes with the standard header.
 */
typedef struct {
    char        dnld_remote_fname[4096];    /* output file name; empty until one is known */
    char        dnld_url[4096];             /* copy of the requested URL */
    FILE        *dnld_stream;               /* output file; NULL until it has been opened */
    FILE        *dbg_stream;                /* not referenced by the code in this file */
    uint64_t    dnld_file_sz;               /* running count of body bytes written */
} dnld_params_t;

/*
 * Extract the "filename=" value from the value part of a Content-Disposition
 * header and store it, NUL-terminated, in oname.
 *
 * cd    - NUL-terminated text that follows "Content-disposition:".
 *         Example: " attachment; filename=name1367; charset=funny\r\n"
 * oname - destination buffer of at least ONAME_CAP bytes (the caller in this
 *         file passes a 4096-byte array). Left untouched on failure.
 *
 * The key is matched case-insensitively using only standard C. Surrounding
 * double quotes, the trailing CR/LF of the header line and any directory
 * components are removed, so a hostile server cannot steer the download
 * outside the output directory with a name such as "../../etc/passwd".
 *
 * Returns 0 when a non-empty name was stored, -1 otherwise.
 */
static int get_oname_from_cd(char const*const cd, char *oname)
{
    enum { ONAME_CAP = 4096 };
    char const*const cdtag  = "Content-disposition:";
    char const*const key    = "filename=";
    size_t const     keylen = strlen(key);
    char const       *val   = NULL;
    char const       *end   = NULL;
    char const       *name  = NULL;
    size_t           len    = 0;

    if (!cd || !oname) {
        return -1;
    }

    /* Case-insensitive search for the key; key itself is all lower case. */
    for (char const *p = cd; *p != '\0'; p++) {
        size_t i = 0;
        while (i < keylen && tolower((unsigned char)p[i]) == key[i]) {
            i++;
        }
        if (i == keylen) {
            val = p + keylen;
            break;
        }
    }
    if (!val) {
        printf("No key-value for \"%s\" in \"%s\"", key, cdtag);
        return -1;
    }

    while (*val == ' ' || *val == '\t') {
        val++;
    }

    if (*val == '"') {
        /* quoted value: runs up to the closing quote */
        val++;
        end = val;
        while (*end != '\0' && *end != '"' && *end != '\r' && *end != '\n') {
            end++;
        }
    } else {
        /* bare token: runs up to the next parameter or the end of the line */
        end = val;
        while (*end != '\0' && *end != ';' && *end != '\r' && *end != '\n') {
            end++;
        }
        while (end > val && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }
    }

    /* Keep only the part after the last path separator. */
    name = val;
    for (char const *p = val; p < end; p++) {
        if (*p == '/' || *p == '\\') {
            name = p + 1;
        }
    }

    len = (size_t)(end - name);
    if (len == 0) {
        return -1;
    }
    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;    /* truncate rather than overflow oname */
    }
    memcpy(oname, name, len);
    oname[len] = '\0';

    return 0;
}

/*
 * Derive an output file name from a URL: everything after the last '/' that
 * follows the optional "scheme://" prefix.
 *
 * url   - NUL-terminated URL.
 * oname - destination buffer of at least ONAME_CAP bytes (the caller in this
 *         file passes a 4096-byte array). Set to "" when no name is found.
 *
 * Returns 0 when a non-empty name was stored, -1 when the URL is NULL, has no
 * '/' after the host part, or ends in '/'.
 */
static int get_oname_from_url(char const* url, char *oname)
{
    enum { ONAME_CAP = 4096 };
    char const  *path  = NULL;
    char const  *slash = NULL;
    size_t      len    = 0;

    if (!oname) {
        return -1;
    }
    oname[0] = '\0';
    if (!url) {
        return -1;
    }

    /* Skip "http(s)://" when present; otherwise search the whole string. */
    path = strstr(url, "://");
    path = path ? path + strlen("://") : url;

    slash = strrchr(path, '/');
    if (!slash) {
        return -1;              /* e.g. "http://example.com" */
    }

    len = strlen(slash + 1);
    if (len == 0) {
        return -1;              /* URL ends in '/' */
    }
    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;    /* truncate rather than overflow oname */
    }
    memcpy(oname, slash + 1, len);
    oname[len] = '\0';

    return 0;
}

/*
 * Header callback registered through CURLOPT_HEADERFUNCTION.
 *
 * hdr      - header data, size * nmemb bytes; libcurl does not NUL-terminate it.
 * userdata - the dnld_params_t registered through CURLOPT_HEADERDATA.
 *
 * When the data starts with "Content-disposition:" (any letter case), the
 * file name it carries is stored in dnld_params->dnld_remote_fname.
 *
 * Always returns size * nmemb, so header parsing never aborts the transfer.
 */
size_t dnld_header_parse(void *hdr, size_t size, size_t nmemb, void *userdata)
{
    const   size_t  cb      = size * nmemb;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    char const*const cdtag = "Content-disposition:";
    const   size_t  taglen  = strlen(cdtag);
    char    *line           = NULL;

    /* Work on a NUL-terminated copy: the string functions below would
     * otherwise read past the end of libcurl's buffer. */
    line = malloc(cb + 1);
    if (!line) {
        /* best effort: skip parsing, the transfer itself can continue */
        return cb;
    }
    memcpy(line, hdr, cb);
    line[cb] = '\0';

    /* Example:
     * ...
     * Content-Type: text/html
     * Content-Disposition: filename=name1367; charset=funny; option=strange
     */
    if (strstr(line, "Content-disposition:")) {
        printf ("has c-d: %s\n", line);
    }

    if (!strncasecmp(line, cdtag, taglen)) {
        printf ("Found c-d: %s\n", line);
        if (get_oname_from_cd(line + taglen, dnld_params->dnld_remote_fname)) {
            printf("ERR: bad remote name");
        }
    }

    free(line);
    return cb;
}

/*
 * Open the output file for a download under /tmp/.
 *
 * fname - file name to create; the path opened is "/tmp/" "/" fname.
 *
 * Returns a stream opened in "wb" mode, or NULL when fname is NULL, the
 * resulting path does not fit the local buffer, or fopen() fails. The caller
 * owns the stream and must fclose() it.
 */
FILE* get_dnld_stream(char const*const fname)
{
    char const*const pre = "/tmp/";
    char out[4096];
    int  n;

    if (!fname) {
        printf ("Could not create file: no name given\n");
        return NULL;
    }

    n = snprintf(out, sizeof(out), "%s/%s", pre, fname);
    if (n < 0 || (size_t)n >= sizeof(out)) {
        /* a truncated path would name a different file than requested */
        printf ("Could not create file: path too long\n");
        return NULL;
    }

    FILE *fp = fopen(out, "wb");
    if (!fp) {
        printf ("Could not create file %s\n", out);
    }

    return fp;
}

/*
 * Write callback registered through CURLOPT_WRITEFUNCTION.
 *
 * buffer   - body data, sz * nmemb bytes.
 * userdata - the dnld_params_t registered through CURLOPT_WRITEDATA.
 *
 * On the first call the output file is opened: if the header callback stored
 * no name, one is derived from the URL first. Every chunk is then written to
 * the file and counted in dnld_file_sz.
 *
 * Returns the number of bytes written. Returning fewer bytes than were
 * offered makes libcurl abort the transfer, which is how a failure to derive
 * a name, open the file or write to it is reported.
 */
size_t write_cb(void *buffer, size_t sz, size_t nmemb, void *userdata)
{
    const size_t  nbytes = sz * nmemb;
    size_t        written = 0;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;

    if (!dnld_params->dnld_remote_fname[0]) {
        if (get_oname_from_url(dnld_params->dnld_url, dnld_params->dnld_remote_fname)) {
            printf("ERR: no file name in URL %s\n", dnld_params->dnld_url);
            return 0;
        }
    }

    if (!dnld_params->dnld_stream) {
        dnld_params->dnld_stream = get_dnld_stream(dnld_params->dnld_remote_fname);
        if (!dnld_params->dnld_stream) {
            /* never hand a NULL stream to fwrite() */
            return 0;
        }
    }

    /* Item size 1 so the result is a byte count, as libcurl expects. */
    written = fwrite(buffer, 1, nbytes, dnld_params->dnld_stream);
    dnld_params->dnld_file_sz += written;

    return written;
}


/*
 * Download url into a file under /tmp/, named after the Content-Disposition
 * "filename=" value when the server sends one and after the last path
 * component of the URL otherwise.
 *
 * url - NUL-terminated URL; must fit the 4096-byte dnld_url field.
 *
 * Prints the chosen name and the number of bytes written to stdout.
 *
 * Returns 0 when the transfer succeeded and the output file was closed
 * cleanly, -1 otherwise.
 */
int download_url(char const*const url)
{
    CURL        *curl = NULL;
    int         ret = -1;
    CURLcode    cerr = CURLE_OK;
    dnld_params_t dnld_params;

    memset(&dnld_params, 0, sizeof(dnld_params));

    /* Bound the copy by the destination; the struct is zeroed, so the copy
     * below stays NUL-terminated. */
    if (!url || strlen(url) >= sizeof(dnld_params.dnld_url)) {
        printf ("URL is missing or too long\n");
        goto bail;
    }
    memcpy(dnld_params.dnld_url, url, strlen(url));

    curl = curl_easy_init();
    if (!curl) {
        goto bail;
    }

    cerr = curl_easy_setopt(curl, CURLOPT_URL, url);
    if (cerr) { printf ("%s: failed with err %d\n", "URL", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, dnld_header_parse);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER DATA", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
    if (cerr) { printf ("%s: failed with err %d\n", "WR CB", cerr); goto bail;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "WR Data", cerr); goto bail;}

    cerr = curl_easy_perform(curl);
    if (cerr != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(cerr));
    } else {
        ret = 0;
    }

    printf ("Remote name: %s\n", dnld_params.dnld_remote_fname);
    /* cast so the format matches whatever width uint64_t maps to */
    printf ("file size : %llu\n", (unsigned long long)dnld_params.dnld_file_sz);

bail:
    /* The stream stays NULL when no body data ever arrived. */
    if (dnld_params.dnld_stream) {
        if (fclose(dnld_params.dnld_stream) != 0) {
            ret = -1;   /* buffered data could not be flushed */
        }
        dnld_params.dnld_stream = NULL;
    }

    /* always cleanup, including on the early-exit paths */
    if (curl) {
        curl_easy_cleanup(curl);
    }

    return ret;
}

/*
 * Entry point: expects exactly one argument, the URL to download.
 * Returns download_url()'s result, or -1 after printing "Bad args" when the
 * argument count is wrong.
 */
int main(int argc, char *argv[])
{
    if (argc == 2) {
        return download_url(argv[1]);
    }

    printf ("Bad args\n");
    return -1;
}
like image 168
bladeWalker Avatar answered Nov 14 '22 23:11

bladeWalker