Question 1: When a URL is downloaded using libcurl, how can the original name of the downloaded file be preserved? libcurl asks the programmer to generate the filename, which may be easy when the URL contains it, e.g. for the URL below it is easy to figure out that the target name is vimqrc.pdf:
http://tnerual.eriogerg.free.fr/vimqrc.pdf
But it is not obvious when the URL generates the target name dynamically, e.g. the URL below downloads AdbeRdr1010_eu_ES.exe with both wget (no arguments except the URL) and curl (argument -O):
http://get.adobe.com/reader/download/?installer=Reader_10.1_Basque_for_Windows&standalone=1%22
How does curl (-O) or wget figure out the name of the downloaded file?
//invoked as ./a.out <URL>
#include <stdio.h>
#include <curl/curl.h>
/* Fixed path the downloaded body is written to. Declared as pointer-to-const
 * because it refers to a string literal, which must never be written through. */
const char *location = "/tmp/test/out";
/*
 * Write callback handed to libcurl: forwards the received chunk to the
 * FILE passed as user data and hands fwrite()'s result (the number of
 * complete items written) straight back to the caller.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fwrite(ptr, size, nmemb, stream);
}
int main(int argc, char *argv[])
{
CURL *curl;
CURLcode res;
int ret = -1;
if (argc!= 2) {
//invoked as ./a.out <URL>
return -1;
}
curl = curl_easy_init();
if (!curl) {
goto bail;
}
FILE *fp = fopen(location, "wb");
curl_easy_setopt(curl, CURLOPT_URL, argv[1]); //invoked as ./a.out <URL>
/* example.com is redirected, so we tell libcurl to follow redirection */
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
/* Perform the request, res will get the return code */
res = curl_easy_perform(curl);
/* Check for errors */
if(res != CURLE_OK)
fprintf(stderr, "curl_easy_perform() failed: %s\n",
curl_easy_strerror(res));
/* always cleanup */
curl_easy_cleanup(curl);
ret = 0;
fclose(fp);
bail:
return ret;
}
I found the answer in the libcurl source code. It looks like the "remote name" is part of the "Content-Disposition" field in the response header. libcurl parses the header and looks for "filename=" in the Content-Disposition field. This parsing is done in a callback provided through the CURLOPT_HEADERFUNCTION option. Finally, in the callback for writing data (provided through CURLOPT_WRITEFUNCTION), this remote name is used to create the output file.
If the file name is missing, it is simply derived from the URL itself. The code below is largely copied from libcurl, with a few modifications of my own to make it simpler and match my requirements.
#define _GNU_SOURCE
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <curl/curl.h>
/*
 * State shared between download_url() and the libcurl callbacks of one
 * transfer. uint64_t comes from <stdint.h>; a hand-written
 * `typedef unsigned long uint64_t` is only 64 bits wide on some ABIs and
 * can conflict with the standard definition.
 */
typedef struct {
    char dnld_remote_fname[4096]; /* output file name; empty until one is derived */
    char dnld_url[4096];          /* copy of the URL being downloaded */
    FILE *dnld_stream;            /* output file; NULL until write_cb opens it */
    FILE *dbg_stream;             /* not referenced by the code in this file */
    uint64_t dnld_file_sz;        /* running total accumulated by write_cb */
} dnld_params_t;
/* Return the first case-insensitive occurrence of `needle` in `haystack`, or NULL. */
static char const *cd_find_nocase(char const *haystack, char const *needle)
{
    size_t const nlen = strlen(needle);

    for (; *haystack != '\0'; haystack++) {
        size_t i = 0;
        while (i < nlen && haystack[i] != '\0' &&
               tolower((unsigned char)haystack[i]) ==
               tolower((unsigned char)needle[i])) {
            i++;
        }
        if (i == nlen) {
            return haystack;
        }
    }
    return NULL;
}

/*
 * Extract the file name from the value of a Content-Disposition header.
 *
 * cd    - NUL-terminated header value (the text after "Content-disposition:").
 * oname - destination buffer; must hold at least 4096 bytes. Longer names
 *         are truncated to 4095 characters.
 *
 * Returns 0 and stores the name on success. Returns -1 and leaves `oname`
 * untouched when there is no usable "filename=" parameter.
 *
 * Surrounding double quotes, the trailing CR/LF of the header line and any
 * directory components are removed, so a server-supplied value such as
 * "../../x" cannot direct the output outside the download directory.
 */
static int get_oname_from_cd(char const*const cd, char *oname)
{
    enum { ONAME_CAP = 4096 };
    char const*const cdtag = "Content-disposition:";
    char const*const key = "filename=";
    char const *val;
    char const *end;
    char const *p;
    size_t len;

    if (!cd || !oname) {
        return -1;
    }

    /* Example Content-Disposition: filename=name1367; charset=funny; option=strange */
    val = cd_find_nocase(cd, key);
    if (!val) {
        printf("No key-value for \"%s\" in \"%s\"\n", key, cdtag);
        return -1;
    }

    /* Move to value */
    val += strlen(key);
    while (*val == ' ' || *val == '\t') {
        val++;
    }

    if (*val == '"') {
        /* Quoted value: runs up to the closing quote. */
        val++;
        end = val;
        while (*end != '\0' && *end != '"' && *end != '\r' && *end != '\n') {
            end++;
        }
    } else {
        /* Bare token: runs up to the next parameter or the end of the line. */
        end = val;
        while (*end != '\0' && *end != ';' && *end != '\r' && *end != '\n') {
            end++;
        }
        while (end > val && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }
    }

    /* Keep only the last path component. */
    for (p = val; p < end; p++) {
        if (*p == '/' || *p == '\\') {
            val = p + 1;
        }
    }

    len = (size_t)(end - val);
    if (len == 0 ||
        (len == 1 && val[0] == '.') ||
        (len == 2 && val[0] == '.' && val[1] == '.')) {
        return -1;
    }
    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;
    }

    /* Copy value as oname */
    memcpy(oname, val, len);
    oname[len] = '\0';
    return 0;
}
/*
 * Derive an output file name from the last path component of a URL.
 *
 * url   - NUL-terminated URL, with or without a "scheme://" prefix.
 * oname - destination buffer; must hold at least 4096 bytes. Longer names
 *         are truncated to 4095 characters.
 *
 * The query string ("?...") and fragment ("#...") are not part of the name.
 * When the URL has no path, or the path ends in '/', the name "index.html"
 * is used so that callers always receive a non-empty name.
 *
 * Returns 0 on success, -1 if either argument is NULL.
 */
static int get_oname_from_url(char const* url, char *oname)
{
    enum { ONAME_CAP = 4096 };
    static char const fallback[] = "index.html";
    char const *start;
    char const *end;
    char const *name = NULL;
    char const *sep;
    char const *p;
    size_t len;

    if (!url || !oname) {
        return -1;
    }

    /* Remove "http(s)://", but only when it really is the URL's prefix and
     * not a "://" that happens to occur later, e.g. inside a query string. */
    start = url;
    sep = strstr(url, "://");
    if (sep && (size_t)(sep - url) + 1 == strcspn(url, "/?#")) {
        start = sep + strlen("://");
    }

    /* The path stops where the query or fragment begins. */
    end = start + strcspn(start, "?#");

    /* Locate the character after the last '/' of the path. */
    for (p = start; p < end; p++) {
        if (*p == '/') {
            name = p + 1;
        }
    }

    if (!name || name == end) {
        name = fallback;
        end = fallback + sizeof(fallback) - 1;
    }

    len = (size_t)(end - name);
    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;
    }

    /* Copy value as oname */
    memcpy(oname, name, len);
    oname[len] = '\0';
    return 0;
}
/*
 * Header callback (installed through CURLOPT_HEADERFUNCTION): called with
 * one header line at a time. When the line is a Content-Disposition header,
 * the remote file name is extracted into dnld_remote_fname.
 *
 * hdr      - header bytes; per the libcurl documentation these are NOT
 *            guaranteed to be NUL-terminated, so only `size * nmemb` bytes
 *            are ever read here.
 * userdata - the dnld_params_t registered through CURLOPT_HEADERDATA.
 *
 * Always returns size * nmemb so the transfer continues even if the header
 * could not be parsed.
 */
size_t dnld_header_parse(void *hdr, size_t size, size_t nmemb, void *userdata)
{
    const size_t cb = size * nmemb;
    const char *hdr_str = hdr;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    char const*const cdtag = "Content-disposition:";
    const size_t taglen = strlen(cdtag);
    /* Same size as the destination, so whatever name is extracted from this
     * copy is guaranteed to fit into dnld_remote_fname. */
    char value[sizeof dnld_params->dnld_remote_fname];
    size_t vlen;

    /* Example:
     * ...
     * Content-Type: text/html
     * Content-Disposition: filename=name1367; charset=funny; option=strange
     */
    if (!hdr_str || !dnld_params || cb < taglen) {
        return cb;
    }
    if (strncasecmp(hdr_str, cdtag, taglen) != 0) {
        return cb;
    }

    printf ("Found c-d: %.*s\n", (int)cb, hdr_str);

    /* Build a bounded, NUL-terminated copy of the header value. */
    vlen = cb - taglen;
    if (vlen > sizeof(value) - 1) {
        vlen = sizeof(value) - 1;
    }
    memcpy(value, hdr_str + taglen, vlen);
    /* Drop the line terminator so it cannot end up in the file name. */
    while (vlen > 0 && (value[vlen - 1] == '\r' || value[vlen - 1] == '\n')) {
        vlen--;
    }
    value[vlen] = '\0';

    if (get_oname_from_cd(value, dnld_params->dnld_remote_fname)) {
        printf("ERR: bad remote name");
    }
    return cb;
}
/*
 * Create (or truncate) the output file "/tmp/<fname>" for binary writing.
 *
 * fname - file name relative to /tmp/; must be non-NULL and non-empty.
 *
 * Returns the open stream, which the caller must fclose(), or NULL when the
 * name is missing, the resulting path does not fit the path buffer, or
 * fopen() fails. A message is printed in every failure case.
 */
FILE* get_dnld_stream(char const*const fname)
{
    char const*const pre = "/tmp/";
    char out[4096];
    FILE *fp;
    int n;

    if (!fname || fname[0] == '\0') {
        printf ("Could not create file: empty file name\n");
        return NULL;
    }

    /* `pre` already ends in '/', so no extra separator is needed. */
    n = snprintf(out, sizeof(out), "%s%s", pre, fname);
    if (n < 0 || (size_t)n >= sizeof(out)) {
        /* Opening a silently truncated path would create the wrong file. */
        printf ("Could not create file %s%s: path too long\n", pre, fname);
        return NULL;
    }

    fp = fopen(out, "wb");
    if (!fp) {
        printf ("Could not create file %s\n", out);
    }
    return fp;
}
/*
 * Write callback (installed through CURLOPT_WRITEFUNCTION): appends one
 * chunk of the response body to the output file, creating that file on the
 * first call.
 *
 * If no file name has been recorded yet (dnld_remote_fname is empty), the
 * name is derived from the URL before the file is opened.
 *
 * userdata - the dnld_params_t registered through CURLOPT_WRITEDATA.
 *
 * Returns the number of bytes written. Per the libcurl documentation, a
 * value different from sz * nmemb makes the transfer fail with a write
 * error; 0 is returned for that purpose when the output file cannot be
 * named or created.
 */
size_t write_cb(void *buffer, size_t sz, size_t nmemb, void *userdata)
{
    const size_t total = sz * nmemb;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    size_t written;

    if (!dnld_params) {
        return 0;
    }

    if (!dnld_params->dnld_remote_fname[0]) {
        int ret = get_oname_from_url(dnld_params->dnld_url, dnld_params->dnld_remote_fname);
        if (ret != 0 || !dnld_params->dnld_remote_fname[0]) {
            return 0;
        }
    }

    if (!dnld_params->dnld_stream) {
        dnld_params->dnld_stream = get_dnld_stream(dnld_params->dnld_remote_fname);
        if (!dnld_params->dnld_stream) {
            /* Never hand a NULL stream to fwrite(). */
            return 0;
        }
    }

    /* Write with an item size of 1 so the result is a byte count. */
    written = fwrite(buffer, 1, total, dnld_params->dnld_stream);
    dnld_params->dnld_file_sz += written;
    return written;
}
/*
 * Download `url` into a file under /tmp whose name is taken from the
 * response's Content-Disposition header or, failing that, from the URL.
 *
 * Returns 0 when the transfer succeeded and the output file was closed
 * without error, -1 otherwise (NULL or over-long URL, libcurl setup or
 * transfer failure, failure to flush the output file).
 */
int download_url(char const*const url)
{
    CURL *curl = NULL;
    int ret = -1;
    int n;
    CURLcode cerr = CURLE_OK;
    dnld_params_t dnld_params;

    memset(&dnld_params, 0, sizeof(dnld_params));

    if (!url) {
        printf ("%s: failed with err %d\n", "URL", -1);
        goto bail;
    }

    /* Bounded, always NUL-terminated copy; refuse URLs that do not fit. */
    n = snprintf(dnld_params.dnld_url, sizeof(dnld_params.dnld_url), "%s", url);
    if (n < 0 || (size_t)n >= sizeof(dnld_params.dnld_url)) {
        printf ("URL too long (max %zu bytes)\n", sizeof(dnld_params.dnld_url) - 1);
        goto bail;
    }

    curl = curl_easy_init();
    if (!curl) {
        goto bail;
    }

    cerr = curl_easy_setopt(curl, CURLOPT_URL, url);
    if (cerr) { printf ("%s: failed with err %d\n", "URL", cerr); goto bail;}
    cerr = curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, dnld_header_parse);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER", cerr); goto bail;}
    cerr = curl_easy_setopt(curl, CURLOPT_HEADERDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER DATA", cerr); goto bail;}
    cerr = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
    if (cerr) { printf ("%s: failed with err %d\n", "WR CB", cerr); goto bail;}
    cerr = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "WR Data", cerr); goto bail;}

    cerr = curl_easy_perform(curl);
    if (cerr != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(cerr));
    } else {
        ret = 0;
    }

    printf ("Remote name: %s\n", dnld_params.dnld_remote_fname);
    /* Cast so the format matches whatever uint64_t is on this platform. */
    printf ("file size : %llu\n", (unsigned long long)dnld_params.dnld_file_sz);

bail:
    /* The stream is only opened once body data arrives, so it may be NULL. */
    if (dnld_params.dnld_stream) {
        if (fclose(dnld_params.dnld_stream) != 0) {
            ret = -1;
        }
    }
    /* always cleanup */
    if (curl) {
        curl_easy_cleanup(curl);
    }
    return ret;
}
/*
 * Entry point: expects exactly one argument, the URL to fetch, and returns
 * whatever download_url() reports for it. Any other argument count prints
 * "Bad args" and returns -1.
 */
int main(int argc, char *argv[])
{
    if (argc == 2) {
        return download_url(argv[1]);
    }

    printf ("Bad args\n");
    return -1;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With