Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Error checking when using HttpClient for asynchronous file downloads

This question is a followup to Threading issues when using HttpClient for asynchronous file downloads.

To get a file transfer to complete asynchronously using HttpClient, you need to add HttpCompletionOption.ResponseHeadersRead to the SendAsync request. Thus, when that call completes, you will be able to determine that all was well with the request and the response headers by adding a call to EnsureSuccessStatusCode. However the data is possibly still being transferred at this point.

How can you detect errors which happen after the headers are returned but before the data transfer is complete? How would said errors manifest themselves?

Some example code follows, with the point of the question marked at line 109)with the comment: "// *****WANT TO DO MORE ERROR CHECKING HERE**"

using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Threading.Tasks;

namespace TestHttpClient2
{
  class Program
  {
    /* Use Yahoo portal to access quotes for stocks - perform asynchronous operations. */

    static string baseUrl = "http://real-chart.finance.yahoo.com/";
    static string requestUrlFormat = "/table.csv?s={0}&d=0&e=1&f=2016&g=d&a=0&b=1&c=1901&ignore=.csv";

    static void Main(string[] args)
    {
      var activeTaskList = new List<Task>();

      string outputDirectory = "StockQuotes";
      if (!Directory.Exists(outputDirectory))
      {
        Directory.CreateDirectory(outputDirectory);
      }

      while (true)
      {
        Console.WriteLine("Enter symbol or [ENTER] to exit:");
        string symbol = Console.ReadLine();
        if (string.IsNullOrEmpty(symbol))
        {
          break;
        }

        Task downloadTask = DownloadDataForStockAsync(outputDirectory, symbol);
        if (TaskIsActive(downloadTask))
        {
          // This is an asynchronous world - lock the list before updating it!
          lock (activeTaskList)
          {
            activeTaskList.Add(downloadTask);
          }

        }
        else
        {
          Console.WriteLine("task completed already?!??!?");
        }
        CleanupTasks(activeTaskList);
      }

      Console.WriteLine("Cleaning up");
      while (CleanupTasks(activeTaskList))
      {
        Task.Delay(1).Wait();
      }
    }

    private static bool CleanupTasks(List<Task> activeTaskList)
    {
      // reverse loop to allow list item deletions
      // This is an asynchronous world - lock the list before updating it!
      lock (activeTaskList)
      {
        for (int i = activeTaskList.Count - 1; i >= 0; i--)
        {
          if (!TaskIsActive(activeTaskList[i]))
          {
            activeTaskList.RemoveAt(i);
          }
        }
        return activeTaskList.Count > 0;
      }
    }

    private static bool TaskIsActive(Task task)
    {
      return task != null
          && task.Status != TaskStatus.Canceled
          && task.Status != TaskStatus.Faulted
          && task.Status != TaskStatus.RanToCompletion;
    }

    static async Task DownloadDataForStockAsync(string outputDirectory, string symbol)
    {
      try
      {
        using (var client = new HttpClient())
        {
          client.BaseAddress = new Uri(baseUrl);
          client.Timeout = TimeSpan.FromMinutes(5);
          string requestUrl = string.Format(requestUrlFormat, symbol);

          var request = new HttpRequestMessage(HttpMethod.Post, requestUrl);
          var response = await client.SendAsync(request, 
            HttpCompletionOption.ResponseHeadersRead);
          response.EnsureSuccessStatusCode();

          using (var httpStream = await response.Content.ReadAsStreamAsync())
          {
            var timestampedName = FormatTimestampedString(symbol, true);
            var filePath = Path.Combine(outputDirectory, timestampedName + ".csv");
            using (var fileStream = File.Create(filePath))
            {
              await httpStream.CopyToAsync(fileStream);
            }
          }
          // *****WANT TO DO MORE ERROR CHECKING HERE*****
        }
      }
      catch (HttpRequestException ex)
      {
        Console.WriteLine("Exception on thread: {0}: {1}\r\n",
          System.Threading.Thread.CurrentThread.ManagedThreadId,
          ex.Message,
          ex.StackTrace);
      }
      catch (Exception ex)
      {
        Console.WriteLine("Exception on thread: {0}: {1}\r\n",
          System.Threading.Thread.CurrentThread.ManagedThreadId,
          ex.Message,
          ex.StackTrace);
      }
    }

    static volatile string lastTimestampedString = string.Empty;
    static volatile string dummy = string.Empty;

    static string FormatTimestampedString(string message, bool uniquify = false)
    {
      // This is an asynchronous world - lock the shared resource before using it!
      lock (dummy)
      //lock (lastTimestampedString)
      {
        Console.WriteLine("IN  - Thread: {0:D2} lastTimestampedString: {1}",
            System.Threading.Thread.CurrentThread.ManagedThreadId,
            lastTimestampedString);

        string newTimestampedString;

        while (true)
        {
          DateTime lastDateTime = DateTime.Now;

          newTimestampedString = string.Format(
              "{1:D4}_{2:D2}_{3:D2}_{4:D2}_{5:D2}_{6:D2}_{7:D3}_{0}",
                message,
                lastDateTime.Year, lastDateTime.Month, lastDateTime.Day,
                lastDateTime.Hour, lastDateTime.Minute, lastDateTime.Second,
                lastDateTime.Millisecond
                );
          if (!uniquify)
          {
            break;
          }
          if (newTimestampedString != lastTimestampedString)
          {
            break;
          }

          //Task.Delay(1).Wait();
        };

        lastTimestampedString = newTimestampedString;
        Console.WriteLine("OUT - Thread: {0:D2} lastTimestampedString: {1}",
            System.Threading.Thread.CurrentThread.ManagedThreadId,
            lastTimestampedString);

        return lastTimestampedString;
      }
    }
  }
}
like image 970
David Rogers Avatar asked Jan 21 '15 19:01

David Rogers


2 Answers

I have copied and slightly cleaned up the relevant code.

var request = new HttpRequestMessage(HttpMethod.Post, requestUrl);
var response = await client.SendAsync(request,
    HttpCompletionOption.ResponseHeadersRead);
response.EnsureSuccessStatusCode();
using (var httpStream = await response.Content.ReadAsStreamAsync())
{
    var timestampedName = FormatTimestampedString(symbol, true);
    var filePath = Path.Combine(outputDirectory, timestampedName + ".csv");
    using (var fileStream = File.Create(filePath))
    {
        await httpStream.CopyToAsync(fileStream);
    }
}

The question is, what if something goes wrong during reading the stream and copying it into your file?

All logical errors have already been addressed as part of the HTTP request and response cycle: the server has received your request, it has decided it is valid, it has responded with success (header portion of response), and it is now sending you the result (body portion of response).

The only errors that could occur now are things like the server crashing, the connection being lost, etc. My understanding is that these will manifest as HttpRequestException, meaning you can write code like this:

try
{
    using (var httpStream = await response.Content.ReadAsStreamAsync())
    {
        var timestampedName = FormatTimestampedString(symbol, true);
        var filePath = Path.Combine(outputDirectory, timestampedName + ".csv");
        using (var fileStream = File.Create(filePath))
        {
            await httpStream.CopyToAsync(fileStream);
        }
    }
}
catch (HttpRequestException e)
{
    ...
}

The documenation doesn't say much, unfortunately. The reference source doesn't either. So your best bet is to start with this and maybe log all exceptions that are not HttpRequestException in case there is another exception type that could be thrown during the download of the response body.

like image 128
Timothy Shields Avatar answered Nov 09 '22 00:11

Timothy Shields


If you want to narrow it down to the part which is between the header read and the content read, you actually leave yourself with the asynchronous buffer read:

var httpStream = await response.Content.ReadAsStreamAsync();

If you look whats going on inside the method, you'll see:

public Task<Stream> ReadAsStreamAsync()
{
    this.CheckDisposed();
    TaskCompletionSource<Stream> tcs = new TaskCompletionSource<Stream>();
    if (this.contentReadStream == null && this.IsBuffered)
    {
        this.contentReadStream = new MemoryStream(this.bufferedContent.GetBuffer(),
                                                  0, (int)this.bufferedContent.Length,
                                                  false, false);
    }
    if (this.contentReadStream != null)
    {
        tcs.TrySetResult(this.contentReadStream);
        return tcs.Task;
    }
    this.CreateContentReadStreamAsync().ContinueWithStandard(delegate(Task<Stream> task)
    {
        if (!HttpUtilities.HandleFaultsAndCancelation<Stream>(task, tcs))
        {
            this.contentReadStream = task.Result;
            tcs.TrySetResult(this.contentReadStream);
        }
    });
    return tcs.Task;
}

CreateContentReadStreamAsync is the one doing all the reading, internally, it will call LoadIntoBufferAsync, which you can find here.

Basically, you can see the it encapsulates IOException and ObjectDisposedException, or an ArgumentOutOfRangeException is the buffer is larger than 2GB (although i think that will be highly rare).

like image 30
Yuval Itzchakov Avatar answered Nov 09 '22 00:11

Yuval Itzchakov