Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Faster alternative to DateTime.ParseExact

I parse a string into a DateTime millions of times:

// Invariant culture stored in a static field and passed to every ParseExact call below.
public static CultureInfo ci = CultureInfo.InvariantCulture;
// Consume the input one line at a time until the reader reports end of stream.
while (!reader.EndOfStream)
{      
      line = reader.ReadLine();
      // Fields are separated by single spaces; only the first one (the timestamp) is used here.
      string[] fields = line.Split(' ');
      // Timestamp layout: 4-digit year, month, day, literal 'T', hour (24h), minute, second, milliseconds.
      DateTime newDT = DateTime.ParseExact(fields[0], "yyyyMMddTHHmmssfff", ci);
}

My profiler highlights ParseExact as being a huge part of time taken. Is there any other method/approach that could parse the string into a DateTime that would be faster?

FOLLOW UP1:

1) I tried this - but speed was same

// Same format string as before, but using the non-throwing TryParseExact: success is reported in OK
// and the parsed value is written to DT. The format provider argument is null here.
bool OK = DateTime.TryParseExact(fields[0], "yyyyMMddTHHmmssfff", null, System.Globalization.DateTimeStyles.None,out DT);

2)

I tried to code my own parser - but this too was as slow:

/// <summary>
/// Parses a fixed-width timestamp laid out as yyyyMMdd, one separator character, then HHmmssfff
/// by slicing each component out of the string and converting it with int.Parse.
/// </summary>
/// <param name="s">The timestamp text; it is only read, never reassigned.</param>
/// <returns>The DateTime built from the seven parsed components.</returns>
public static DateTime fastParse(ref string s)
{
    // Components are parsed strictly left to right, exactly as they appear in the text.
    int yearPart = int.Parse(s.Substring(0, 4));
    int monthPart = int.Parse(s.Substring(4, 2));
    int dayPart = int.Parse(s.Substring(6, 2));
    // Index 8 holds the separator and is skipped.
    int hourPart = int.Parse(s.Substring(9, 2));
    int minutePart = int.Parse(s.Substring(11, 2));
    int secondPart = int.Parse(s.Substring(13, 2));
    int millisecondPart = int.Parse(s.Substring(15, 3));

    return new DateTime(yearPart, monthPart, dayPart, hourPart, minutePart, secondPart, millisecondPart);
}

FOLLOW UP2

I tried Master117 suggestion of storing values - AGAIN it is NO faster - perhaps the issue is the construction?

     /// <summary>
     /// Timestamp parser for the fixed layout yyyyMMddTHHmmssfff that remembers the
     /// year/month/day/hour/minute of the previous call and only re-parses them when
     /// that leading part of the text changes.
     /// </summary>
     public class fastParseData
        {
            // Number of leading characters that hold year..minute ("yyyyMMddTHHmm").
            // Minutes occupy index 11 and 12, so the prefix must be 13 characters long;
            // a 12-character prefix would miss the units digit of the minute.
            const int PrefixLength = 13;

            int year;
            int mon;
            int day;
            int hour;
            int min; 
            string previousSlice = "";

            /// <summary>Parses one timestamp, reusing the cached date/hour/minute when possible.</summary>
            /// <param name="s">Timestamp text in yyyyMMddTHHmmssfff layout; it is only read.</param>
            /// <returns>The parsed DateTime.</returns>
            public DateTime fastParse(ref string s)
            {
                // Take the prefix once and use it both for the comparison and as the new cache key.
                string slice = s.Substring(0, PrefixLength);
                if (previousSlice != slice)
                {
                     year=int.Parse(s.Substring(0,4));
                     mon=int.Parse(s.Substring(4,2));
                     day=int.Parse(s.Substring(6,2));
                     hour= int.Parse(s.Substring(9,2));
                     min = int.Parse(s.Substring(11,2));
                     previousSlice = slice;
                }

                // Seconds and milliseconds change on (almost) every line, so they are always parsed.
                return new DateTime(year, mon, day, hour,min, int.Parse(s.Substring(13, 2)), int.Parse(s.Substring(15, 3)));
            }

        }

FOLLOW UP3

                /// <summary>
                /// Timestamp parser for the fixed layout yyyyMMddTHHmmssfff that caches a DateTime
                /// truncated to the minute and derives each result by adding the seconds and
                /// milliseconds of the current line to it.
                /// </summary>
                public class fastParseData
                {
                    // Number of leading characters that hold year..minute ("yyyyMMddTHHmm").
                    // Minutes occupy index 11 and 12, so the prefix must be 13 characters long;
                    // a 12-character prefix would miss the units digit of the minute.
                    const int PrefixLength = 13;

                    int year;
                    int mon;
                    int day;
                    int hour;
                    int min; 
                    string previousSlice = "";
                    DateTime previousDT;

                    /// <summary>Parses one timestamp, rebuilding the cached minute only when the prefix changes.</summary>
                    /// <param name="s">Timestamp text in yyyyMMddTHHmmssfff layout; it is only read.</param>
                    /// <returns>The parsed DateTime.</returns>
                    public DateTime fastParse(ref string s)
                    {
                        // Take the prefix once and use it both for the comparison and as the new cache key.
                        string slice = s.Substring(0, PrefixLength);
                        if (previousSlice != slice)
                        {
                             year=int.Parse(s.Substring(0,4));
                             mon=int.Parse(s.Substring(4,2));
                             day=int.Parse(s.Substring(6,2));
                             hour= int.Parse(s.Substring(9,2));
                             min = int.Parse(s.Substring(11,2));
                             previousSlice = slice;
                             previousDT = new DateTime(year, mon, day, hour,min,0,0);
                        }
                        // Seconds are converted to milliseconds so a single Add call is enough.
                        return previousDT.AddMilliseconds((int.Parse(s.Substring(13, 2))*1000)+int.Parse(s.Substring(15, 3)));
                    }

                }

FOLLOW UP4

From my profiler the crux seems to be

// Extracts the two characters at index 13-14 (the seconds in the layout above) and converts them to an int.
int.Parse(s.Substring(13, 2))

With the Parse bit being more costly than the substring.

I tried

// Alternative 1: non-throwing parse restricted to plain digits (NumberStyles.None), result written to secs.
int.TryParse(s.Substring(13, 2),NumberStyles.None,ci, out secs)
// Alternative 2: Convert.ToInt32 on the same two-character slice.
Convert.ToInt32(s.Substring(13, 2));

but again - no difference in speed.

Is there a faster way to parse an int?

like image 535
ManInMoon Avatar asked Mar 29 '13 11:03

ManInMoon


3 Answers

This is the code used to benchmark James's code against the framework code (in reference to my comment there). I run this in Release mode, .Net 4.5.2, 32bit console

/// <summary>
/// Benchmark driver: first checks that both parsers agree on a sample timestamp, then times
/// one million calls of each and prints the elapsed milliseconds.
/// </summary>
static void Main(string[] args)
{
    const string date = "2015-04-11T12:45:59";
    const string format = "yyyy-MM-ddTHH:mm:ss";
    const int iterations = 1000000;

    // Sanity check before timing anything: the hand-written parser must match the framework.
    DateTime expected = FrameworkParse(date, format);
    DateTime actual = JamesBarrettParse(date, format);
    if (expected != actual)
    {
        string mismatch = string.Format("reference date {0} does not match SO date {1}",expected.ToString("s"),actual.ToString("s"));
        throw new Exception(mismatch);
    }

    Stopwatch timer = new Stopwatch();

    //FRAMEWORK PARSE
    Console.Write("Starting framework parse for {0} iterations...", iterations);
    timer.Start();
    for (int pass = 0; pass < iterations; pass++)
    {
        DateTime parsed = FrameworkParse(date, format);
        // Touch the result so the call cannot be treated as dead code.
        if (parsed.Minute != 45)
        {
            Console.WriteLine("duh");
        }
    }
    timer.Stop();
    string frameworkMillis = timer.ElapsedMilliseconds.ToString("F2",CultureInfo.InvariantCulture);
    Console.WriteLine("DONE in {0} millis",frameworkMillis);

    //James Barrett parse
    Console.Write("Starting JB parse for {0} iterations...", iterations);
    timer.Restart();
    for (int pass = 0; pass < iterations; pass++)
    {
        DateTime parsed = JamesBarrettParse(date, format);
        // Touch the result so the call cannot be treated as dead code.
        if (parsed.Minute != 45)
        {
            Console.WriteLine("duh");
        }
    }
    timer.Stop();
    string customMillis = timer.ElapsedMilliseconds.ToString("F2",CultureInfo.InvariantCulture);
    Console.WriteLine("DONE in {0} millis",customMillis);

    Console.Write("press any key to exit");
    Console.ReadKey();
}

/// <summary>
/// Reference implementation: parses <paramref name="s"/> with DateTime.ParseExact, treating
/// the text as UTC and returning the value in UTC.
/// </summary>
/// <param name="s">The timestamp text.</param>
/// <param name="format">The exact format the text must match.</param>
/// <param name="info">Culture to parse with; the invariant culture is used when null.</param>
/// <returns>The parsed DateTime.</returns>
private static DateTime FrameworkParse(string s, string format, CultureInfo info = null)
{
    const DateTimeStyles utcStyles = DateTimeStyles.AdjustToUniversal | DateTimeStyles.AssumeUniversal;
    CultureInfo culture = info ?? CultureInfo.InvariantCulture;
    return DateTime.ParseExact(s, format, culture, utcStyles);
}

Output is

Starting framework parse for 1000000 iterations...DONE in 2058.00 millis
Starting JB parse for 1000000 iterations...DONE in 324.00 millis
press any key to exit

like image 90
bottlenecked Avatar answered Nov 08 '22 05:11

bottlenecked


The idea of splitting the string is on the right track, but substring is slow. Whenever I split a string, I use the character accessor. yyyyMMddTHHmmssfff Disclaimer: T

/// <summary>
/// Fixed-layout timestamp parser. It walks <c>DateFormat</c> and the input in lock-step and
/// accumulates each component with character arithmetic, avoiding Substring and int.Parse.
/// </summary>
public class DateParser1
{
    // One format character per input character. 'T' marks a position that carries no digit.
    private static readonly System.String DateFormat="yyMMddTHHmmssfff";

    /// <summary>
    /// Parses a timestamp that starts at <paramref name="Offset"/> inside <paramref name="SourceString"/>.
    /// </summary>
    /// <param name="SourceString">Text containing the timestamp.</param>
    /// <param name="Offset">Index of the first timestamp character inside the text.</param>
    /// <returns>The parsed DateTime.</returns>
    /// <exception cref="System.ArgumentNullException">The source string is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The offset is negative.</exception>
    /// <exception cref="System.FormatException">
    /// The text is too short, holds a non-digit where a digit is required, or describes an invalid date.
    /// </exception>
    public static System.DateTime GetDate(System.String SourceString, int Offset=0) // Offset eliminates need for substring
    {
        if(SourceString==null) throw new System.ArgumentNullException("SourceString");
        if(Offset<0) throw new System.ArgumentOutOfRangeException("Offset");

        // The characters available are those AFTER the offset, so the offset is subtracted, not added.
        if(SourceString.Length-Offset<DateFormat.Length)
        {
            System.String Remainder=Offset<SourceString.Length?SourceString.Substring(Offset):System.String.Empty;
            throw new System.FormatException(System.String.Format("Date Too Short {0} For {1}",Remainder,DateFormat));
        }

        int Year=0;
        int YearDigits=0;
        int Month=0;
        int Day=0;
        int Hour=0;
        int Minute=0;
        int Second=0;
        int HourOffset=0;
        int MS=0;

        for(int i=0;i<DateFormat.Length;i++)
        {
            System.Char c=SourceString[Offset+i];
            switch(DateFormat[i])
            {
                  case 'y':
                      Year=Year*10+ToDigit(c,Offset+i);
                      YearDigits++;
                      break;
                  case 'M':
                      Month=Month*10+ToDigit(c,Offset+i);
                      break;
                  case 'd':
                      Day=Day*10+ToDigit(c,Offset+i);
                      break;
                  case 'T':
                      // A 'p'/'P' at this position selects the afternoon for 12-hour ('h') formats.
                      if(c=='p'||c=='P')
                           HourOffset=12;
                      break;
                  case 'h':
                      Hour=Hour*10+ToDigit(c,Offset+i);
                      // On a 12-hour clock "12" means hour zero of the half-day.
                      if(Hour==12) Hour=0;
                      break;
                  case 'H':
                      Hour=Hour*10+ToDigit(c,Offset+i);
                      // 24-hour values never need the afternoon offset.
                      HourOffset=0;
                      break;
                  case 'm':
                      Minute=Minute*10+ToDigit(c,Offset+i);
                      break;
                  case 's':
                      Second=Second*10+ToDigit(c,Offset+i);
                      break;
                  case 'f':
                      MS=MS*10+ToDigit(c,Offset+i);
                      break;
            }

        }

        // The century pivot only makes sense for short years; a four-digit year is already complete.
        if(YearDigits<=2)
        {
            if(Year>30) //Change For Your Business Rules
            {
                   Year+=1900;
            }
            else
            {
                   Year+=2000;
            }
        }

        try
        {
            return new System.DateTime(Year,Month,Day,Hour+HourOffset,Minute,Second,MS);
        }
        catch(System.ArgumentException ex)
        {
            // Keep the original exception as the inner exception so the failing component can be traced.
            throw new System.FormatException(System.String.Format("Error In Date: {0}/{1}/{2} {3}:{4}:{5}.{6} - {7} {8}",Year,Month,Day,Hour+HourOffset,Minute,Second,MS,DateFormat,SourceString.Substring(Offset,DateFormat.Length)),ex);
        }
    }

    // Converts one character to its digit value, rejecting anything outside '0'..'9'.
    private static int ToDigit(System.Char c, int Position)
    {
        int Digit=c-'0';
        if(Digit<0||Digit>9) throw new System.FormatException(System.String.Format("Invalid Digit '{0}' At Position {1}",c,Position));
        return Digit;
    }
}
like image 41
James Barrett Avatar answered Nov 08 '22 04:11

James Barrett


You can write your own parsing algorithm: first you split your string into an array/list/whatever, and then you use the DateTime constructor to create your DateTime,

// Signature sketch, not compilable code: the seven Int32 arguments are year, month, day, hour, minute, second, millisecond.
DateTime newDT = DateTime(Int32, Int32, Int32, Int32, Int32, Int32, Int32);

Since the year/month/day won't change that fast, you can buffer them and therefore need fewer string operations.

http://msdn.microsoft.com/de-de/library/vstudio/system.datetime.aspx

An easy way would be to store the first 8 characters, like string a = fields[0].slice(0,8) (I don't know the correct operation at the moment; in C# it is Substring), then parse them into ints. On the next run you slice again and test whether the new slice equals a; if so, use the ints from last time instead of parsing them again. Naturally, for that you need to store a and the integers.

So, since the problem now seems to be the construction time, you should try either to add the elapsed time to the previous DateTime (checking whether your ints are higher/lower than before and using AddSeconds etc.), or to take your existing DateTime and set its values to the new time.

try this:

            /// <summary>
            /// Timestamp parser for the fixed layout yyyyMMddTHHmmssfff that builds a DateTime
            /// only when the year..minute part of the text changes and otherwise just adds the
            /// seconds and milliseconds of the current line to the cached value.
            /// </summary>
            public class fastParseData
            {
                // Number of leading characters that hold year..minute ("yyyyMMddTHHmm").
                // Minutes occupy index 11 and 12, so the prefix must be 13 characters long;
                // a 12-character prefix would miss the units digit of the minute.
                const int PrefixLength = 13;

                int year;
                int mon;
                int day;
                int hour;
                int min; 
                string previousSlice = "";
                DateTime previousDT;

                /// <summary>Parses one timestamp, rebuilding the cached minute only when the prefix changes.</summary>
                /// <param name="s">Timestamp text in yyyyMMddTHHmmssfff layout; it is only read.</param>
                /// <returns>The parsed DateTime.</returns>
                public DateTime fastParse(ref string s)
                {
                    // Take the prefix once and use it both for the comparison and as the new cache key.
                    string slice = s.Substring(0, PrefixLength);
                    if (previousSlice != slice)
                    {
                         year=int.Parse(s.Substring(0,4));
                         mon=int.Parse(s.Substring(4,2));
                         day=int.Parse(s.Substring(6,2));
                         hour= int.Parse(s.Substring(9,2));
                         min = int.Parse(s.Substring(11,2));
                         previousSlice = slice;
                         previousDT = new DateTime(year, mon, day, hour,min,0,0);
                    }
                    // DateTime has no instance ParseExact; the remaining seconds and milliseconds
                    // are added to the cached minute instead.
                    return previousDT.AddMilliseconds((int.Parse(s.Substring(13, 2)) * 1000) + int.Parse(s.Substring(15, 3)));
                }

            }

That way you create the DateTime only once per minute and afterwards just adjust its time.

like image 38
Master117 Avatar answered Nov 08 '22 05:11

Master117