I parse a string into a DateTime millions of times:
// Shared format provider handed to ParseExact in the loop below.
public static CultureInfo ci = CultureInfo.InvariantCulture;
// Consume the reader one line at a time until it reports end of stream.
while (!reader.EndOfStream)
{
line = reader.ReadLine();
// Split on single spaces; only the first field (the timestamp) is used here.
string[] fields = line.Split(' ');
// Layout of the timestamp field: 4-digit year, month, day, a literal 'T',
// 24-hour hour, minute, second and 3 fractional-second digits.
DateTime newDT = DateTime.ParseExact(fields[0], "yyyyMMddTHHmmssfff", ci);
}
My profiler highlights ParseExact as being a huge part of time taken. Is there any other method/approach that could parse the string into a DateTime that would be faster?
FOLLOW UP1:
1) I tried this - but speed was same
// Non-throwing variant: success is reported through OK and the parsed value is written to DT.
// NOTE(review): passes null as the format provider instead of ci — presumably this falls back to the current culture; confirm.
bool OK = DateTime.TryParseExact(fields[0], "yyyyMMddTHHmmssfff", null, System.Globalization.DateTimeStyles.None,out DT);
2)
I tried to code my own parser - but this too was as slow:
/// <summary>
/// Builds a DateTime from a fixed-width "yyyyMMddTHHmmssfff" string by cutting
/// out each numeric field with Substring and converting it with int.Parse.
/// </summary>
/// <param name="s">Timestamp text; the character at index 8 (the 'T') is skipped.</param>
/// <returns>The DateTime assembled from the seven parsed components.</returns>
public static DateTime fastParse(ref string s)
{
    // Fields are converted strictly left to right so that a malformed
    // component surfaces in the same order as in a single-expression version.
    int yearPart = int.Parse(s.Substring(0, 4));
    int monthPart = int.Parse(s.Substring(4, 2));
    int dayPart = int.Parse(s.Substring(6, 2));
    int hourPart = int.Parse(s.Substring(9, 2));
    int minutePart = int.Parse(s.Substring(11, 2));
    int secondPart = int.Parse(s.Substring(13, 2));
    int millisecondPart = int.Parse(s.Substring(15, 3));

    return new DateTime(yearPart, monthPart, dayPart, hourPart, minutePart, secondPart, millisecondPart);
}
FOLLOW UP2
I tried Master117 suggestion of storing values - AGAIN it is NO faster - perhaps the issue is the construction?
/// <summary>
/// Parses "yyyyMMddTHHmmssfff" timestamps while caching the year, month, day,
/// hour and minute of the previous call, so that consecutive timestamps inside
/// the same minute only re-parse the seconds and milliseconds.
/// Each instance carries mutable cache state that is rewritten on a cache miss.
/// </summary>
public class fastParseData
{
    // "yyyyMMddTHHmm" spans indexes 0-12, i.e. 13 characters. A 12-character
    // key would cut the minute in half and reuse a stale minute for any other
    // timestamp within the same ten-minute span.
    const int PrefixLength = 13;

    int year;
    int mon;
    int day;
    int hour;
    int min;
    string previousSlice = "";

    /// <summary>Parses one timestamp, reusing the cached date/hour/minute when the prefix is unchanged.</summary>
    /// <param name="s">Timestamp text in the "yyyyMMddTHHmmssfff" layout.</param>
    /// <returns>The parsed DateTime.</returns>
    public DateTime fastParse(ref string s)
    {
        // Ordinal comparison of the prefix in place: no substring is allocated on a cache hit.
        if (string.CompareOrdinal(s, 0, previousSlice, 0, PrefixLength) != 0)
        {
            // Parse into locals first and commit afterwards, so a malformed
            // field cannot leave the cache half-updated while the old key
            // still matches.
            int newYear = int.Parse(s.Substring(0, 4));
            int newMon = int.Parse(s.Substring(4, 2));
            int newDay = int.Parse(s.Substring(6, 2));
            int newHour = int.Parse(s.Substring(9, 2));
            int newMin = int.Parse(s.Substring(11, 2));

            year = newYear;
            mon = newMon;
            day = newDay;
            hour = newHour;
            min = newMin;
            previousSlice = s.Substring(0, PrefixLength);
        }
        return new DateTime(year, mon, day, hour, min, int.Parse(s.Substring(13, 2)), int.Parse(s.Substring(15, 3)));
    }
}
FOLLOW UP3
/// <summary>
/// Parses "yyyyMMddTHHmmssfff" timestamps while caching a DateTime truncated to
/// the minute. Timestamps that share the cached minute are produced by adding
/// the seconds and milliseconds to that cached value instead of constructing a
/// DateTime from all seven components again.
/// Each instance carries mutable cache state that is rewritten on a cache miss.
/// </summary>
public class fastParseData
{
    // "yyyyMMddTHHmm" spans indexes 0-12, i.e. 13 characters. A 12-character
    // key would cut the minute in half and reuse a stale minute for any other
    // timestamp within the same ten-minute span.
    const int PrefixLength = 13;

    string previousSlice = "";
    DateTime previousDT;

    /// <summary>Parses one timestamp, reusing the cached minute when the prefix is unchanged.</summary>
    /// <param name="s">Timestamp text in the "yyyyMMddTHHmmssfff" layout.</param>
    /// <returns>The cached minute plus the parsed seconds and milliseconds.</returns>
    public DateTime fastParse(ref string s)
    {
        // Ordinal comparison of the prefix in place: no substring is allocated on a cache hit.
        if (string.CompareOrdinal(s, 0, previousSlice, 0, PrefixLength) != 0)
        {
            int year = int.Parse(s.Substring(0, 4));
            int mon = int.Parse(s.Substring(4, 2));
            int day = int.Parse(s.Substring(6, 2));
            int hour = int.Parse(s.Substring(9, 2));
            int min = int.Parse(s.Substring(11, 2));

            // Build the DateTime BEFORE touching the cache: if the components
            // are out of range the constructor throws, and the key must not be
            // updated or the next call would pair it with a stale previousDT.
            DateTime minuteStart = new DateTime(year, mon, day, hour, min, 0, 0);

            previousDT = minuteStart;
            previousSlice = s.Substring(0, PrefixLength);
        }
        // Seconds and milliseconds are applied as one millisecond offset; they
        // are not range-checked here the way the DateTime constructor would.
        return previousDT.AddMilliseconds((int.Parse(s.Substring(13, 2)) * 1000) + int.Parse(s.Substring(15, 3)));
    }
}
FOLLOW UP4
From my profiler the crux seems to be
int.Parse(s.Substring(13, 2))
With the Parse bit being more costly than the substring.
I tried
int.TryParse(s.Substring(13, 2),NumberStyles.None,ci, out secs)
Convert.ToInt32(s.Substring(13, 2));
but again - no difference in speed.
Is there a faster way to parse an int?
This is the code used to benchmark James's code against the framework code (in reference to my comment there). I run this in Release mode, .Net 4.5.2, 32bit console
/// <summary>
/// Console benchmark: first checks that the custom parser agrees with the
/// framework parser on one sample, then times one million calls of each and
/// prints the elapsed milliseconds.
/// </summary>
static void Main(string[] args)
{
    const string sample = "2015-04-11T12:45:59";
    const string pattern = "yyyy-MM-ddTHH:mm:ss";
    const int iterations = 1000000;

    // Sanity check before timing anything: both parsers must agree.
    var expected = FrameworkParse(sample, pattern);
    var candidate = JamesBarrettParse(sample, pattern);
    if (expected != candidate)
    {
        var mismatch = string.Format("reference date {0} does not match SO date {1}", expected.ToString("s"), candidate.ToString("s"));
        throw new Exception(mismatch);
    }

    var timer = new Stopwatch();
    DateTime parsed;

    //FRAMEWORK PARSE
    Console.Write("Starting framework parse for {0} iterations...", iterations);
    timer.Start();
    for (var pass = 0; pass < iterations; pass++)
    {
        parsed = FrameworkParse(sample, pattern);
        // Reading the result keeps each call's value in use; the sample's minute is 45.
        if (parsed.Minute == 45)
        {
            continue;
        }
        Console.WriteLine("duh");
    }
    timer.Stop();
    var frameworkMillis = timer.ElapsedMilliseconds.ToString("F2", CultureInfo.InvariantCulture);
    Console.WriteLine("DONE in {0} millis", frameworkMillis);

    //James Barrett parse
    Console.Write("Starting JB parse for {0} iterations...", iterations);
    timer.Restart();
    for (var pass = 0; pass < iterations; pass++)
    {
        parsed = JamesBarrettParse(sample, pattern);
        if (parsed.Minute == 45)
        {
            continue;
        }
        Console.WriteLine("duh");
    }
    timer.Stop();
    var customMillis = timer.ElapsedMilliseconds.ToString("F2", CultureInfo.InvariantCulture);
    Console.WriteLine("DONE in {0} millis", customMillis);

    Console.Write("press any key to exit");
    Console.ReadKey();
}
/// <summary>
/// Reference implementation: delegates to DateTime.ParseExact, treating the
/// input as universal time and returning it adjusted to universal time.
/// </summary>
/// <param name="s">Text to parse.</param>
/// <param name="format">Exact format string the text must match.</param>
/// <param name="info">Culture to parse with; the invariant culture is used when null.</param>
/// <returns>The parsed DateTime.</returns>
private static DateTime FrameworkParse(string s, string format, CultureInfo info = null)
{
    CultureInfo provider = info;
    if (provider == null)
    {
        provider = CultureInfo.InvariantCulture;
    }

    const DateTimeStyles styles = DateTimeStyles.AdjustToUniversal | DateTimeStyles.AssumeUniversal;
    return DateTime.ParseExact(s, format, provider, styles);
}
Output is
Starting framework parse for 1000000 iterations...DONE in 2058.00 millis
Starting JB parse for 1000000 iterations...DONE in 324.00 millis
press any key to exit
The idea of splitting the string is on the right track, but substring is slow. Whenever I split a string, I use the character accessor. yyyyMMddTHHmmssfff Disclaimer: T
/// <summary>
/// Parses fixed-width timestamps by reading characters directly from the
/// source string, driven by the letters of <c>DateFormat</c>, so no substrings
/// are allocated on the success path.
/// </summary>
public class DateParser1
{
    // Layout the parser walks, one format letter per source character.
    // Note the TWO-digit year: the century is chosen by the rule in GetDate.
    private static System.String DateFormat="yyMMddTHHmmssfff";

    /// <summary>Parses a timestamp that starts at <paramref name="Offset"/> inside <paramref name="SourceString"/>.</summary>
    /// <param name="SourceString">Text containing the timestamp.</param>
    /// <param name="Offset">Index of the first timestamp character; lets callers avoid a Substring.</param>
    /// <returns>The parsed date and time.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="SourceString"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="Offset"/> lies outside the string.</exception>
    /// <exception cref="System.FormatException">The text is too short, contains a non-digit where a digit is required, or does not form a valid date.</exception>
    public static System.DateTime GetDate(System.String SourceString, int Offset=0) // Offset eliminates need for substring
    {
        if(SourceString==null)
        {
            throw new System.ArgumentNullException("SourceString");
        }
        if(Offset<0||Offset>SourceString.Length)
        {
            throw new System.ArgumentOutOfRangeException("Offset");
        }
        // The characters available AFTER the offset must cover the whole format.
        if(SourceString.Length-Offset<DateFormat.Length)
        {
            throw new System.FormatException(System.String.Format("Date Too Short {0} For {1}",SourceString.Substring(Offset),DateFormat));
        }

        int Year=0;
        int Month=0;
        int Day=0;
        int Hour=0;
        int Minute=0;
        int Second=0;
        int HourOffset=0;
        int MS=0;

        for(int i=0;i<DateFormat.Length;i++)
        {
            System.Char c=SourceString[Offset+i];
            switch(DateFormat[i])
            {
                case 'y':
                    Year=AppendDigit(Year,c);
                    break;
                case 'M':
                    Month=AppendDigit(Month,c);
                    break;
                case 'd':
                    Day=AppendDigit(Day,c);
                    break;
                case 'T':
                    // A 'p'/'P' in this position shifts the hour by 12; any
                    // other character (such as a literal 'T') is ignored.
                    if(c=='p'||c=='P')
                    {
                        HourOffset=12;
                    }
                    break;
                case 'h':
                    // 12-hour clock: 12 wraps to 0 before HourOffset is applied.
                    Hour=AppendDigit(Hour,c);
                    if(Hour==12)
                    {
                        Hour=0;
                    }
                    break;
                case 'H':
                    // 24-hour clock: cancel any 12-hour shift.
                    Hour=AppendDigit(Hour,c);
                    HourOffset=0;
                    break;
                case 'm':
                    Minute=AppendDigit(Minute,c);
                    break;
                case 's':
                    Second=AppendDigit(Second,c);
                    break;
                case 'f':
                    MS=AppendDigit(MS,c);
                    break;
            }
        }

        // Two-digit year pivot: values above 30 are taken as 19xx, the rest as 20xx.
        if(Year>30) //Change For Your Business Rules
        {
            Year+=1900;
        }
        else
        {
            Year+=2000;
        }

        try
        {
            return new System.DateTime(Year,Month,Day,Hour+HourOffset,Minute,Second,MS);
        }
        catch(System.ArgumentOutOfRangeException ex)
        {
            // Report every parsed component and keep the original exception as the cause.
            throw new System.FormatException(System.String.Format("Error In Date: {0}/{1}/{2} {3}:{4}:{5}.{6} - {7} {8}",Year,Month,Day,Hour+HourOffset,Minute,Second,MS,DateFormat,SourceString.Substring(Offset,DateFormat.Length)),ex);
        }
    }

    // Shifts the accumulated value one decimal place and adds the digit; rejects non-digit characters.
    private static int AppendDigit(int Accumulated, System.Char Digit)
    {
        if(Digit<'0'||Digit>'9')
        {
            throw new System.FormatException(System.String.Format("Invalid Digit '{0}' In Date",Digit));
        }
        return Accumulated*10+(Digit-'0');
    }
}
You can write your own parsing algorithm: first you split your string into an array/list/whatever, and then you use the DateTime constructor to create your DateTime,
DateTime newDT = DateTime(Int32, Int32, Int32, Int32, Int32, Int32, Int32);
Since the year/month/day won't change that fast, you can buffer them and therefore need a lower number of string operations.
http://msdn.microsoft.com/de-de/library/vstudio/system.datetime.aspx
An easy way would be to store the first 8 characters, e.g. string a = fields[0].Substring(0, 8) (Substring is the C# equivalent of slice), then parse them into ints. On the next iteration you take the same slice again and compare it with the stored one; if they are equal, you reuse the ints from last time instead of parsing them again. Naturally, for that you need to store both the slice and the integers.
Since the problem now seems to be the construction time, you should try either to add the elapsed time to the previous DateTime (checking whether your ints are higher or lower than before and using AddSeconds etc.), or to take the DateTime you already constructed and set its values to the new time.
try this:
/// <summary>
/// Parses "yyyyMMddTHHmmssfff" timestamps while caching a DateTime truncated to
/// the minute, so a new DateTime is only constructed from its components when
/// the year/month/day/hour/minute prefix changes. For timestamps inside the
/// cached minute the result is derived from the cached value.
/// Each instance carries mutable cache state that is rewritten on a cache miss.
/// </summary>
public class fastParseData
{
    // "yyyyMMddTHHmm" spans indexes 0-12, i.e. 13 characters. A 12-character
    // key would cut the minute in half and reuse a stale minute for any other
    // timestamp within the same ten-minute span.
    const int PrefixLength = 13;

    string previousSlice = "";
    DateTime previousDT;

    /// <summary>Parses one timestamp, reusing the cached minute when the prefix is unchanged.</summary>
    /// <param name="s">Timestamp text in the "yyyyMMddTHHmmssfff" layout.</param>
    /// <returns>The cached minute plus the parsed seconds and milliseconds.</returns>
    public DateTime fastParse(ref string s)
    {
        // Ordinal comparison of the prefix in place: no substring is allocated on a cache hit.
        if (string.CompareOrdinal(s, 0, previousSlice, 0, PrefixLength) != 0)
        {
            int year = int.Parse(s.Substring(0, 4));
            int mon = int.Parse(s.Substring(4, 2));
            int day = int.Parse(s.Substring(6, 2));
            int hour = int.Parse(s.Substring(9, 2));
            int min = int.Parse(s.Substring(11, 2));

            // Build the DateTime BEFORE touching the cache: if the components
            // are out of range the constructor throws, and the key must not be
            // updated or the next call would pair it with a stale previousDT.
            DateTime minuteStart = new DateTime(year, mon, day, hour, min, 0, 0);

            previousDT = minuteStart;
            previousSlice = s.Substring(0, PrefixLength);
        }
        // DateTime has no instance ParseExact and cannot be modified in place;
        // the seconds and milliseconds are applied as an offset from the
        // cached minute, which yields a new DateTime value.
        return previousDT.AddMilliseconds((int.Parse(s.Substring(13, 2)) * 1000) + int.Parse(s.Substring(15, 3)));
    }
}
That way you only create a DateTime once per minute and then just adjust its time for each new line.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With