diff --git a/Usenet/Nntp/Parsers/HeaderDateParser.cs b/Usenet/Nntp/Parsers/HeaderDateParser.cs index 22c4df3..90da3e1 100644 --- a/Usenet/Nntp/Parsers/HeaderDateParser.cs +++ b/Usenet/Nntp/Parsers/HeaderDateParser.cs @@ -6,6 +6,18 @@ namespace Usenet.Nntp.Parsers { internal static class HeaderDateParser { + private const string _dateTimeRegexString = + @"(?:\s*" + + @"(?<dayOfWeek>Sun|Mon|Tue|Wed|Thu|Fri|Sat),)?\s*" + + @"(?<day>\d{1,2})\s+" + + @"(?<month>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+" + + @"(?<year>\d{2,4})\s+" + + @"(?<hour>\d{1,2}):(?<min>\d{1,2})(?::(?<sec>\d{1,2}))?\s*" + + @"(?<tz>[+-]\d+|(?:UT|UTC|GMT|Z|EDT|EST|CDT|CST|MDT|MST|PDT|PST|A|N|M|Y|[A-Z]+)" + + @")?"; + + private static readonly Regex _dateTimeRegex = new Regex(_dateTimeRegexString, RegexOptions.IgnoreCase | RegexOptions.Compiled); + /// <summary> /// Parses header date/time strings as described in the /// Date and Time Specification. @@ -18,59 +30,31 @@ internal static class HeaderDateParser { return null; } - string[] valueParts = value.Split(new []{','}, StringSplitOptions.RemoveEmptyEntries); - if (valueParts.Length > 2) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - - // skip day-of-week for now - //string dayOfWeek = valueParts.Length == 2 ? valueParts[0] : null; - string dateTime = valueParts.Length == 2 ?
valueParts[1] : valueParts[0]; - - // remove obsolete whitespace from time part - dateTime = Regex.Replace(dateTime, @"\s+:\s+", ":"); - - string[] dateTimeParts = dateTime.Split(new[] {' ', '\n', '\r', '\t'}, StringSplitOptions.RemoveEmptyEntries); - if (dateTimeParts.Length != 5 && (dateTimeParts.Length != 6 || dateTimeParts[5] != "(UTC)")) + var matches = _dateTimeRegex.Match(value); + if (!matches.Success) { throw new FormatException(Resources.Nntp.BadHeaderDateFormat); } - ParseDate(dateTimeParts, out int year, out int month, out int day); - ParseTime(dateTimeParts[3], out int hour, out int minute, out int second); - TimeSpan zone = ParseZone(dateTimeParts[4]); + var day = int.Parse(matches.Groups["day"].Value); + var month = matches.Groups["month"].Value; + var year = int.Parse(matches.Groups["year"].Value); + var hour = int.Parse(matches.Groups["hour"].Value); + var minute = int.Parse(matches.Groups["min"].Value); + int.TryParse(matches.Groups["sec"].Value, out var second); + var tz = matches.Groups["tz"].Value; + var zone = ParseZone(tz); - return new DateTimeOffset(year, month, day, hour, minute, second, 0, zone); - } + int monthIndex = 1 + Array.FindIndex(DateTimeFormatInfo.InvariantInfo.AbbreviatedMonthNames, + m => string.Equals(m, month, StringComparison.OrdinalIgnoreCase)); - private static void ParseDate(string[] dateTimeParts, out int year, out int month, out int day) - { - if (dateTimeParts.Length < 3) + if (matches.Groups["year"].Value.Length < 4) { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - if (!int.TryParse(dateTimeParts[0], out day)) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - string monthString = dateTimeParts[1]; - int monthIndex = Array.FindIndex(DateTimeFormatInfo.InvariantInfo.AbbreviatedMonthNames, - m => string.Equals(m, monthString, StringComparison.OrdinalIgnoreCase)); - if (monthIndex < 0) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - month = 
monthIndex + 1; - if (!int.TryParse(dateTimeParts[2], out year)) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - if (dateTimeParts[2].Length <= 2) - { - year += 100 * GetCentury(year, month, day); + year += GetCentury(year, monthIndex, day) * 100; } + + return new DateTimeOffset(year, monthIndex, day, hour, minute, second, 0, zone); } private static int GetCentury(int year, int month, int day) @@ -82,28 +66,6 @@ private static int GetCentury(int year, int month, int day) : currentCentury; } - private static void ParseTime(string value, out int hour, out int minute, out int second) - { - string[] timeParts = value.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries); - if (timeParts.Length < 2 || timeParts.Length > 3) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - if (!int.TryParse(timeParts[0], out hour)) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - if (!int.TryParse(timeParts[1], out minute)) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - second = 0; - if (timeParts.Length > 2 && !int.TryParse(timeParts[2], out second)) - { - throw new FormatException(Resources.Nntp.BadHeaderDateFormat); - } - } - private static TimeSpan ParseZone(string value) { // The time zone must be as specified in RFC822, https://tools.ietf.org/html/rfc822#section-5 @@ -112,11 +74,12 @@ private static TimeSpan ParseZone(string value) { switch (value) { - // UTC is not specified in RFC822, but allowing it since it is commonly used + // UTC and empty are not specified in RFC822, but allowing them since they are commonly used case "UTC": case "UT": case "GMT": case "Z": + case "": break; case "EDT": diff --git a/UsenetTests/Nntp/Parsers/HeaderDateParserTests.cs b/UsenetTests/Nntp/Parsers/HeaderDateParserTests.cs index 11d2782..26e9823 100644 --- a/UsenetTests/Nntp/Parsers/HeaderDateParserTests.cs +++ b/UsenetTests/Nntp/Parsers/HeaderDateParserTests.cs @@ -10,6 +10,10 @@ public 
class HeaderDateParserTests { public static readonly IEnumerable<object[]> ParseData = new[] { + new object[] {"Mon, 1 May 2017 1:55", new DateTimeOffset(2017, 5, 1, 1, 55, 0, TimeSpan.Zero)}, + new object[] {"1 May 2017 1:55:33", new DateTimeOffset(2017, 5, 1, 1, 55, 33, TimeSpan.Zero)}, + new object[] {"01 May 2017 13:55", new DateTimeOffset(2017, 5, 1, 13, 55, 0, TimeSpan.Zero)}, + new object[] {"01 May 2017 13:55:33", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)}, new object[] {"01 May 2017 13:55:33 +0000", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)}, new object[] {"01 May 2017 13:55:33 -0000", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)}, new object[] {"01 May 2017 13:55:33 +0000 (UTC)", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)}, @@ -20,6 +24,9 @@ public class HeaderDateParserTests new object[] {"01 May 2017 13:55 +1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, TimeSpan.FromMinutes(10 * 60 + 30))}, new object[] {"01 May 2017 13:55 -1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, -TimeSpan.FromMinutes(10 * 60 + 30))}, + new object[] {"01 May 2017 13:55+1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, TimeSpan.FromMinutes(10 * 60 + 30))}, + new object[] {"01 May 2017 13:55-1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, -TimeSpan.FromMinutes(10 * 60 + 30))}, + new object[] {"1 Jan 2017 00:00:00 +0000", new DateTimeOffset(2017, 1, 1, 0, 0, 0, TimeSpan.Zero)}, new object[] {"1 Feb 2017 00:00:00 +0000", new DateTimeOffset(2017, 2, 1, 0, 0, 0, TimeSpan.Zero)}, new object[] {"1 Mar 2017 00:00:00 +0000", new DateTimeOffset(2017, 3, 1, 0, 0, 0, TimeSpan.Zero)}, @@ -75,7 +82,7 @@ public void HeaderDateShouldBeParsedCorrectly(string headerDate, DateTimeOffset [Theory] [MemberData(nameof(TimezoneParseFailureData))] - public void HeaderDateShouldBeNotBeParsedCorrectly(string headerDate, Type exceptionType) + public void HeaderDateShouldNotBeParsedCorrectly(string headerDate, Type exceptionType) {
Assert.Throws(exceptionType, () => HeaderDateParser.Parse(headerDate)); }