Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed header date/time parsing by using a regular expression #18

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 30 additions & 67 deletions Usenet/Nntp/Parsers/HeaderDateParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ namespace Usenet.Nntp.Parsers
{
internal static class HeaderDateParser
{
// Pattern for header date/time values: an optional day name followed by a comma, then the
// day of month, a three-letter month abbreviation, a 2-4 digit year, "hh:mm" with an
// optional ":ss", and an optional zone.
//
// The pattern has no anchors, so text after the zone (for example a trailing "(UTC)"
// comment) is not consumed and does not prevent a match.
//
// The zone is either a signed numeric offset or one whole run of letters. Alphabetic zones
// are captured greedily as a single token so that a name such as "MSK" or "AEST" is handed
// to ParseZone intact; enumerating short names ahead of a catch-all would let the first
// matching alternative win and truncate such names to "M" or "A".
private const string _dateTimeRegexString =
    @"(?:\s*"
    + @"(?<dayName>Sun|Mon|Tue|Wed|Thu|Fri|Sat),)?\s*"
    + @"(?<day>\d{1,2})\s+"
    + @"(?<month>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+"
    + @"(?<year>\d{2,4})\s+"
    + @"(?<hour>\d{1,2}):(?<min>\d{1,2})(?::(?<sec>\d{1,2}))?\s*"
    + @"(?<tz>[+-]\d+|[A-Z]+)?";

// CultureInvariant keeps the case-insensitive matching of day, month and zone names
// independent of the culture of the running process.
private static readonly Regex _dateTimeRegex = new Regex(
    _dateTimeRegexString,
    RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant);

/// <summary>
/// Parses header date/time strings as described in the
/// <a href="https://tools.ietf.org/html/rfc5322#section-3.3">Date and Time Specification</a>.
Expand All @@ -18,59 +30,31 @@ internal static class HeaderDateParser
{
return null;
}
string[] valueParts = value.Split(new []{','}, StringSplitOptions.RemoveEmptyEntries);
if (valueParts.Length > 2)
{
throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
}

// skip day-of-week for now
//string dayOfWeek = valueParts.Length == 2 ? valueParts[0] : null;

string dateTime = valueParts.Length == 2 ? valueParts[1] : valueParts[0];

// remove obsolete whitespace from time part
dateTime = Regex.Replace(dateTime, @"\s+:\s+", ":");

string[] dateTimeParts = dateTime.Split(new[] {' ', '\n', '\r', '\t'}, StringSplitOptions.RemoveEmptyEntries);
if (dateTimeParts.Length != 5 && (dateTimeParts.Length != 6 || dateTimeParts[5] != "(UTC)"))
var matches = _dateTimeRegex.Match(value);
if (!matches.Success)
{
throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
}

ParseDate(dateTimeParts, out int year, out int month, out int day);
ParseTime(dateTimeParts[3], out int hour, out int minute, out int second);
TimeSpan zone = ParseZone(dateTimeParts[4]);
var day = int.Parse(matches.Groups["day"].Value);
var month = matches.Groups["month"].Value;
var year = int.Parse(matches.Groups["year"].Value);
var hour = int.Parse(matches.Groups["hour"].Value);
var minute = int.Parse(matches.Groups["min"].Value);
int.TryParse(matches.Groups["sec"].Value, out var second);
var tz = matches.Groups["tz"].Value;
var zone = ParseZone(tz);

return new DateTimeOffset(year, month, day, hour, minute, second, 0, zone);
}
int monthIndex = 1 + Array.FindIndex(DateTimeFormatInfo.InvariantInfo.AbbreviatedMonthNames,
m => string.Equals(m, month, StringComparison.OrdinalIgnoreCase));

private static void ParseDate(string[] dateTimeParts, out int year, out int month, out int day)
{
if (dateTimeParts.Length < 3)
if (matches.Groups["year"].Value.Length < 4)
{
throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
}
if (!int.TryParse(dateTimeParts[0], out day))
{
throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
}
string monthString = dateTimeParts[1];
int monthIndex = Array.FindIndex(DateTimeFormatInfo.InvariantInfo.AbbreviatedMonthNames,
m => string.Equals(m, monthString, StringComparison.OrdinalIgnoreCase));
if (monthIndex < 0)
{
throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
}
month = monthIndex + 1;
if (!int.TryParse(dateTimeParts[2], out year))
{
throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
}
if (dateTimeParts[2].Length <= 2)
{
year += 100 * GetCentury(year, month, day);
year += GetCentury(year, monthIndex, day) * 100;
}

return new DateTimeOffset(year, monthIndex, day, hour, minute, second, 0, zone);
}

private static int GetCentury(int year, int month, int day)
Expand All @@ -82,28 +66,6 @@ private static int GetCentury(int year, int month, int day)
: currentCentury;
}

/// <summary>
/// Parses the time-of-day token of a header date, written as "hh:mm" or "hh:mm:ss".
/// </summary>
/// <param name="value">The time token; its components are separated by ':'.</param>
/// <param name="hour">Receives the first component.</param>
/// <param name="minute">Receives the second component.</param>
/// <param name="second">Receives the third component, or 0 when the token has only two.</param>
/// <exception cref="FormatException">
/// The token does not have two or three components, or a component is not made up of
/// decimal digits only.
/// </exception>
private static void ParseTime(string value, out int hour, out int minute, out int second)
{
    string[] timeParts = value.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
    if (timeParts.Length < 2 || timeParts.Length > 3)
    {
        throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
    }

    hour = ParseTimeComponent(timeParts[0]);
    minute = ParseTimeComponent(timeParts[1]);

    // Seconds are optional and default to zero.
    second = timeParts.Length > 2 ? ParseTimeComponent(timeParts[2]) : 0;
}

/// <summary>
/// Parses one digits-only time component, throwing <see cref="FormatException"/> otherwise.
/// </summary>
private static int ParseTimeComponent(string text)
{
    // NumberStyles.None rejects signs and surrounding whitespace, so a value such as "-1"
    // is reported as a bad header date here rather than being accepted as a negative number.
    // The invariant culture keeps the result independent of the process culture.
    if (!int.TryParse(
            text,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out int component))
    {
        throw new FormatException(Resources.Nntp.BadHeaderDateFormat);
    }

    return component;
}

private static TimeSpan ParseZone(string value)
{
// The time zone must be as specified in RFC822, https://tools.ietf.org/html/rfc822#section-5
Expand All @@ -112,11 +74,12 @@ private static TimeSpan ParseZone(string value)
{
switch (value)
{
// UTC is not specified in RFC822, but allowing it since it is commonly used
// UTC and empty are not specified in RFC822, but allowing them since they are commonly used
case "UTC":
case "UT":
case "GMT":
case "Z":
case "":
break;

case "EDT":
Expand Down
9 changes: 8 additions & 1 deletion UsenetTests/Nntp/Parsers/HeaderDateParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ public class HeaderDateParserTests
{
public static readonly IEnumerable<object[]> ParseData = new[]
{
new object[] {"Mon, 1 May 2017 1:55", new DateTimeOffset(2017, 5, 1, 1, 55, 0, TimeSpan.Zero)},
new object[] {"1 May 2017 1:55:33", new DateTimeOffset(2017, 5, 1, 1, 55, 33, TimeSpan.Zero)},
new object[] {"01 May 2017 13:55", new DateTimeOffset(2017, 5, 1, 13, 55, 0, TimeSpan.Zero)},
new object[] {"01 May 2017 13:55:33", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)},
new object[] {"01 May 2017 13:55:33 +0000", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)},
new object[] {"01 May 2017 13:55:33 -0000", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)},
new object[] {"01 May 2017 13:55:33 +0000 (UTC)", new DateTimeOffset(2017, 5, 1, 13, 55, 33, TimeSpan.Zero)},
Expand All @@ -20,6 +24,9 @@ public class HeaderDateParserTests
new object[] {"01 May 2017 13:55 +1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, TimeSpan.FromMinutes(10 * 60 + 30))},
new object[] {"01 May 2017 13:55 -1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, -TimeSpan.FromMinutes(10 * 60 + 30))},

new object[] {"01 May 2017 13:55+1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, TimeSpan.FromMinutes(10 * 60 + 30))},
new object[] {"01 May 2017 13:55-1030", new DateTimeOffset(2017, 5, 1, 13, 55, 0, -TimeSpan.FromMinutes(10 * 60 + 30))},

new object[] {"1 Jan 2017 00:00:00 +0000", new DateTimeOffset(2017, 1, 1, 0, 0, 0, TimeSpan.Zero)},
new object[] {"1 Feb 2017 00:00:00 +0000", new DateTimeOffset(2017, 2, 1, 0, 0, 0, TimeSpan.Zero)},
new object[] {"1 Mar 2017 00:00:00 +0000", new DateTimeOffset(2017, 3, 1, 0, 0, 0, TimeSpan.Zero)},
Expand Down Expand Up @@ -75,7 +82,7 @@ public void HeaderDateShouldBeParsedCorrectly(string headerDate, DateTimeOffset

[Theory]
[MemberData(nameof(TimezoneParseFailureData))]
public void HeaderDateShouldBeNotBeParsedCorrectly(string headerDate, Type exceptionType)
public void HeaderDateShouldNotBeParsedCorrectly(string headerDate, Type exceptionType)
{
Assert.Throws(exceptionType, () => HeaderDateParser.Parse(headerDate));
}
Expand Down