Skip to content

Commit

Permalink
[EN DateTimeV2] Add support for time range with duration (#3174)
Browse files Browse the repository at this point in the history
* Datetime for longer span - local draft commit

* DateTimeForLongerSpan - Implement for from

* DateTimeForLongerSpan - from datetime for duration local commit

* DateTimeForLongerSpan - Parsing time period with duration initial commit

* DateTimeForLongerSpan - Parsing time period with duration update test cases

* DateTimeForLongerSpan - Add support for time range with duration - Update test case to trigger rebuild

* DateTimeForLongerSpan - Add support for time range with duration - Update test case to trigger rebuild

---------

Co-authored-by: Michael Wang (Centific Technologies Inc) <[email protected]>
  • Loading branch information
MichaelMWW and Michael Wang (Centific Technologies Inc) authored Nov 12, 2024
1 parent dde290b commit 18fe661
Show file tree
Hide file tree
Showing 11 changed files with 697 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@ public static class DateTimeDefinitions
// Alternation of the description, holiday-list, decade, quarter, half-year and season patterns listed below.
public static readonly string TasksModeSupressionRegexes = $@"({AmPmDescRegex}|{TasksModeSpecialDescRegex}|{TasksModeHolidayListSupression}|{DecadeRegex}|{DecadeWithCenturyRegex}|{QuarterRegex}|{QuarterRegexYearFront}|{AllHalfYearRegex}|{SeasonRegex})";
// Matches the word "next" followed by whitespace; captured in the named group "next".
public const string TasksModeNextPrefix = @"(?<next>next\s+)";
// Matches "next week", "next month" or "next year" on word boundaries; the unit is captured in the group "week", "month" or "year".
public static readonly string TasksModeDurationToDatePatterns = $@"\b({TasksModeNextPrefix}((?<week>week)|(?<month>month)|(?<year>year)))\b";
// Matches "from <time> for <duration>", e.g. "from 6am for 3 hours".
// The start time is captured in the group "time1"; a description such as am/pm that follows a bare hour is captured in "leftDesc".
// The duration is one "<anything> <unit>" part optionally followed by a second one.
public static readonly string TimePeriodFromForRegex = $@"(from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))\s*for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?";
// Same building blocks as TimePeriodFromForRegex with the duration first: "for <duration> from <time>", e.g. "for 3 hours from 6 am".
public static readonly string TimePeriodForFromRegex = $@"for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?\s+(from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))";
// Either ordering of a start time combined with a duration.
public static readonly string TimePeriodWithDurationRegex = $@"({TimePeriodFromForRegex}|{TimePeriodForFromRegex})";
public static readonly Dictionary<string, string> UnitMap = new Dictionary<string, string>
{
{ @"decades", @"10Y" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ public class EnglishTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfig
// Regex built from DateTimeDefinitions.GeneralEndingRegex using the shared flags and timeout.
public static readonly Regex GeneralEndingRegex =
new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut);

// Regex built from DateTimeDefinitions.TimePeriodWithDurationRegex: a start time combined with a duration,
// e.g. "from 6am for 3 hours" or "for 3 hours from 6 am".
public static readonly Regex TimePeriodWithDurationRegex =
new Regex(DateTimeDefinitions.TimePeriodWithDurationRegex, RegexFlags, RegexTimeOut);

// Singleline makes '.' match newlines as well; ExplicitCapture captures only named groups,
// so the unnamed parentheses in the patterns are grouping-only.
private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

public EnglishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c
TimeOfDayRegex = EnglishTimePeriodExtractorConfiguration.TimeOfDayRegex;
GeneralEndingRegex = EnglishTimePeriodExtractorConfiguration.GeneralEndingRegex;
TillRegex = EnglishTimePeriodExtractorConfiguration.TillRegex;
TimePeriodWithDurationRegex = EnglishTimePeriodExtractorConfiguration.TimePeriodWithDurationRegex;
DurationParser = config.DurationParser;
DurationExtractor = config.DurationExtractor;

Numbers = config.Numbers;
UtilityConfiguration = config.UtilityConfiguration;
Expand All @@ -40,6 +43,10 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c

public IDateTimeParser TimeZoneParser { get; }

// Parser used to resolve the duration part of expressions such as "from 6am for 3 hours"; assigned from the common configuration in the constructor.
public IDateTimeParser DurationParser { get; }

// Extractor used to locate the duration part of such expressions; assigned from the common configuration in the constructor.
public IDateTimeExtractor DurationExtractor { get; }

public Regex SpecificTimeFromToRegex { get; }

public Regex SpecificTimeBetweenAndRegex { get; }
Expand All @@ -54,6 +61,8 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c

public Regex TillRegex { get; }

// Pattern for a start time combined with a duration; assigned in the constructor from EnglishTimePeriodExtractorConfiguration.TimePeriodWithDurationRegex.
public Regex TimePeriodWithDurationRegex { get; }

public IImmutableDictionary<string, int> Numbers { get; }

public IDateTimeUtilityConfiguration UtilityConfiguration { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Text.DateTime.English;
using Microsoft.Recognizers.Text.InternalCache;
using Microsoft.Recognizers.Text.Utilities;
using DateObject = System.DateTime;
Expand Down Expand Up @@ -55,6 +55,7 @@ private List<ExtractResult> ExtractImpl(string text, DateObject reference)
{
var tokens = new List<Token>();
tokens.AddRange(MatchSimpleCases(text));
tokens.AddRange(MatchTimePeriodWithDurationCases(text));
tokens.AddRange(MergeTwoTimePoints(text, reference));
tokens.AddRange(MatchTimeOfDay(text));

Expand Down Expand Up @@ -153,6 +154,22 @@ private List<Token> MatchSimpleCases(string text)
return ret;
}

// Extracts time ranges written as a start time plus a duration,
// e.g. "from 6am for 3 hours" and "for 3 hours from 6 am".
// The pattern lives on the English extractor configuration, so nothing is
// extracted when the active configuration is of any other type.
// Returns one token (start index, exclusive end index) per occurrence in the text.
private List<Token> MatchTimePeriodWithDurationCases(string text)
{
    var ret = new List<Token>();

    if (!(this.config is EnglishTimePeriodExtractorConfiguration))
    {
        return ret;
    }

    // Enumerate every occurrence rather than only the first one, so inputs that
    // contain several such ranges do not silently lose the later ones.
    foreach (Match match in EnglishTimePeriodExtractorConfiguration.TimePeriodWithDurationRegex.Matches(text))
    {
        ret.Add(new Token(match.Index, match.Index + match.Length));
    }

    return ret;
}

private bool StartsWithTimeZone(string afterText)
{
var startsWithTimeZone = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
using System;
using System.Collections.Generic;
using System.Globalization;

using Microsoft.Recognizers.Text.DateTime.English;
using Microsoft.Recognizers.Text.Utilities;

using DateObject = System.DateTime;
Expand Down Expand Up @@ -120,6 +120,11 @@ private DateTimeResolutionResult InternalParse(string entityText, DateObject ref
innerResult = ParseTimeOfDay(entityText, referenceTime);
}

if (!innerResult.Success)
{
innerResult = ParseTimePeroidWithDuration(entityText, referenceTime);
}

return innerResult;
}

Expand Down Expand Up @@ -669,6 +674,138 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject
return ret;
}

// Parses time ranges written as a start time plus a duration,
// e.g. "from 6am for 3 hours" and "for 3 hours from 6am".
//
// text:          the extracted entity text; it must match the pattern in full (after trimming).
// referenceTime: supplies the year, month and day of the resolved start time.
//
// Returns a result with Success = true, a "(begin,end,duration)" Timex, identical
// future/past value tuples (begin, end) and the parsed start time as a sub-entity.
// Returns an unsuccessful result when the configuration is not the English one, the
// text does not match, or the start hour / duration cannot be resolved.
private DateTimeResolutionResult ParseTimePeroidWithDuration(string text, DateObject referenceTime)
{
    var ret = new DateTimeResolutionResult();

    // The pattern is only exposed by the English parser configuration.
    var parserConfig = this.config as EnglishTimePeriodParserConfiguration;
    if (parserConfig == null)
    {
        return ret;
    }

    var match = parserConfig.TimePeriodWithDurationRegex.MatchExact(text, trim: true);
    if (!match.Success)
    {
        return ret;
    }

    // Cases like "half past seven" are not handled here.
    // Checked before the duration is extracted so no work is wasted on rejected input.
    if (match.Groups[Constants.PrefixGroupName].Success)
    {
        return ret;
    }

    var erDuration = parserConfig.DurationExtractor.Extract(text);
    if (erDuration is null || erDuration.Count == 0)
    {
        return ret;
    }

    // Only the first extracted duration is used.
    var prDuration = parserConfig.DurationParser.Parse(erDuration[0]);
    var durationResult = prDuration?.Value as DateTimeResolutionResult;
    if (durationResult?.FutureValue == null)
    {
        // The duration was extracted but could not be resolved; fail instead of dereferencing null.
        return ret;
    }

    int year = referenceTime.Year, month = referenceTime.Month, day = referenceTime.Day;

    // "4" differs from "4:00" because the Timex differs: "T04H" vs "T04H00M".
    // Minute and second therefore stay at their invalid markers unless they were written explicitly.
    int beginHour;
    int beginMinute = Constants.InvalidMinute;
    int beginSecond = Constants.InvalidSecond;

    // Get the hour of time1
    var hourGroup = match.Groups[Constants.HourGroupName];
    if (hourGroup.Captures.Count == 0)
    {
        return ret;
    }

    var hourStr = hourGroup.Captures[0].Value;

    // The hour is either a word present in the Numbers map or a plain integer;
    // anything else is treated as a failed parse rather than an exception.
    if (!config.Numbers.TryGetValue(hourStr, out beginHour) &&
        !int.TryParse(hourStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out beginHour))
    {
        return ret;
    }

    var time1StartIndex = match.Groups["time1"].Index;
    var time1EndIndex = time1StartIndex + match.Groups["time1"].Length;

    // Get beginMinute (if exists): only captures located inside time1 count
    for (int i = 0; i < match.Groups[Constants.MinuteGroupName].Captures.Count; i++)
    {
        var minuteCapture = match.Groups[Constants.MinuteGroupName].Captures[i];
        if (minuteCapture.Index >= time1StartIndex && minuteCapture.Index + minuteCapture.Length <= time1EndIndex)
        {
            beginMinute = int.Parse(minuteCapture.Value, CultureInfo.InvariantCulture);
        }
    }

    // Get beginSecond (if exists): only captures located inside time1 count
    for (int i = 0; i < match.Groups[Constants.SecondGroupName].Captures.Count; i++)
    {
        var secondCapture = match.Groups[Constants.SecondGroupName].Captures[i];
        if (secondCapture.Index >= time1StartIndex && secondCapture.Index + secondCapture.Length <= time1EndIndex)
        {
            beginSecond = int.Parse(secondCapture.Value, CultureInfo.InvariantCulture);
        }
    }

    // Desc here means descriptions like "am / pm / o'clock".
    // Get leftDesc (if exists); fall back to the first desc capture located inside time1.
    var leftDesc = match.Groups["leftDesc"].Value;
    for (int i = 0; i < match.Groups[Constants.DescGroupName].Captures.Count; i++)
    {
        var descCapture = match.Groups[Constants.DescGroupName].Captures[i];
        if (descCapture.Index >= time1StartIndex && descCapture.Index + descCapture.Length <= time1EndIndex && string.IsNullOrEmpty(leftDesc))
        {
            leftDesc = descCapture.Value;
        }
    }

    var beginDateTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, beginHour, beginMinute >= 0 ? beginMinute : 0, beginSecond >= 0 ? beginSecond : 0);

    var hasLeftAm = !string.IsNullOrEmpty(leftDesc) && leftDesc.StartsWith("a", StringComparison.Ordinal);
    var hasLeftPm = !string.IsNullOrEmpty(leftDesc) && leftDesc.StartsWith("p", StringComparison.Ordinal);

    // The start time has a description like 'am' or 'pm': shift it into the matching half of the day.
    if (hasLeftAm)
    {
        if (beginHour >= Constants.HalfDayHourCount)
        {
            beginDateTime = beginDateTime.AddHours(-Constants.HalfDayHourCount);
        }
    }
    else if (hasLeftPm)
    {
        if (beginHour < Constants.HalfDayHourCount)
        {
            beginDateTime = beginDateTime.AddHours(Constants.HalfDayHourCount);
        }
    }

    // The resolved duration value is converted to whole seconds and added to the start time.
    var endDateTime = beginDateTime.AddSeconds(Convert.ToInt32(durationResult.FutureValue, CultureInfo.InvariantCulture));

    // The begin part keeps the precision that was written; the end part always carries hour, minute and second.
    var beginStr = DateTimeFormatUtil.ShortTime(beginDateTime.Hour, beginMinute, beginSecond);
    var endStr = DateTimeFormatUtil.ShortTime(endDateTime.Hour, endDateTime.Minute, endDateTime.Second);

    ret.Success = true;

    ret.Timex = $"({beginStr},{endStr},{DateTimeFormatUtil.LuisTimeSpan(endDateTime - beginDateTime)})";

    ret.FutureValue = ret.PastValue = new Tuple<DateObject, DateObject>(
        beginDateTime,
        endDateTime);

    // Expose the start time as a sub-entity by running the time parser over the time1 span.
    ret.SubDateTimeEntities = new List<object>();
    var er = new ExtractResult()
    {
        Start = time1StartIndex,
        Length = time1EndIndex - time1StartIndex,
        Text = text.Substring(time1StartIndex, time1EndIndex - time1StartIndex),
        Type = $"{Constants.SYS_DATETIME_TIME}",
    };

    var pr = this.config.TimeParser.Parse(er, referenceTime);
    ret.SubDateTimeEntities.Add(pr);

    return ret;
}

private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceTime)
{
var ret = new DateTimeResolutionResult();
Expand Down
9 changes: 9 additions & 0 deletions Patterns/English/English-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,15 @@ TasksModeNextPrefix: !simpleRegex
TasksModeDurationToDatePatterns: !nestedRegex
def: \b({TasksModeNextPrefix}((?<week>week)|(?<month>month)|(?<year>year)))\b
references: [TasksModeNextPrefix]
# "from <time> for <duration>", e.g. "from 6am for 3 hours". The start time is captured in the group time1,
# a description following a bare hour in leftDesc; the duration is one or two "<anything> <unit>" parts.
# NOTE(review): references lists several names that do not appear directly in def
# (TimeRegexWithDotConnector, TillRegex, PmRegex, AmRegex, RangePrefixRegex) - confirm whether they are required.
TimePeriodFromForRegex: !nestedRegex
def: (from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))\s*for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?
references: [ TimeRegex2, FirstTimeRegexInTimeRange, TimeRegexWithDotConnector, TillRegex, HourRegex, PeriodHourNumRegex, DescRegex, PmRegex, AmRegex, RangePrefixRegex, DurationUnitRegex ]
# Same pieces with the duration first: "for <duration> from <time>", e.g. "for 3 hours from 6 am".
TimePeriodForFromRegex: !nestedRegex
def: for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?\s+(from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))
references: [ TimeRegex2, FirstTimeRegexInTimeRange, TimeRegexWithDotConnector, TillRegex, HourRegex, PeriodHourNumRegex, DescRegex, PmRegex, AmRegex, RangePrefixRegex, DurationUnitRegex ]
# Either ordering of a start time combined with a duration.
TimePeriodWithDurationRegex: !nestedRegex
def: ({TimePeriodFromForRegex}|{TimePeriodForFromRegex})
references: [ TimePeriodFromForRegex, TimePeriodForFromRegex ]
UnitMap: !dictionary
types: [ string, string ]
entries:
Expand Down
Loading

0 comments on commit 18fe661

Please sign in to comment.