From cdb178ab6fa1deaeb2be27880ca4738af1f451d9 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 25 Sep 2024 09:11:23 +0800 Subject: [PATCH] [EN DateTimeV2] Support for longer span date time (#3166) * Datetime for longer span - local draft commit * DateTimeForLongerSpan - Implement for from --------- Co-authored-by: Michael Wang (Centific Technologies Inc) --- .../English/DateTimeDefinitions.cs | 3 +- .../Constants.cs | 2 + ...EnglishDatePeriodExtractorConfiguration.cs | 3 + .../EnglishDatePeriodParserConfiguration.cs | 3 + .../Extractors/BaseDatePeriodExtractor.cs | 14 +++- .../Parsers/BaseDatePeriodParser.cs | 66 ++++++++++++++++ Patterns/English/English-DateTime.yaml | 4 +- .../DateTime/English/DatePeriodExtractor.json | 36 +++++++++ Specs/DateTime/English/DatePeriodParser.json | 78 +++++++++++++++++++ Specs/DateTime/English/DateTimeModel.json | 75 ++++++++++++++++++ 10 files changed, 281 insertions(+), 3 deletions(-) diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs index 21d069402c..1d9e716ec5 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs @@ -260,12 +260,13 @@ public static class DateTimeDefinitions public static readonly string SinceRegexExp = $@"({SinceRegex}|\bfrom(\s+the)?\b)"; public const string AgoRegex = @"\b(ago|earlier|before\s+(?yesterday|today))\b"; public static readonly string LaterRegex = $@"\b(?:later(?!((\s+in)?\s*{OneWordPeriodRegex})|(\s+{TimeOfDayRegex})|\s+than\b)|from now|(from|after)\s+(?tomorrow|tmrw?|today))\b"; - public const string BeforeAfterRegex = @"\b((?before)|(?from|after))\b"; + public const string BeforeAfterRegex = @"(,?\s*)\b((?before)|(?from|after))\b"; public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public const string InConnectorRegex = @"\b(in)\b"; public static readonly string SinceYearSuffixRegex = $@"(^\s*{SinceRegex}(\s*(the\s+)?year\s*)?{YearSuffix})"; public static readonly string WithinNextPrefixRegex = $@"\b(within(\s+the)?(\s+(?{NextPrefixRegex}))?)\b"; + public const string ForPrefixRegex = @"((?for.*from.*)|(?\bfrom\b)|(?\bfor\b))"; public const string TodayNowRegex = @"\b(today|now|current (date|time))\b"; public static readonly string MorningStartEndRegex = $@"(^(morning|{AmDescRegex}))|((morning|{AmDescRegex})$)"; public static readonly string AfternoonStartEndRegex = $@"(^(afternoon|{PmDescRegex}))|((afternoon|{PmDescRegex})$)"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs index ead87435e6..d8a6e0ba6e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs @@ -245,6 +245,8 @@ public static class Constants public const string StartGroupName = "start"; public const string EndGroupName = "end"; public const string WithinGroupName = "within"; + public const string ForGroupName = "for"; + public const string FromGroupName = "from"; public const string NumberGroupName = "number"; public const string OrdinalGroupName = "ordinal"; public const string OrderGroupName = "order"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs index 0a361a1993..aa2ed947c2 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs @@ -124,6 +124,9 @@ public class EnglishDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig public static readonly Regex WithinNextPrefixRegex = new Regex(DateTimeDefinitions.WithinNextPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex ForPrefixRegex = + new Regex(DateTimeDefinitions.ForPrefixRegex, RegexFlags, RegexTimeOut); + public static readonly Regex RestOfDateRegex = new Regex(DateTimeDefinitions.RestOfDateRegex, RegexFlags, RegexTimeOut); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs index be633fe2d0..a1922fd658 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDatePeriodParserConfiguration.cs @@ -95,6 +95,7 @@ public EnglishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c RelativeDecadeRegex = EnglishDatePeriodExtractorConfiguration.RelativeDecadeRegex; InConnectorRegex = config.UtilityConfiguration.InConnectorRegex; WithinNextPrefixRegex = EnglishDatePeriodExtractorConfiguration.WithinNextPrefixRegex; + ForPrefixRegex = EnglishDatePeriodExtractorConfiguration.ForPrefixRegex; ReferenceDatePeriodRegex = EnglishDatePeriodExtractorConfiguration.ReferenceDatePeriodRegex; AgoRegex = EnglishDatePeriodExtractorConfiguration.AgoRegex; LaterRegex = EnglishDatePeriodExtractorConfiguration.LaterRegex; @@ -186,6 +187,8 @@ public EnglishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c public Regex WithinNextPrefixRegex { get; } + public Regex ForPrefixRegex { get; } + public Regex RestOfDateRegex { get; } public Regex LaterEarlyPeriodRegex { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs index 4c27165ae3..433fa5f3fd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs @@ -5,7 +5,7 @@ using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; - +using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.InternalCache; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -734,6 +734,18 @@ private List SingleTimePointWithPatterns(string text, List } } } + + // For cases like "for 1 week from today", "for 3 days from 20th May" etc.. + if (EnglishDatePeriodExtractorConfiguration.ForPrefixRegex != null) + { + Match prefixMatchFor = EnglishDatePeriodExtractorConfiguration.ForPrefixRegex.Match(beforeString); + Match datepointMatchFrom = EnglishDatePeriodExtractorConfiguration.ForPrefixRegex.Match(extractionResult.Text); + if (prefixMatchFor.Success && prefixMatchFor.Groups[Constants.ForGroupName].Success + && datepointMatchFrom.Success && datepointMatchFrom.Groups[Constants.FromGroupName].Success) + { + ret.AddRange(GetTokenForRegexMatching(beforeString, EnglishDatePeriodExtractorConfiguration.ForPrefixRegex, extractionResult, inPrefix: true)); + } + } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs index fb2a25af03..134b85079f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs @@ -6,6 +6,7 @@ using System.Globalization; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.DateTime.English; using Microsoft.Recognizers.Text.DateTime.Utilities; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -476,6 +477,12 @@ private DateTimeResolutionResult ParseBaseDatePeriod(string text, DateObject ref innerResult = ParseDatePointWithAgoAndLater(text, referenceDate); } + // Cases like "for x weeks/days from today/12 sep etc." + if (!innerResult.Success) + { + innerResult = ParseDatePointWithForPrefix(text, referenceDate); + } + // Parse duration should be at the end since it will extract "the last week" from "the last week of July" if (!innerResult.Success) { @@ -620,6 +627,65 @@ private DateTimeResolutionResult ParseDatePointWithAgoAndLater(string text, Date return ret; } + // Only handle cases like "for x weeks/days from today/tomorrow/some day" + private DateTimeResolutionResult ParseDatePointWithForPrefix(string text, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var er = this.config.DateExtractor.Extract(text, referenceDate).FirstOrDefault(); + + if (er != null) + { + var beforeString = text.Substring(0, (int)er.Start); + var isAgo = this.config.AgoRegex.Match(er.Text).Success; + var config = this.config as EnglishDatePeriodParserConfiguration; + + if (!string.IsNullOrEmpty(beforeString) && config != null) + { + var matchFor = config.ForPrefixRegex.Match(beforeString); + + if (matchFor.Success && matchFor.Groups[Constants.ForGroupName].Success) + { + var pr = this.config.DateParser.Parse(er, referenceDate); + var durationExtractionResult = this.config.DurationExtractor.Extract(er.Text, referenceDate).FirstOrDefault(); + + if (durationExtractionResult != null) + { + var duration = this.config.DurationParser.Parse(durationExtractionResult); + var durationInSeconds = (double)((DateTimeResolutionResult)duration.Value).PastValue; + + DateObject startDate; + DateObject endDate; + + if (isAgo) + { + startDate = (DateObject)((DateTimeResolutionResult)pr.Value).PastValue; + endDate = startDate.AddSeconds(durationInSeconds); + } + else + { + endDate = (DateObject)((DateTimeResolutionResult)pr.Value).FutureValue; + startDate = endDate.AddSeconds(-durationInSeconds); + } + + if (startDate != DateObject.MinValue) + { + var startLuisStr = DateTimeFormatUtil.LuisDate(startDate); + var endLuisStr = DateTimeFormatUtil.LuisDate(endDate); + var durationTimex = ((DateTimeResolutionResult)duration.Value).Timex; + + ret.Timex = $"({startLuisStr},{endLuisStr},{durationTimex})"; + ret.FutureValue = new Tuple(startDate, endDate); + ret.PastValue = new Tuple(startDate, endDate); + ret.Success = true; + } + } + } + } + } + + return ret; + } + private DateTimeResolutionResult ParseSingleTimePoint(string text, DateObject referenceDate, DateContext dateContext = null) { var ret = new DateTimeResolutionResult(); diff --git a/Patterns/English/English-DateTime.yaml b/Patterns/English/English-DateTime.yaml index 14ded022d4..31d3532599 100644 --- a/Patterns/English/English-DateTime.yaml +++ b/Patterns/English/English-DateTime.yaml @@ -624,7 +624,7 @@ LaterRegex: !nestedRegex def: \b(?:later(?!((\s+in)?\s*{OneWordPeriodRegex})|(\s+{TimeOfDayRegex})|\s+than\b)|from now|(from|after)\s+(?tomorrow|tmrw?|today))\b references: [ OneWordPeriodRegex, TimeOfDayRegex ] BeforeAfterRegex: !simpleRegex - def: \b((?before)|(?from|after))\b + def: (,?\s*)\b((?before)|(?from|after))\b ModPrefixRegex: !nestedRegex def: \b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b references: [RelativeRegex, AroundRegex, BeforeRegex, AfterRegex, SinceRegex ] @@ -639,6 +639,8 @@ SinceYearSuffixRegex: !nestedRegex WithinNextPrefixRegex: !nestedRegex def: \b(within(\s+the)?(\s+(?{NextPrefixRegex}))?)\b references: [ NextPrefixRegex ] +ForPrefixRegex: !simpleRegex + def: ((?for.*from.*)|(?\bfrom\b)|(?\bfor\b)) TodayNowRegex: !simpleRegex # Added to remove hard coded strings in BaseDatePeriodParser def: \b(today|now|current (date|time))\b # "next" group here is used to judge uncommon unsupported cases like "within the next 5 days before today" diff --git a/Specs/DateTime/English/DatePeriodExtractor.json b/Specs/DateTime/English/DatePeriodExtractor.json index efe886738d..c76b0261cd 100644 --- a/Specs/DateTime/English/DatePeriodExtractor.json +++ b/Specs/DateTime/English/DatePeriodExtractor.json @@ -3572,6 +3572,42 @@ } ] }, + { + "Input": "set OOO for 1 week, from 20th dec", + "NotSupported": "python, javascript, java", + "Results": [ + { + "Text": "for 1 week, from 20th dec", + "Type": "daterange", + "Start": 8, + "Length": 25 + } + ] + }, + { + "Input": "set OOO for 3 days, from today", + "NotSupported": "python, javascript, java", + "Results": [ + { + "Text": "for 3 days, from today", + "Type": "daterange", + "Start": 8, + "Length": 22 + } + ] + }, + { + "Input": "set OOO for 3 days from today", + "NotSupported": "python, javascript, java", + "Results": [ + { + "Text": "for 3 days from today", + "Type": "daterange", + "Start": 8, + "Length": 21 + } + ] + }, { "Input": "I will come back less than 2 weeks from today", "NotSupported": "python, javascript", diff --git a/Specs/DateTime/English/DatePeriodParser.json b/Specs/DateTime/English/DatePeriodParser.json index 97e0144412..4be33a8551 100644 --- a/Specs/DateTime/English/DatePeriodParser.json +++ b/Specs/DateTime/English/DatePeriodParser.json @@ -5266,6 +5266,84 @@ } ] }, + { + "Input": "set OOO for 1 week, from 20th dec", + "Context": { + "ReferenceDateTime": "2018-05-29T00:00:00" + }, + "NotSupported": "python, javascript, java", + "Results": [ + { + "Text": "for 1 week, from 20th dec", + "Type": "daterange", + "Value": { + "Timex": "(2018-12-20,2018-12-27,P1W)", + "FutureResolution": { + "startDate": "2018-12-20", + "endDate": "2018-12-27" + }, + "PastResolution": { + "startDate": "2018-12-20", + "endDate": "2018-12-27" + } + }, + "Start": 8, + "Length": 25 + } + ] + }, + { + "Input": "set OOO for 3 days, from today", + "Context": { + "ReferenceDateTime": "2018-05-23T00:00:00" + }, + "NotSupported": "python, javascript, java", + "Results": [ + { + "Text": "for 3 days, from today", + "Type": "daterange", + "Value": { + "Timex": "(2018-05-23,2018-05-26,P3D)", + "FutureResolution": { + "startDate": "2018-05-23", + "endDate": "2018-05-26" + }, + "PastResolution": { + "startDate": "2018-05-23", + "endDate": "2018-05-26" + } + }, + "Start": 8, + "Length": 22 + } + ] + }, + { + "Input": "set OOO for 3 days from today", + "Context": { + "ReferenceDateTime": "2018-05-23T00:00:00" + }, + "NotSupported": "python, javascript, java", + "Results": [ + { + "Text": "for 3 days from today", + "Type": "daterange", + "Value": { + "Timex": "(2018-05-23,2018-05-26,P3D)", + "FutureResolution": { + "startDate": "2018-05-23", + "endDate": "2018-05-26" + }, + "PastResolution": { + "startDate": "2018-05-23", + "endDate": "2018-05-26" + } + }, + "Start": 8, + "Length": 21 + } + ] + }, { "Input": "I have already finished all my work more than 2 weeks before today", "Context": { diff --git a/Specs/DateTime/English/DateTimeModel.json b/Specs/DateTime/English/DateTimeModel.json index 804b5c2033..ff100ed2e3 100644 --- a/Specs/DateTime/English/DateTimeModel.json +++ b/Specs/DateTime/English/DateTimeModel.json @@ -20951,6 +20951,81 @@ } ] }, + { + "Input": "set OOO for 1 week, from 20th dec", + "Context": { + "ReferenceDateTime": "2018-05-29T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "for 1 week, from 20th dec", + "Start": 8, + "End": 32, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(2018-12-20,2018-12-27,P1W)", + "type": "daterange", + "start": "2018-12-20", + "end": "2018-12-27" + } + ] + } + } + ] + }, + { + "Input": "set OOO for 3 days, from today", + "Context": { + "ReferenceDateTime": "2018-05-23T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "for 3 days, from today", + "Start": 8, + "End": 29, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(2018-05-23,2018-05-26,P3D)", + "type": "daterange", + "start": "2018-05-23", + "end": "2018-05-26" + } + ] + } + } + ] + }, + { + "Input": "set OOO for 3 days from today", + "Context": { + "ReferenceDateTime": "2018-05-23T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "for 3 days from today", + "Start": 8, + "End": 28, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(2018-05-23,2018-05-26,P3D)", + "type": "daterange", + "start": "2018-05-23", + "end": "2018-05-26" + } + ] + } + } + ] + }, { "Input": "The project was submitted last month and 3 weeks later it was approved", "Context": {