diff --git a/.NET/Microsoft.Recognizers.Definitions/BaseUnits.cs b/.NET/Microsoft.Recognizers.Definitions/BaseUnits.cs
new file mode 100644
index 0000000000..8a888cb0ad
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Definitions/BaseUnits.cs
@@ -0,0 +1,25 @@
+//------------------------------------------------------------------------------
+// <auto-generated>
+//     This code was generated by a tool.
+//     Changes to this file may cause incorrect behavior and will be lost if
+//     the code is regenerated.
+//
+//     Generation parameters:
+//     - DataFilename: Patterns\Base-Units.yaml
+//     - Language: NULL
+//     - ClassName: BaseUnits
+// </auto-generated>
+//------------------------------------------------------------------------------
+namespace Microsoft.Recognizers.Definitions
+{
+    using System;
+    using System.Collections.Generic;
+
+    public static class BaseUnits
+    {
+        public const string HourRegex = @"(?<hour>00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)(h)?";
+        public const string MinuteRegex = @"(?<min>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)(?!\d)";
+        public const string SecondRegex = @"(?<sec>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)";
+        public static readonly string SpecialTimeRegex = $@"({HourRegex}\s*:\s*{MinuteRegex}(\s*:\s*{SecondRegex})?\s*pm)";
+    }
+}
\ No newline at end of file
diff --git a/.NET/Microsoft.Recognizers.Definitions/BaseUnits.tt b/.NET/Microsoft.Recognizers.Definitions/BaseUnits.tt
new file mode 100644
index 0000000000..350bd9d873
--- /dev/null
+++ b/.NET/Microsoft.Recognizers.Definitions/BaseUnits.tt
@@ -0,0 +1,7 @@
+<#@ template debug="true" hostspecific="true" language="C#" #>
+<#
+    this.DataFilename = @"Patterns\Base-Units.yaml";
+
this.Language = null; + this.ClassName = "BaseUnits"; +#> +<#@ include file=".\CommonDefinitions.ttinclude"#> \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs index 6b1e289654..220768286e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Chinese; using Microsoft.Recognizers.Text.Number.Chinese; @@ -16,6 +17,7 @@ protected ChineseNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -32,6 +34,8 @@ protected ChineseNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; } + public Regex SpecialTimeRegex { get; set; } + public IExtractor IntegerExtractor { get; } public abstract ImmutableDictionary SuffixList { get; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs index 574e11c246..2f5bb28120 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs @@ -15,6 +15,9 @@ public 
static class Constants public const string SYS_UNIT_VOLUME = "builtin.unit.volume"; public const string SYS_UNIT_WEIGHT = "builtin.unit.weight"; public const string SYS_NUM = "builtin.num"; + + // For cases like '2:00 pm', both 'pm' and '00 pm' are not dimension + public const string SYS_SPECIAL_UNIT = "pm"; // For currencies without ISO codes, we use internal values prefixed by '_'. // These values should never be present in parse output. diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs index a6da375f3e..27280700b2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.Number.Dutch; using Microsoft.Recognizers.Text.Number; +using Microsoft.Recognizers.Definitions; namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch { @@ -17,6 +18,7 @@ protected DutchNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -33,6 +35,8 @@ protected DutchNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; set; } + public Regex SpecialTimeRegex { get; set; } + public abstract ImmutableDictionary SuffixList { get; } public abstract ImmutableDictionary PrefixList { get; } diff --git 
a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs index 80ccbeec8e..ff8e7fef08 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.Number.English; using Microsoft.Recognizers.Text.Number; @@ -17,6 +18,7 @@ protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -33,6 +35,8 @@ protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; set; } + public Regex SpecialTimeRegex { get; set; } + public abstract ImmutableDictionary SuffixList { get; } public abstract ImmutableDictionary PrefixList { get; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs index 85443a81e6..2d8d3702c5 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/INumberWithUnitExtractorConfiguration.cs @@ -26,5 +26,7 @@ public interface INumberWithUnitExtractorConfiguration string ConnectorToken { get; } Regex CompoundUnitConnectorRegex { get; } + + Regex SpecialTimeRegex { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs index 169aa920f0..53af6697c3 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs @@ -259,6 +259,28 @@ public List Extract(string source) /* Relative position will be used in Parser */ number.Start = start - er.Start; er.Data = number; + + //Special treatment, handle cases like '2:00 pm', '00 pm' is not dimension + var isDimensionFallsInTime = false; + if (er.Type.Equals(Constants.SYS_UNIT_DIMENSION)) + { + var specialTime = this.config.SpecialTimeRegex.Matches(source); + + foreach (Match time in specialTime) + { + if (er.Start >= time.Index && er.Start + er.Length <= time.Index + time.Length) + { + isDimensionFallsInTime = true; + break; + } + } + } + + if (isDimensionFallsInTime) + { + continue; + } + result.Add(er); continue; @@ -327,6 +349,27 @@ public void ExtractSeparateUnits(string source, List numDependRes matchResult[j] = true; } + //Special treatment, handle cases like '2:00 pm', both '00 pm' and 'pm' are not dimension + var isDimensionFallsInTime = false; + if (match.Value.Equals(Constants.SYS_SPECIAL_UNIT)) + { + var specialTime = this.config.SpecialTimeRegex.Matches(source); + + foreach (Match time in specialTime) + { + if (isDimensionFallsInSpecialTime(match, time)) + { + isDimensionFallsInTime = true; + break; + } + } + } + + if (isDimensionFallsInTime) + { + continue; + } + numDependResults.Add(new ExtractResult { Start = match.Index, @@ -346,6 +389,17 
@@ protected virtual bool PreCheckStr(string str) return !string.IsNullOrEmpty(str); } + private bool isDimensionFallsInSpecialTime(Match dimension, Match time) + { + bool isSubMatch = false; + if (dimension.Index >= time.Index && dimension.Index + dimension.Length <= time.Index + time.Length) + { + isSubMatch = true; + } + + return isSubMatch; + } + } public class DinoComparer : IComparer @@ -407,5 +461,5 @@ public class PrefixUnitResult public int Offset; public string UnitStr; } - + } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs index 3fd04e8d9b..4c7993ba4b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Extractors/FrenchNumberWithUnitExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Globalization; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.Number.French; @@ -17,6 +18,7 @@ protected FrenchNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -33,6 +35,8 @@ protected FrenchNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; set; } + public Regex SpecialTimeRegex { get; set; } + public abstract ImmutableDictionary SuffixList { get; } public abstract 
ImmutableDictionary PrefixList { get; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs index 86a0fe37e1..e667475f40 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/GermanNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.German; using Microsoft.Recognizers.Text.Number.German; @@ -16,6 +17,7 @@ protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -32,6 +34,8 @@ protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; set; } + public Regex SpecialTimeRegex { get; set; } + public abstract ImmutableDictionary SuffixList { get; } public abstract ImmutableDictionary PrefixList { get; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs index b0cc8a92f6..bb49ff5460 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Extractors/ItalianNumberWithUnitExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Globalization; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Italian; using Microsoft.Recognizers.Text.Number.Italian; @@ -17,6 +18,7 @@ protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -33,6 +35,8 @@ protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; set; } + public Regex SpecialTimeRegex { get; set; } + public abstract ImmutableDictionary SuffixList { get; } public abstract ImmutableDictionary PrefixList { get; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs index e8a8d81200..308ae242f1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Extractors/JapaneseNumberWithUnitExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Number.Japanese; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Japanese; namespace Microsoft.Recognizers.Text.NumberWithUnit.Japanese @@ -16,6 +17,7 @@ protected 
JapaneseNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -32,6 +34,8 @@ protected JapaneseNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; } + public Regex SpecialTimeRegex { get; set; } + public IExtractor IntegerExtractor { get; } public abstract ImmutableDictionary SuffixList { get; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs index 70abe48c2e..f66b077f3e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Portuguese; @@ -17,6 +18,7 @@ protected PortugueseNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, 
RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -33,6 +35,8 @@ protected PortugueseNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; set; } + public Regex SpecialTimeRegex { get; set; } + public abstract ImmutableDictionary SuffixList { get; } public abstract ImmutableDictionary PrefixList { get; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs index 55eb73ddb1..2d3c92d1dd 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Immutable; using System.Globalization; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Spanish; @@ -17,6 +18,7 @@ protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci) this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase); + this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase); } public abstract string ExtractType { get; } @@ -33,6 +35,8 @@ protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci) public Regex CompoundUnitConnectorRegex { get; set; } + public Regex SpecialTimeRegex { get; set; } + public abstract ImmutableDictionary SuffixList { get; } public abstract ImmutableDictionary PrefixList { get; } diff --git 
a/JavaScript/packages/recognizers-number-with-unit/resource-definitions.json b/JavaScript/packages/recognizers-number-with-unit/resource-definitions.json index 90433d8ec3..43425a1e08 100644 --- a/JavaScript/packages/recognizers-number-with-unit/resource-definitions.json +++ b/JavaScript/packages/recognizers-number-with-unit/resource-definitions.json @@ -1,6 +1,14 @@ { "outputPath": "./src/resources/", "configFiles": [ + { + "input": [ "Base-Units" ], + "output": "baseUnits", + "header": [ + "export namespace BaseUnits {" + ], + "footer": [ "}" ] + }, { "input": [ "Base-Numbers" ], "output": "baseNumbers", diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/chinese/base.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/chinese/base.ts index 8730eda04f..6651bf3fa9 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/chinese/base.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/chinese/base.ts @@ -4,6 +4,7 @@ import { Constants } from "../constants"; import { INumberWithUnitExtractorConfiguration } from "../extractors"; import { BaseNumberWithUnitParserConfiguration } from "../parsers"; import { ChineseNumericWithUnit } from "../../resources/chineseNumericWithUnit"; +import { BaseUnits } from "../../resources/baseUnits"; export abstract class ChineseNumberWithUnitExtractorConfiguration implements INumberWithUnitExtractorConfiguration { abstract readonly suffixList: ReadonlyMap; @@ -17,6 +18,7 @@ export abstract class ChineseNumberWithUnitExtractorConfiguration implements INu readonly buildSuffix: string; readonly connectorToken: string; readonly compoundUnitConnectorRegex: RegExp; + readonly specialTimeRegex: RegExp; constructor(ci: CultureInfo) { this.cultureInfo = ci; @@ -26,6 +28,7 @@ export abstract class ChineseNumberWithUnitExtractorConfiguration implements INu this.buildSuffix = ChineseNumericWithUnit.BuildSuffix; this.connectorToken = 
ChineseNumericWithUnit.ConnectorToken; this.compoundUnitConnectorRegex = RegExpUtility.getSafeRegExp(ChineseNumericWithUnit.CompoundUnitConnectorRegex); + this.specialTimeRegex = RegExpUtility.getSafeRegExp(BaseUnits.SpecialTimeRegex); } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/constants.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/constants.ts index e2b23b6511..62a2958e25 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/constants.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/constants.ts @@ -11,7 +11,10 @@ export class Constants { static readonly SYS_UNIT_WEIGHT: string = "builtin.unit.weight"; static readonly SYS_NUM: string = "builtin.num"; - // For currencies without ISO codes, we use internal values prefixed by '_'. + // For cases like '2:00 pm', both 'pm' and '00 pm' are not dimension + static readonly SYS_SPECIAL_UNIT: string = "pm"; + + // For currencies without ISO codes, we use internal values prefixed by '_'. // These values should never be present in parse output. 
static readonly FAKE_ISO_CODE_PREFIX: string = "_"; } \ No newline at end of file diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/english/base.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/english/base.ts index 165e88e492..a8b9b0cf57 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/english/base.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/english/base.ts @@ -4,6 +4,7 @@ import { Constants } from "../constants"; import { INumberWithUnitExtractorConfiguration } from "../extractors"; import { BaseNumberWithUnitParserConfiguration } from "../parsers"; import { EnglishNumericWithUnit } from "../../resources/englishNumericWithUnit"; +import { BaseUnits} from "../../resources/baseUnits"; export abstract class EnglishNumberWithUnitExtractorConfiguration implements INumberWithUnitExtractorConfiguration { abstract readonly suffixList: ReadonlyMap; @@ -17,6 +18,7 @@ export abstract class EnglishNumberWithUnitExtractorConfiguration implements INu readonly buildSuffix: string; readonly connectorToken: string; readonly compoundUnitConnectorRegex: RegExp; + readonly specialTimeRegex: RegExp; constructor(ci: CultureInfo) { this.cultureInfo = ci; @@ -26,6 +28,7 @@ export abstract class EnglishNumberWithUnitExtractorConfiguration implements INu this.buildSuffix = EnglishNumericWithUnit.BuildSuffix; this.connectorToken = ''; this.compoundUnitConnectorRegex = RegExpUtility.getSafeRegExp(EnglishNumericWithUnit.CompoundUnitConnectorRegex); + this.specialTimeRegex = RegExpUtility.getSafeRegExp(BaseUnits.SpecialTimeRegex); } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/extractors.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/extractors.ts index ca0f9f4dfc..f86a4f61f3 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/extractors.ts +++ 
b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/extractors.ts @@ -16,6 +16,7 @@ export interface INumberWithUnitExtractorConfiguration { readonly buildSuffix: string; readonly connectorToken: string; readonly compoundUnitConnectorRegex: RegExp; + readonly specialTimeRegex: RegExp; } export class NumberWithUnitExtractor implements IExtractor { @@ -158,6 +159,21 @@ export class NumberWithUnitExtractor implements IExtractor { num.start = start - er.start; er.data = num; + let isDimensionFallsInTime = false; + if (er.type === Constants.SYS_UNIT_DIMENSION) { + let specialTime = RegExpUtility.getMatches(this.config.specialTimeRegex, source); + + specialTime.forEach(match => { + if (er.start >= match.index && er.start + er.length <= match.index + match.length) { + isDimensionFallsInTime = true; + } + }); + } + + if (isDimensionFallsInTime) { + continue; + } + result.push(er); continue; } @@ -219,13 +235,27 @@ export class NumberWithUnitExtractor implements IExtractor { for (let j = 0; j < i; j++) { matchResult[j] = true; } - numDependResults.push({ - start: match.index, - length: match.length, - text: match.value, - type: this.config.extractType, - data: null - } as ExtractResult); + + let isDimensionFallsInTime = false; + if (match.value === Constants.SYS_SPECIAL_UNIT) { + let specialTime = RegExpUtility.getMatches(this.config.specialTimeRegex, source); + + specialTime.forEach(time => { + if (this.isDimensionFallsInSpecialTime(match, time)) { + isDimensionFallsInTime = true; + } + }); + } + + if (isDimensionFallsInTime === false) { + numDependResults.push({ + start: match.index, + length: match.length, + text: match.value, + type: this.config.extractType, + data: null + } as ExtractResult); + } } }); } @@ -335,6 +365,15 @@ export class NumberWithUnitExtractor implements IExtractor { } } } + + private isDimensionFallsInSpecialTime(dimension: Match, time: Match): boolean { + let isSubMatch = false; + if (dimension.index >= time.index && 
dimension.index + dimension.length <= time.index + time.length) { + isSubMatch = true; + } + + return isSubMatch; + } } export class BaseMergedUnitExtractor implements IExtractor { diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/french/base.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/french/base.ts index 8478f85c21..c5f4392b85 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/french/base.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/french/base.ts @@ -4,6 +4,7 @@ import { Constants } from "../constants"; import { INumberWithUnitExtractorConfiguration } from "../extractors"; import { BaseNumberWithUnitParserConfiguration } from "../parsers"; import { FrenchNumericWithUnit } from "../../resources/frenchNumericWithUnit"; +import { BaseUnits } from "../../resources/baseUnits"; export abstract class FrenchNumberWithUnitExtractorConfiguration implements INumberWithUnitExtractorConfiguration { abstract readonly suffixList: ReadonlyMap; @@ -17,6 +18,7 @@ export abstract class FrenchNumberWithUnitExtractorConfiguration implements INum readonly buildSuffix: string; readonly connectorToken: string; readonly compoundUnitConnectorRegex: RegExp; + readonly specialTimeRegex: RegExp; constructor(ci: CultureInfo) { this.cultureInfo = ci; @@ -26,6 +28,7 @@ export abstract class FrenchNumberWithUnitExtractorConfiguration implements INum this.buildSuffix = FrenchNumericWithUnit.BuildSuffix; this.connectorToken = FrenchNumericWithUnit.ConnectorToken; this.compoundUnitConnectorRegex = RegExpUtility.getSafeRegExp(FrenchNumericWithUnit.CompoundUnitConnectorRegex); + this.specialTimeRegex = RegExpUtility.getSafeRegExp(BaseUnits.SpecialTimeRegex); } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/japanese/base.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/japanese/base.ts index e884392337..8ae1f5d254 100644 
--- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/japanese/base.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/japanese/base.ts @@ -4,6 +4,7 @@ import { Constants } from "../constants"; import { INumberWithUnitExtractorConfiguration } from "../extractors"; import { BaseNumberWithUnitParserConfiguration } from "../parsers"; import { JapaneseNumericWithUnit } from "../../resources/japaneseNumericWithUnit"; +import { BaseUnits } from "../../resources/baseUnits"; export abstract class JapaneseNumberWithUnitExtractorConfiguration implements INumberWithUnitExtractorConfiguration { abstract readonly suffixList: ReadonlyMap; @@ -17,6 +18,7 @@ export abstract class JapaneseNumberWithUnitExtractorConfiguration implements IN readonly buildSuffix: string; readonly connectorToken: string; readonly compoundUnitConnectorRegex: RegExp; + readonly specialTimeRegex: RegExp; constructor(ci: CultureInfo) { this.cultureInfo = ci; @@ -26,6 +28,7 @@ export abstract class JapaneseNumberWithUnitExtractorConfiguration implements IN this.buildSuffix = JapaneseNumericWithUnit.BuildSuffix; this.connectorToken = JapaneseNumericWithUnit.ConnectorToken; this.compoundUnitConnectorRegex = RegExpUtility.getSafeRegExp(JapaneseNumericWithUnit.CompoundUnitConnectorRegex); + this.specialTimeRegex = RegExpUtility.getSafeRegExp(BaseUnits.SpecialTimeRegex); } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/base.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/base.ts index b7ca832e3f..50ca50930a 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/base.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/base.ts @@ -4,6 +4,7 @@ import { Constants } from "../constants"; import { INumberWithUnitExtractorConfiguration } from "../extractors"; import { BaseNumberWithUnitParserConfiguration } from 
"../parsers"; import { PortugueseNumericWithUnit } from "../../resources/portugueseNumericWithUnit"; +import { BaseUnits } from "../../resources/baseUnits"; export abstract class PortugueseNumberWithUnitExtractorConfiguration implements INumberWithUnitExtractorConfiguration { abstract readonly suffixList: ReadonlyMap; @@ -17,6 +18,7 @@ export abstract class PortugueseNumberWithUnitExtractorConfiguration implements readonly buildSuffix: string; readonly connectorToken: string; readonly compoundUnitConnectorRegex: RegExp; + readonly specialTimeRegex: RegExp; constructor(ci: CultureInfo) { this.cultureInfo = ci; @@ -26,6 +28,7 @@ export abstract class PortugueseNumberWithUnitExtractorConfiguration implements this.buildSuffix = PortugueseNumericWithUnit.BuildSuffix; this.connectorToken = PortugueseNumericWithUnit.ConnectorToken; this.compoundUnitConnectorRegex = RegExpUtility.getSafeRegExp(PortugueseNumericWithUnit.CompoundUnitConnectorRegex); + this.specialTimeRegex = RegExpUtility.getSafeRegExp(BaseUnits.SpecialTimeRegex); } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/base.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/base.ts index ce3e41f057..f8afaed589 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/base.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/base.ts @@ -4,6 +4,7 @@ import { Constants } from "../constants"; import { INumberWithUnitExtractorConfiguration } from "../extractors"; import { BaseNumberWithUnitParserConfiguration } from "../parsers"; import { SpanishNumericWithUnit } from "../../resources/spanishNumericWithUnit"; +import { BaseUnits } from "../../resources/baseUnits"; export abstract class SpanishNumberWithUnitExtractorConfiguration implements INumberWithUnitExtractorConfiguration { abstract readonly suffixList: ReadonlyMap; @@ -17,6 +18,7 @@ export abstract class 
SpanishNumberWithUnitExtractorConfiguration implements INu readonly buildSuffix: string; readonly connectorToken: string; readonly compoundUnitConnectorRegex: RegExp; + readonly specialTimeRegex: RegExp; constructor(ci: CultureInfo) { this.cultureInfo = ci; @@ -26,6 +28,7 @@ export abstract class SpanishNumberWithUnitExtractorConfiguration implements INu this.buildSuffix = SpanishNumericWithUnit.BuildSuffix; this.connectorToken = SpanishNumericWithUnit.ConnectorToken; this.compoundUnitConnectorRegex = RegExpUtility.getSafeRegExp(SpanishNumericWithUnit.CompoundUnitConnectorRegex); + this.specialTimeRegex = RegExpUtility.getSafeRegExp(BaseUnits.SpecialTimeRegex); } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/resources/baseUnits.ts b/JavaScript/packages/recognizers-number-with-unit/src/resources/baseUnits.ts new file mode 100644 index 0000000000..159b1246cd --- /dev/null +++ b/JavaScript/packages/recognizers-number-with-unit/src/resources/baseUnits.ts @@ -0,0 +1,14 @@ +// ------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. 
+// +// ------------------------------------------------------------------------------ + +export namespace BaseUnits { + export const HourRegex = `(?<hour>00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)(h)?`; + export const MinuteRegex = `(?<min>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)(?!\\d)`; + export const SecondRegex = `(?<sec>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)`; + export const SpecialTimeRegex = `(${HourRegex}\\s*:\\s*${MinuteRegex}(\\s*:\\s*${SecondRegex})?\\s*pm)`; +} diff --git a/Patterns/Base-Units.yaml b/Patterns/Base-Units.yaml new file mode 100644 index 0000000000..a066a1b4e1 --- /dev/null +++ b/Patterns/Base-Units.yaml @@ -0,0 +1,11 @@ +--- +HourRegex: !simpleRegex + def: (?<hour>00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)(h)? +MinuteRegex: !simpleRegex + def: (?<min>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)(?!\d) +SecondRegex: !simpleRegex + def: (?<sec>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9) +SpecialTimeRegex: !nestedRegex + def: ({HourRegex}\s*:\s*{MinuteRegex}(\s*:\s*{SecondRegex})?\s*pm) + references: [HourRegex, MinuteRegex, SecondRegex] +...
\ No newline at end of file diff --git a/Specs/NumberWithUnit/Chinese/DimensionModel.json b/Specs/NumberWithUnit/Chinese/DimensionModel.json index 98c67b84c0..e0c4dbc8b0 100644 --- a/Specs/NumberWithUnit/Chinese/DimensionModel.json +++ b/Specs/NumberWithUnit/Chinese/DimensionModel.json @@ -115,4 +115,10 @@ "Input": "dimension是什么意思", "Results": [] } + , + { + "Input": "今天2:00 pm, 你将会收到一个惊喜!", + "NotSupported": "python, java", + "Results": [] + } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/Dutch/DimensionModel.json b/Specs/NumberWithUnit/Dutch/DimensionModel.json index 073da12b3d..98c1dd9b39 100644 --- a/Specs/NumberWithUnit/Dutch/DimensionModel.json +++ b/Specs/NumberWithUnit/Dutch/DimensionModel.json @@ -936,5 +936,10 @@ } ], "NotSupportedByDesign": "javascript,python,java" + }, + { + "Input": "2:00 pm", + "Results": [], + "NotSupportedByDesign": "javascript,python,java" } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/English/DimensionModel.json b/Specs/NumberWithUnit/English/DimensionModel.json index 96b1bd93ea..544adbaf5a 100644 --- a/Specs/NumberWithUnit/English/DimensionModel.json +++ b/Specs/NumberWithUnit/English/DimensionModel.json @@ -799,4 +799,36 @@ } ] } + , + { + "Input": "I'll give you a surprise at 2:00 pm", + "NotSupported": "python, java", + "Results": [] + } + , + { + "Input": "He said: 2 pm is 2 picometer", + "Results": [ + { + "Text": "2 pm", + "Start": 9, + "End": 12, + "TypeName": "dimension", + "Resolution": { + "unit": "Picometer", + "value": "2" + } + }, + { + "Text": "2 picometer", + "Start": 17, + "End": 27, + "TypeName": "dimension", + "Resolution": { + "unit": "Picometer", + "value": "2" + } + } + ] + } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/German/DimensionModel.json b/Specs/NumberWithUnit/German/DimensionModel.json index 3726325fd3..d2f62ef161 100644 --- a/Specs/NumberWithUnit/German/DimensionModel.json +++ b/Specs/NumberWithUnit/German/DimensionModel.json @@ -376,4 +376,11 
@@ "NotSupported": "javascript", "Results": [] } + , + { + "Input": "2:00 pm", + "NotSupportedByDesign": "python", + "NotSupported": "javascript, java", + "Results": [] + } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/Portuguese/DimensionModel.json b/Specs/NumberWithUnit/Portuguese/DimensionModel.json index f3ea8fd271..c43330afd7 100644 --- a/Specs/NumberWithUnit/Portuguese/DimensionModel.json +++ b/Specs/NumberWithUnit/Portuguese/DimensionModel.json @@ -820,4 +820,10 @@ } ] } + , + { + "Input": "2:00 pm", + "NotSupported": "python, java", + "Results": [] + } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/Spanish/DimensionModel.json b/Specs/NumberWithUnit/Spanish/DimensionModel.json index 4d023a3a34..eeb9474bdd 100644 --- a/Specs/NumberWithUnit/Spanish/DimensionModel.json +++ b/Specs/NumberWithUnit/Spanish/DimensionModel.json @@ -756,4 +756,10 @@ } ] } + , + { + "Input": "2:00 pm", + "NotSupported": "python, java", + "Results": [] + } ] \ No newline at end of file