Skip to content

Commit

Permalink
Fix wrong parse result for '2:00 PM', '00 PM' is not dimension (#895)
Browse files Browse the repository at this point in the history
* Fix wrongly recognized dimension inside a time entity
  • Loading branch information
Sothan authored and tellarin committed Oct 18, 2018
1 parent f76e661 commit d350b91
Show file tree
Hide file tree
Showing 31 changed files with 291 additions and 9 deletions.
25 changes: 25 additions & 0 deletions .NET/Microsoft.Recognizers.Definitions/BaseUnits.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\Base-Units.yaml
// - Language: NULL
// - ClassName: BaseUnits
// </auto-generated>
//------------------------------------------------------------------------------
namespace Microsoft.Recognizers.Definitions
{
using System;
using System.Collections.Generic;

// Language-independent regex pattern strings for clock-time components.
// These are raw pattern strings, not compiled Regex objects; the patterns
// themselves are lowercase-only, so a caller that wants to match 'PM' as
// well must compile them with RegexOptions.IgnoreCase.
public static class BaseUnits
{
// Hour value 0-24, with or without a leading zero, captured in the named
// group "hour" and optionally followed by a literal 'h'. Alternation is
// ordered, so each two-digit alternative is listed before the single digit
// that prefixes it (e.g. "12" is tried before "1").
public const string HourRegex = @"(?<hour>00|01|02|03|04|05|06|07|08|09|0|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|1|2|3|4|5|6|7|8|9)(h)?";
// Minute value 0-59, with or without a leading zero, captured in the named
// group "min". The trailing negative lookahead rejects a match that is
// immediately followed by another digit.
public const string MinuteRegex = @"(?<min>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)(?!\d)";
// Second value 0-59, with or without a leading zero, captured in the named
// group "sec". Unlike MinuteRegex there is no trailing digit lookahead.
public const string SecondRegex = @"(?<sec>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|0|1|2|3|4|5|6|7|8|9)";
// A clock time of the form "hour:minute" or "hour:minute:second" (optional
// whitespace around each colon) followed by optional whitespace and the
// literal suffix "pm", e.g. "2:00 pm". Only the "pm" suffix is covered;
// there is no "am" alternative in this pattern.
public static readonly string SpecialTimeRegex = $@"({HourRegex}\s*:\s*{MinuteRegex}(\s*:\s*{SecondRegex})?\s*pm)";
}
}
7 changes: 7 additions & 0 deletions .NET/Microsoft.Recognizers.Definitions/BaseUnits.tt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the BaseUnits definitions class.
// NOTE(review): these members are presumably declared and consumed by the
// CommonDefinitions.ttinclude file included below — confirm there.

// Data file the pattern definitions are read from.
this.DataFilename = @"Patterns\Base-Units.yaml";
// No language is set for these definitions.
// NOTE(review): presumably this is what keeps the generated class out of a
// per-language namespace — confirm against the include.
this.Language = null;
// Name given to the generated class.
this.ClassName = "BaseUnits";
#>
<#@ include file=".\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Chinese;
using Microsoft.Recognizers.Text.Number.Chinese;

Expand All @@ -16,6 +17,7 @@ protected ChineseNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -32,6 +34,8 @@ protected ChineseNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; }

public Regex SpecialTimeRegex { get; set; }

public IExtractor IntegerExtractor { get; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }
Expand Down
3 changes: 3 additions & 0 deletions .NET/Microsoft.Recognizers.Text.NumberWithUnit/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ public static class Constants
public const string SYS_UNIT_VOLUME = "builtin.unit.volume";
public const string SYS_UNIT_WEIGHT = "builtin.unit.weight";
public const string SYS_NUM = "builtin.num";

// For cases like '2:00 pm', neither 'pm' nor '00 pm' is a dimension
public const string SYS_SPECIAL_UNIT = "pm";

// For currencies without ISO codes, we use internal values prefixed by '_'.
// These values should never be present in parse output.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Microsoft.Recognizers.Definitions.Dutch;
using Microsoft.Recognizers.Text.Number.Dutch;
using Microsoft.Recognizers.Text.Number;
using Microsoft.Recognizers.Definitions;

namespace Microsoft.Recognizers.Text.NumberWithUnit.Dutch
{
Expand All @@ -17,6 +18,7 @@ protected DutchNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = string.Empty;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -33,6 +35,8 @@ protected DutchNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; set; }

public Regex SpecialTimeRegex { get; set; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }

public abstract ImmutableDictionary<string, string> PrefixList { get; }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.English;
using Microsoft.Recognizers.Text.Number.English;
using Microsoft.Recognizers.Text.Number;
Expand All @@ -17,6 +18,7 @@ protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = string.Empty;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -33,6 +35,8 @@ protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; set; }

public Regex SpecialTimeRegex { get; set; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }

public abstract ImmutableDictionary<string, string> PrefixList { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,7 @@ public interface INumberWithUnitExtractorConfiguration
string ConnectorToken { get; }

Regex CompoundUnitConnectorRegex { get; }

Regex SpecialTimeRegex { get; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,28 @@ public List<ExtractResult> Extract(string source)
/* Relative position will be used in Parser */
number.Start = start - er.Start;
er.Data = number;

// Special treatment: in cases like '2:00 pm', '00 pm' is part of a time, not a dimension
var isDimensionFallsInTime = false;
if (er.Type.Equals(Constants.SYS_UNIT_DIMENSION))
{
var specialTime = this.config.SpecialTimeRegex.Matches(source);

foreach (Match time in specialTime)
{
if (er.Start >= time.Index && er.Start + er.Length <= time.Index + time.Length)
{
isDimensionFallsInTime = true;
break;
}
}
}

if (isDimensionFallsInTime)
{
continue;
}

result.Add(er);

continue;
Expand Down Expand Up @@ -327,6 +349,27 @@ public void ExtractSeparateUnits(string source, List<ExtractResult> numDependRes
matchResult[j] = true;
}

// Special treatment: in cases like '2:00 pm', neither '00 pm' nor 'pm' is a dimension
var isDimensionFallsInTime = false;
if (match.Value.Equals(Constants.SYS_SPECIAL_UNIT))
{
var specialTime = this.config.SpecialTimeRegex.Matches(source);

foreach (Match time in specialTime)
{
if (isDimensionFallsInSpecialTime(match, time))
{
isDimensionFallsInTime = true;
break;
}
}
}

if (isDimensionFallsInTime)
{
continue;
}

numDependResults.Add(new ExtractResult
{
Start = match.Index,
Expand All @@ -346,6 +389,17 @@ protected virtual bool PreCheckStr(string str)
return !string.IsNullOrEmpty(str);
}

// Reports whether the text span of the 'dimension' match is fully contained in
// the text span of the 'time' match (both ends inclusive, so identical spans
// count as contained). Both matches must index into the same source string
// for the comparison to be meaningful.
private bool isDimensionFallsInSpecialTime(Match dimension, Match time)
{
    // The dimension starts before the time does, so it cannot be inside it.
    if (dimension.Index < time.Index)
    {
        return false;
    }

    var dimensionEnd = dimension.Index + dimension.Length;
    var timeEnd = time.Index + time.Length;

    return dimensionEnd <= timeEnd;
}

}

public class DinoComparer : IComparer<string>
Expand Down Expand Up @@ -407,5 +461,5 @@ public class PrefixUnitResult
public int Offset;
public string UnitStr;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Globalization;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.French;
using Microsoft.Recognizers.Text.Number.French;

Expand All @@ -17,6 +18,7 @@ protected FrenchNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -33,6 +35,8 @@ protected FrenchNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; set; }

public Regex SpecialTimeRegex { get; set; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }

public abstract ImmutableDictionary<string, string> PrefixList { get; }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.German;
using Microsoft.Recognizers.Text.Number.German;

Expand All @@ -16,6 +17,7 @@ protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = string.Empty;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -32,6 +34,8 @@ protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; set; }

public Regex SpecialTimeRegex { get; set; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }

public abstract ImmutableDictionary<string, string> PrefixList { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Globalization;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Italian;
using Microsoft.Recognizers.Text.Number.Italian;

Expand All @@ -17,6 +18,7 @@ protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -33,6 +35,8 @@ protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; set; }

public Regex SpecialTimeRegex { get; set; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }

public abstract ImmutableDictionary<string, string> PrefixList { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Text.Number.Japanese;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Japanese;

namespace Microsoft.Recognizers.Text.NumberWithUnit.Japanese
Expand All @@ -16,6 +17,7 @@ protected JapaneseNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -32,6 +34,8 @@ protected JapaneseNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; }

public Regex SpecialTimeRegex { get; set; }

public IExtractor IntegerExtractor { get; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Portuguese;
using Microsoft.Recognizers.Text.Number;
using Microsoft.Recognizers.Text.Number.Portuguese;
Expand All @@ -17,6 +18,7 @@ protected PortugueseNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -33,6 +35,8 @@ protected PortugueseNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; set; }

public Regex SpecialTimeRegex { get; set; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }

public abstract ImmutableDictionary<string, string> PrefixList { get; }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Spanish;
using Microsoft.Recognizers.Text.Number;
using Microsoft.Recognizers.Text.Number.Spanish;
Expand All @@ -17,6 +18,7 @@ protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci)
this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
this.CompoundUnitConnectorRegex = new Regex(NumbersWithUnitDefinitions.CompoundUnitConnectorRegex, RegexOptions.IgnoreCase);
this.SpecialTimeRegex = new Regex(BaseUnits.SpecialTimeRegex, RegexOptions.IgnoreCase);
}

public abstract string ExtractType { get; }
Expand All @@ -33,6 +35,8 @@ protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci)

public Regex CompoundUnitConnectorRegex { get; set; }

public Regex SpecialTimeRegex { get; set; }

public abstract ImmutableDictionary<string, string> SuffixList { get; }

public abstract ImmutableDictionary<string, string> PrefixList { get; }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
{
"outputPath": "./src/resources/",
"configFiles": [
{
"input": [ "Base-Units" ],
"output": "baseUnits",
"header": [
"export namespace BaseUnits {"
],
"footer": [ "}" ]
},
{
"input": [ "Base-Numbers" ],
"output": "baseNumbers",
Expand Down
Loading

0 comments on commit d350b91

Please sign in to comment.