Skip to content

Commit

Permalink
[PhoneNumber] Add filter for "bank account" prefix (#1906)
Browse files Browse the repository at this point in the history
  • Loading branch information
Grey0202 authored and tellarin committed Oct 29, 2019
1 parent 4cdbaa4 commit e63e6d3
Show file tree
Hide file tree
Showing 62 changed files with 981 additions and 41 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\English\English-PhoneNumbers.yaml
// - Language: English
// - ClassName: PhoneNumbersDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.English
{
using System;
using System.Collections.Generic;

/// <summary>
/// English phone-number pattern constants. Per the file header this class is generated from
/// Patterns\English\English-PhoneNumbers.yaml, so lasting changes belong in that YAML file.
/// </summary>
public static class PhoneNumbersDefinitions
{
// NOTE(review): presumably the placeholder token for a recognized phone number; usage is not visible here — confirm against callers.
public const string NumberReplaceToken = @"@builtin.phonenumber";
// Anchored at the end of the input: matches text that ends with "account" or "card", optionally
// followed by "#" or "number", then an optional "is", an optional colon and trailing whitespace.
// NOTE(review): there is no leading word boundary, so words that merely end in "card" or
// "account" (e.g. "discard") also match — confirm this is intended.
public const string FalsePositivePrefixRegex = @"(account|card)(\s+(#|number))?(\s+is)?:?\s*$";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the English phone-number definitions; the same three values are
// listed in the header of the generated PhoneNumbersDefinitions.cs.
// NOTE(review): presumably consumed by the included CommonDefinitions.ttinclude — confirm.
this.DataFilename = @"Patterns\English\English-PhoneNumbers.yaml";
this.Language = "English";
this.ClassName = "PhoneNumbersDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
</Compile>
<Compile Update="English\PhoneNumbersDefinitions.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>PhoneNumbersDefinitions.tt</DependentUpon>
</Compile>
<Compile Update="English\TimeZoneDefinitions.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
Expand Down Expand Up @@ -297,6 +302,11 @@
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
</Compile>
<Compile Update="Portuguese\PhoneNumbersDefinitions.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>PhoneNumbersDefinitions.tt</DependentUpon>
</Compile>
<Compile Update="Spanish\ChoiceDefinitions.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
Expand Down Expand Up @@ -408,6 +418,8 @@
</None>
<None Update="English\NumbersWithUnitDefinitions.tt">
</None>
<None Update="English\PhoneNumbersDefinitions.tt">
</None>
<None Update="English\TimeZoneDefinitions.tt">
</None>
<None Update="French\ChoiceDefinitions.tt">
Expand Down Expand Up @@ -458,6 +470,8 @@
</None>
<None Update="Portuguese\NumbersWithUnitDefinitions.tt">
</None>
<None Update="Portuguese\PhoneNumbersDefinitions.tt">
</None>
<None Update="Spanish\ChoiceDefinitions.tt">
</None>
<None Update="Spanish\DateTimeDefinitions.tt">
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\Portuguese\Portuguese-PhoneNumbers.yaml
// - Language: Portuguese
// - ClassName: PhoneNumbersDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.Portuguese
{
using System;
using System.Collections.Generic;

/// <summary>
/// Portuguese phone-number pattern constants. Per the file header this class is generated from
/// Patterns\Portuguese\Portuguese-PhoneNumbers.yaml, so lasting changes belong in that YAML file.
/// </summary>
public static class PhoneNumbersDefinitions
{
// NOTE(review): presumably the placeholder token for a recognized phone number; usage is not visible here — confirm against callers.
public const string NumberReplaceToken = @"@builtin.phonenumber";
// Anchored at the end of the input: matches text that ends with "conta", optionally followed by
// "número" or by "bancária" (itself optionally followed by "número"), then an optional "é",
// an optional colon and trailing whitespace.
// NOTE(review): there is no leading word boundary, so any word ending in "conta" also
// matches — confirm this is intended.
public const string FalsePositivePrefixRegex = @"conta(\s+(número|bancária(\s+número)?))?(\s+é)?:?\s*$";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the Portuguese phone-number definitions; the same three values are
// listed in the header of the generated PhoneNumbersDefinitions.cs.
// NOTE(review): presumably consumed by the included CommonDefinitions.ttinclude — confirm.
this.DataFilename = @"Patterns\Portuguese\Portuguese-PhoneNumbers.yaml";
this.Language = "Portuguese";
this.ClassName = "PhoneNumbersDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\BasePhoneNumbers.cs" Link="BasePhoneNumbers.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\BaseUnits.cs" Link="BaseUnits.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\BaseURL.cs" Link="BaseURL.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\English\PhoneNumbersDefinitions.cs" Link="English\PhoneNumbersDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Chinese\ChoiceDefinitions.cs" Link="Chinese\ChoiceDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Chinese\DateTimeDefinitions.cs" Link="Chinese\DateTimeDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Chinese\NumbersDefinitions.cs" Link="Chinese\NumbersDefinitions.cs" />
Expand Down Expand Up @@ -83,6 +84,7 @@
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Portuguese\DateTimeDefinitions.cs" Link="Portuguese\DateTimeDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Portuguese\NumbersDefinitions.cs" Link="Portuguese\NumbersDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Portuguese\NumbersWithUnitDefinitions.cs" Link="Portuguese\NumbersWithUnitDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Portuguese\PhoneNumbersDefinitions.cs" Link="Portuguese\PhoneNumbersDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Spanish\ChoiceDefinitions.cs" Link="Spanish\ChoiceDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Spanish\DateTimeDefinitions.cs" Link="Spanish\DateTimeDefinitions.cs" />
<Compile Include="..\Microsoft.Recognizers.Definitions.Common\Spanish\NumbersDefinitions.cs" Link="Spanish\NumbersDefinitions.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the Dutch phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_Dutch : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the French phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_French : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the German phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_German : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the Hindi phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_Hindi : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the Italian phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_Italian : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the Korean phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_Korean : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the Spanish phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_Spanish : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Microsoft.Recognizers.Text.DataDrivenTests;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.Recognizers.Text.Sequence.Tests
{
    /// <summary>
    /// MSTest class holding the Turkish phone number test.
    /// </summary>
    [TestClass]
    public class TestSequence_Turkish : TestBase
    {
        /// <summary>
        /// Forwards the supplied test specification to <c>TestPhoneNumber</c>, which is not
        /// declared in this class (presumably provided by <c>TestBase</c>).
        /// </summary>
        /// <param name="testSpec">The test case to run.</param>
        [NetCoreTestDataSource]
        [TestMethod]
        public void PhoneNumberModel(TestModel testSpec) => TestPhoneNumber(testSpec);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public PhoneNumberConfiguration(SequenceOptions options = SequenceOptions.None)

public Regex ColonPrefixCheckRegex { get; set; }

public Regex FalsePositivePrefixRegex { get; set; }

public List<char> ColonMarkers { get; set; }

public List<char> ForbiddenPrefixMarkers { get; set; }
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Dutch;

namespace Microsoft.Recognizers.Text.Sequence.Dutch
{
    /// <summary>
    /// Phone number extractor configuration for Dutch. It adds nothing to
    /// <see cref="BasePhoneNumberExtractorConfiguration"/> except an explicitly
    /// null false-positive prefix pattern.
    /// </summary>
    public class DutchPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration
    {
        /// <summary>
        /// Builds the configuration on top of the base settings.
        /// </summary>
        /// <param name="options">Sequence options handed to the base constructor.</param>
        public DutchPhoneNumberExtractorConfiguration(SequenceOptions options)
            : base(options)
        {
            // No false-positive prefix pattern is set for Dutch.
            this.FalsePositivePrefixRegex = null;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,21 +1,14 @@
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.English;

namespace Microsoft.Recognizers.Text.Sequence.English
{
public class EnglishPhoneNumberExtractorConfiguration : PhoneNumberConfiguration
public class EnglishPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration
{
public EnglishPhoneNumberExtractorConfiguration(SequenceOptions options)
: base(options)
{
WordBoundariesRegex = BasePhoneNumbers.WordBoundariesRegex;
NonWordBoundariesRegex = BasePhoneNumbers.NonWordBoundariesRegex;
EndWordBoundariesRegex = BasePhoneNumbers.EndWordBoundariesRegex;
ColonPrefixCheckRegex = new Regex(BasePhoneNumbers.ColonPrefixCheckRegex);
ColonMarkers = (List<char>)BasePhoneNumbers.ColonMarkers;
ForbiddenPrefixMarkers = (List<char>)BasePhoneNumbers.ForbiddenPrefixMarkers;
ForbiddenSuffixMarkers = (List<char>)BasePhoneNumbers.ForbiddenSuffixMarkers;
FalsePositivePrefixRegex = new Regex(PhoneNumbersDefinitions.FalsePositivePrefixRegex);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,16 @@ public override List<ExtractResult> Extract(string text)
if (er.Start != 0)
{
var ch = text[(int)(er.Start - 1)];
var front = text.Substring(0, (int)(er.Start - 1));

if (this.config.FalsePositivePrefixRegex != null &&
this.config.FalsePositivePrefixRegex.IsMatch(front))
{
ers.Remove(er);
i--;
continue;
}

if (BasePhoneNumbers.BoundaryMarkers.Contains(ch))
{
if (SpecialBoundaryMarkers.Contains(ch) &&
Expand All @@ -131,7 +141,6 @@ public override List<ExtractResult> Extract(string text)
}

// check the international dialing prefix
var front = text.Substring(0, (int)(er.Start - 1));
if (InternationDialingPrefixRegex.IsMatch(front))
{
var moveOffset = InternationDialingPrefixRegex.Match(front).Length + 1;
Expand All @@ -152,7 +161,6 @@ public override List<ExtractResult> Extract(string text)
// Handle "tel:123456".
if (BasePhoneNumbers.ColonMarkers.Contains(ch))
{
var front = text.Substring(0, (int)(er.Start - 1));
if (this.config.ColonPrefixCheckRegex.IsMatch(front))
{
continue;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions;

namespace Microsoft.Recognizers.Text.Sequence
{
    /// <summary>
    /// Phone number extractor configuration whose values all come from
    /// <c>BasePhoneNumbers</c>; this class itself sets no <c>FalsePositivePrefixRegex</c>.
    /// </summary>
    public class BasePhoneNumberExtractorConfiguration : PhoneNumberConfiguration
    {
        /// <summary>
        /// Fills the boundary patterns, the colon-prefix check and the marker lists.
        /// </summary>
        /// <param name="options">Sequence options handed to the base constructor.</param>
        public BasePhoneNumberExtractorConfiguration(SequenceOptions options)
            : base(options)
        {
            // Boundary patterns are copied over as they are defined.
            WordBoundariesRegex = BasePhoneNumbers.WordBoundariesRegex;
            NonWordBoundariesRegex = BasePhoneNumbers.NonWordBoundariesRegex;
            EndWordBoundariesRegex = BasePhoneNumbers.EndWordBoundariesRegex;

            // The colon-prefix check is the only definition turned into a Regex here.
            var colonPrefixCheck = new Regex(BasePhoneNumbers.ColonPrefixCheckRegex);
            ColonPrefixCheckRegex = colonPrefixCheck;

            // Marker collections are cast to List<char>.
            ColonMarkers = (List<char>)BasePhoneNumbers.ColonMarkers;
            ForbiddenPrefixMarkers = (List<char>)BasePhoneNumbers.ForbiddenPrefixMarkers;
            ForbiddenSuffixMarkers = (List<char>)BasePhoneNumbers.ForbiddenSuffixMarkers;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.French;

namespace Microsoft.Recognizers.Text.Sequence.French
{
    /// <summary>
    /// Phone number extractor configuration for French. It adds nothing to
    /// <see cref="BasePhoneNumberExtractorConfiguration"/> except an explicitly
    /// null false-positive prefix pattern.
    /// </summary>
    public class FrenchPhoneNumberExtractorConfiguration : BasePhoneNumberExtractorConfiguration
    {
        /// <summary>
        /// Builds the configuration on top of the base settings.
        /// </summary>
        /// <param name="options">Sequence options handed to the base constructor.</param>
        public FrenchPhoneNumberExtractorConfiguration(SequenceOptions options)
            : base(options)
        {
            // No false-positive prefix pattern is set for French.
            this.FalsePositivePrefixRegex = null;
        }
    }
}
Loading

0 comments on commit e63e6d3

Please sign in to comment.