Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

新增IP地址识别 #777

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions src/main/java/org/ansj/recognition/impl/IDCardRecognition.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,36 @@ public class IDCardRecognition implements Recognition {
*/
private static final long serialVersionUID = -32133440735240290L;
private static final Nature ID_CARD_NATURE = new Nature("idcard");
private static final String REGEX_ID_NO_18 = "\\d{6}(18|19|([23]\\d))\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{3}[0-9Xx]";

@Override
public void recognition(Result result) {

List<Term> terms = result.getTerms() ;

for (Term term : terms) {
if ("m".equals(term.getNatureStr())) {

if (term.getName().length() == 18) {
if (term.getName().length() == 18 && term.getName().matches(REGEX_ID_NO_18)) {
term.setNature(ID_CARD_NATURE);
} else if (term.getName().length() == 17) {
Term to = term.to();
if ("x".equals(to.getName())) {
if ("x".equals(to.getName()) && (term.getName() + to.getName()).matches(REGEX_ID_NO_18)) {
term.merage(to);
to.setName(null);
term.setNature(ID_CARD_NATURE);
} else if (to.getName().startsWith("x") || to.getName().startsWith("X")) {
String start = to.getName().substring(0, 1);
if ((term.getName() + start).matches(REGEX_ID_NO_18)) {
String substring = to.getName().substring(1);
String real_substring = to.getRealName().substring(1);
String real_start = to.getRealName().substring(0,1);
term.setRealName(term.getRealName() + real_start);
term.setName(term.getName() + start);
to.setRealName(real_substring);
to.setName(substring);
term.setNature(ID_CARD_NATURE);
}
}
}

Expand Down
31 changes: 31 additions & 0 deletions src/main/java/org/ansj/recognition/impl/IPRecognition.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.ansj.recognition.impl;

import org.ansj.app.extracting.Extracting;
import org.ansj.app.extracting.exception.RuleFormatException;
import org.ansj.domain.TermNature;
import org.ansj.domain.TermNatures;

/**
 * Recognition step that registers a single IPv4 extraction rule and hands it,
 * together with the {@code "ip"} term nature, to {@link ExtractingRecognition}.
 *
 * <p>The rule is written in the rule syntax accepted by
 * {@link Extracting#addRuleStr(String)}; it is not a plain
 * {@code java.util.regex} pattern.
 */
public class IPRecognition extends ExtractingRecognition {

    private static final long serialVersionUID = 1L;

    /** Term natures passed to the parent recognition: a single nature named "ip" with value 1. */
    private static final TermNatures IP_T_N = new TermNatures(new TermNature("ip", 1));

    /** Shared extractor holding the IPv4 rule; populated once in the static initializer. */
    private static final Extracting EXTRACTING = new Extracting();

    /**
     * Rule fragment for one address octet.
     * NOTE(review): "(:m)" presumably restricts the match to terms with nature "m" and the
     * bracket groups enumerate 250-255 / 200-249 / 100-199 / 0-99 -- confirm against the
     * Extracting rule grammar.
     */
    private static final String OCTET_RULE =
            "(:m)[25[0-5]|][2[0-4]\\\\d|][1\\\\d{2}|][[0-9]\\\\d][\\\\d]";

    /** Rule fragment for the literal dot separating two octets. */
    private static final String DOT_RULE = "(\\.)";

    /** Complete IPv4 rule: four octet fragments joined by three dot fragments. */
    private static final String REGEX_IPV4 =
            OCTET_RULE + DOT_RULE + OCTET_RULE + DOT_RULE + OCTET_RULE + DOT_RULE + OCTET_RULE;

    static {
        try {
            EXTRACTING.addRuleStr(REGEX_IPV4);
        } catch (RuleFormatException e) {
            // The rule is a compile-time constant, so a format error is a programming
            // mistake. Fail class initialization instead of printing a stack trace and
            // leaving a recognizer that has no rule and silently recognizes nothing.
            throw new ExceptionInInitializerError(e);
        }
    }

    /** Creates a recognizer backed by the shared IPv4 extractor and the "ip" term natures. */
    public IPRecognition() {
        super(EXTRACTING, IP_T_N);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ public class IDCardRecognitionTest {
/**
 * Smoke test: parses two sample sentences containing ID-card-like digit runs,
 * applies {@link IDCardRecognition} to each and prints the outcome.
 * Nothing is asserted; the printed output has to be inspected manually.
 */
@Test
public void test() {
    // Both sentences are parsed up front, before any recognition is applied.
    Result[] parsedSamples = {
        ToAnalysis.parse("我吃了一个西瓜,我今年25岁。13282619771220503X这里有一万个东西,我的身份证号码是130722198506280057h"),
        ToAnalysis.parse("132826197713205030这,身份证号码是13072219850628005xx")
    };

    // A fresh recognizer instance is used for every parsed sentence.
    for (Result parsed : parsedSamples) {
        System.out.println(parsed.recognition(new IDCardRecognition()));
    }
}

}
20 changes: 20 additions & 0 deletions src/test/java/org/ansj/recognition/impl/IPRecognitionTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.ansj.recognition.impl;

import org.ansj.domain.Result;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for {@link IPRecognition}.
 */
public class IPRecognitionTest {

    /**
     * Parses a comma-separated list of dotted-quad candidates, applies
     * {@link IPRecognition} and checks that the first resulting term is the
     * complete address {@code 192.168.1.1}.
     */
    @Test
    public void recognition() throws Exception {
        Result recognition = ToAnalysis.parse("192.168.1.1, 1.1.1.1, 255.254.251.256, 0.0.0.0")
                .recognition(new IPRecognition());

        System.out.println(recognition);

        // JUnit's contract is assertEquals(expected, actual); the original call had the
        // arguments swapped, which produces a misleading message when the test fails.
        Assert.assertEquals("192.168.1.1", recognition.get(0).getName());
    }

}