From 19b58438c9403c61a8d9360eb294aa7a49d36ec8 Mon Sep 17 00:00:00 2001 From: Sylvain Kuchen Date: Wed, 14 Sep 2022 14:36:03 +0200 Subject: [PATCH 1/6] Add allChar function, fix #281 This function corresponds to `re.allchar` in http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml. It is implemented in the Z3 and CVC4 solvers. --- src/org/sosy_lab/java_smt/api/StringFormulaManager.java | 6 ++++++ .../java_smt/basicimpl/AbstractStringFormulaManager.java | 7 +++++++ .../statistics/StatisticsStringFormulaManager.java | 6 ++++++ .../synchronize/SynchronizedStringFormulaManager.java | 7 +++++++ .../java_smt/solvers/cvc4/CVC4StringFormulaManager.java | 5 +++++ .../java_smt/solvers/z3/Z3StringFormulaManager.java | 5 +++++ .../sosy_lab/java_smt/test/StringFormulaManagerTest.java | 8 ++++++++ 7 files changed, 44 insertions(+) diff --git a/src/org/sosy_lab/java_smt/api/StringFormulaManager.java b/src/org/sosy_lab/java_smt/api/StringFormulaManager.java index 4ba3235f86..064964af86 100644 --- a/src/org/sosy_lab/java_smt/api/StringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/api/StringFormulaManager.java @@ -128,6 +128,12 @@ default StringFormula concat(StringFormula... parts) { */ RegexFormula all(); + /** + * @return formula denoting the set of all strings of length 1, also known as DOT operator which + * represents an arbitrary char. . + */ + RegexFormula allChar(); + /** * @return formula denoting the range regular expression over two sequences of length 1. */ diff --git a/src/org/sosy_lab/java_smt/basicimpl/AbstractStringFormulaManager.java b/src/org/sosy_lab/java_smt/basicimpl/AbstractStringFormulaManager.java index 12efe42c95..2b3b87915b 100644 --- a/src/org/sosy_lab/java_smt/basicimpl/AbstractStringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/basicimpl/AbstractStringFormulaManager.java @@ -205,6 +205,13 @@ public RegexFormula all() { protected abstract TFormulaInfo allImpl(); + @Override + public RegexFormula allChar() { + return wrapRegex(allCharImpl()); + } + + protected abstract TFormulaInfo allCharImpl(); + @Override public RegexFormula range(StringFormula start, StringFormula end) { return wrapRegex(range(extractInfo(start), extractInfo(end))); diff --git a/src/org/sosy_lab/java_smt/delegate/statistics/StatisticsStringFormulaManager.java b/src/org/sosy_lab/java_smt/delegate/statistics/StatisticsStringFormulaManager.java index b47a38a72d..3d2032abb6 100644 --- a/src/org/sosy_lab/java_smt/delegate/statistics/StatisticsStringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/delegate/statistics/StatisticsStringFormulaManager.java @@ -156,6 +156,12 @@ public RegexFormula all() { return delegate.all(); } + @Override + public RegexFormula allChar() { + stats.stringOperations.getAndIncrement(); + return delegate.allChar(); + } + @Override public RegexFormula range(StringFormula start, StringFormula end) { stats.stringOperations.getAndIncrement(); diff --git a/src/org/sosy_lab/java_smt/delegate/synchronize/SynchronizedStringFormulaManager.java b/src/org/sosy_lab/java_smt/delegate/synchronize/SynchronizedStringFormulaManager.java index 5c3a0b807a..c41e991eab 100644 --- a/src/org/sosy_lab/java_smt/delegate/synchronize/SynchronizedStringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/delegate/synchronize/SynchronizedStringFormulaManager.java @@ -178,6 +178,13 @@ public RegexFormula all() { } } + @Override + public RegexFormula allChar() { + synchronized (sync) { + return delegate.allChar(); + } + } + @Override public RegexFormula range(StringFormula start, StringFormula end) { synchronized (sync) { diff --git a/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java b/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java index c9cb49a46f..8ac9aa6136 100644 --- a/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java @@ -138,6 +138,11 @@ protected Expr allImpl() { return exprManager.mkExpr(Kind.REGEXP_COMPLEMENT, noneImpl()); } + @Override + protected Expr allCharImpl() { + return exprManager.mkExpr(Kind.REGEXP_SIGMA, noneImpl()); + } + @Override protected Expr range(Expr start, Expr end) { return exprManager.mkExpr(Kind.REGEXP_RANGE, start, end); diff --git a/src/org/sosy_lab/java_smt/solvers/z3/Z3StringFormulaManager.java b/src/org/sosy_lab/java_smt/solvers/z3/Z3StringFormulaManager.java index 4793e6f95f..61765ea531 100644 --- a/src/org/sosy_lab/java_smt/solvers/z3/Z3StringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/solvers/z3/Z3StringFormulaManager.java @@ -131,6 +131,11 @@ protected Long allImpl() { return Native.mkReFull(z3context, formulaCreator.getRegexType()); } + @Override + protected Long allCharImpl() { + return Native.mkReAllchar(z3context, formulaCreator.getRegexType()); + } + @Override protected Long range(Long start, Long end) { return Native.mkReRange(z3context, start, end); diff --git a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java index b396059949..af502c3c93 100644 --- a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java +++ b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java @@ -128,6 +128,14 @@ public void testRegexAll3() throws SolverException, InterruptedException { assertThatFormula(smgr.in(smgr.makeString(".*"), regex)).isSatisfiable(); } + @Test + public void testRegexAllChar() throws SolverException, InterruptedException { + RegexFormula regexAllChar = smgr.allChar(); + RegexFormula regexDot = smgr.makeRegex("."); + assertThatFormula(smgr.in(smgr.makeString("a"), regexAllChar)).isSatisfiable(); + assertThatFormula(smgr.in(smgr.makeString("a"), regexDot)).isUnsatisfiable(); + } + @Test public void testStringRegex2() throws SolverException, InterruptedException { RegexFormula regex = smgr.concat(smgr.closure(a2z), smgr.makeRegex("ll"), smgr.closure(a2z)); From 9e00bd297a068d3996486d662aa766a6cc072de7 Mon Sep 17 00:00:00 2001 From: Sylvain Kuchen Date: Thu, 15 Sep 2022 11:18:50 +0200 Subject: [PATCH 2/6] Add more test cases for allChar Add one test for unicode support. Z3 supports Unicode (https://microsoft.github.io/z3guide/docs/theories/Strings), but CVC4 does not (see http://cvc4.cs.stanford.edu/wiki/Strings, under limitations). --- .../test/StringFormulaManagerTest.java | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java index af502c3c93..11c6b9f9d0 100644 --- a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java +++ b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java @@ -134,6 +134,31 @@ public void testRegexAllChar() throws SolverException, InterruptedException { RegexFormula regexDot = smgr.makeRegex("."); assertThatFormula(smgr.in(smgr.makeString("a"), regexAllChar)).isSatisfiable(); assertThatFormula(smgr.in(smgr.makeString("a"), regexDot)).isUnsatisfiable(); + assertThatFormula(smgr.in(smgr.makeString("ab"), regexAllChar)).isUnsatisfiable(); + assertThatFormula(smgr.in(smgr.makeString(""), regexAllChar)).isUnsatisfiable(); + assertThatFormula(smgr.in(smgr.makeString("ab"), smgr.times(regexAllChar, 2))).isSatisfiable(); + assertThatFormula(smgr.in(smgr.makeVariable("x"), smgr.intersection(smgr.range('a', '9'), + regexAllChar))).isSatisfiable(); + } + + @Test + public void testRegexAllCharUnicode() throws SolverException, InterruptedException { + RegexFormula regexAllChar = smgr.allChar(); + if (solverUnderTest == Solvers.Z3) { + // Z3 supports Unicode characters in the theory of strings. + assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isSatisfiable(); + assertThatFormula(smgr.in(smgr.makeString("\\u{1fa6a}"), regexAllChar)).isSatisfiable(); + assertThatFormula(smgr.in(smgr.makeVariable("x"), smgr.intersection(smgr.range('a', 'Δ'), + regexAllChar))).isSatisfiable(); + // Combining characters are not matched as one character. + assertThatFormula(smgr.in(smgr.makeString("a\\u0336"), regexAllChar)).isUnsatisfiable(); + // Non-ascii non-printable characters should use the codepoint representation + assertThatFormula(smgr.in(smgr.makeString("Δ"), regexAllChar)).isUnsatisfiable(); + assertThatFormula(smgr.in(smgr.makeString("\\n"), regexAllChar)).isUnsatisfiable(); + } else if (solverUnderTest == Solvers.CVC4){ + // CVC4 does not support Unicode characters + assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isUnsatisfiable(); + } } @Test From 6c6c4004076cd1ba8d65034516a4b03ef87a6727 Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Thu, 15 Sep 2022 22:09:32 +0200 Subject: [PATCH 3/6] smaller cleanup and documentation. --- .../java_smt/api/StringFormulaManager.java | 8 +++-- .../test/StringFormulaManagerTest.java | 29 ++++++++++++------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/org/sosy_lab/java_smt/api/StringFormulaManager.java b/src/org/sosy_lab/java_smt/api/StringFormulaManager.java index 064964af86..c53cee7969 100644 --- a/src/org/sosy_lab/java_smt/api/StringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/api/StringFormulaManager.java @@ -124,13 +124,17 @@ default StringFormula concat(StringFormula... parts) { RegexFormula none(); /** + * Note: The size of the used alphabet depends on the underlying SMT solver. + * * @return formula denoting the set of all strings, also known as Regex ".*". */ RegexFormula all(); - /** + /** + * Note: The size of the used alphabet depends on the underlying SMT solver. + * * @return formula denoting the set of all strings of length 1, also known as DOT operator which - * represents an arbitrary char. . + * represents one arbitrary char, or as Regex ".". */ RegexFormula allChar(); diff --git a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java index 11c6b9f9d0..4c6b9eea33 100644 --- a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java +++ b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java @@ -131,33 +131,40 @@ public void testRegexAll3() throws SolverException, InterruptedException { @Test public void testRegexAllChar() throws SolverException, InterruptedException { RegexFormula regexAllChar = smgr.allChar(); - RegexFormula regexDot = smgr.makeRegex("."); + assertThatFormula(smgr.in(smgr.makeString("a"), regexAllChar)).isSatisfiable(); - assertThatFormula(smgr.in(smgr.makeString("a"), regexDot)).isUnsatisfiable(); assertThatFormula(smgr.in(smgr.makeString("ab"), regexAllChar)).isUnsatisfiable(); assertThatFormula(smgr.in(smgr.makeString(""), regexAllChar)).isUnsatisfiable(); assertThatFormula(smgr.in(smgr.makeString("ab"), smgr.times(regexAllChar, 2))).isSatisfiable(); - assertThatFormula(smgr.in(smgr.makeVariable("x"), smgr.intersection(smgr.range('a', '9'), - regexAllChar))).isSatisfiable(); + assertThatFormula( + smgr.in(smgr.makeVariable("x"), smgr.intersection(smgr.range('a', '9'), regexAllChar))) + .isSatisfiable(); + + RegexFormula regexDot = smgr.makeRegex("."); + assertThatFormula(smgr.in(smgr.makeString("a"), regexDot)).isUnsatisfiable(); } @Test public void testRegexAllCharUnicode() throws SolverException, InterruptedException { RegexFormula regexAllChar = smgr.allChar(); - if (solverUnderTest == Solvers.Z3) { - // Z3 supports Unicode characters in the theory of strings. + + if (solverUnderTest == Solvers.CVC4) { + // CVC4 does not support Unicode characters + assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isUnsatisfiable(); + + } else { + // Z3 and other solvers support Unicode characters in the theory of strings. assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isSatisfiable(); assertThatFormula(smgr.in(smgr.makeString("\\u{1fa6a}"), regexAllChar)).isSatisfiable(); - assertThatFormula(smgr.in(smgr.makeVariable("x"), smgr.intersection(smgr.range('a', 'Δ'), - regexAllChar))).isSatisfiable(); + assertThatFormula( + smgr.in( + smgr.makeVariable("x"), smgr.intersection(smgr.range('a', 'Δ'), regexAllChar))) + .isSatisfiable(); // Combining characters are not matched as one character. assertThatFormula(smgr.in(smgr.makeString("a\\u0336"), regexAllChar)).isUnsatisfiable(); // Non-ascii non-printable characters should use the codepoint representation assertThatFormula(smgr.in(smgr.makeString("Δ"), regexAllChar)).isUnsatisfiable(); assertThatFormula(smgr.in(smgr.makeString("\\n"), regexAllChar)).isUnsatisfiable(); - } else if (solverUnderTest == Solvers.CVC4){ - // CVC4 does not support Unicode characters - assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isUnsatisfiable(); } } From 79aae826cfb92f7b6fe4e1c2366689c5a1aa0993 Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Thu, 15 Sep 2022 23:44:09 +0200 Subject: [PATCH 4/6] add precondition for regex-range. and fix the corresponding test. --- src/org/sosy_lab/java_smt/api/StringFormulaManager.java | 8 ++++++++ .../sosy_lab/java_smt/test/StringFormulaManagerTest.java | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/org/sosy_lab/java_smt/api/StringFormulaManager.java b/src/org/sosy_lab/java_smt/api/StringFormulaManager.java index c53cee7969..ae25341148 100644 --- a/src/org/sosy_lab/java_smt/api/StringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/api/StringFormulaManager.java @@ -8,6 +8,7 @@ package org.sosy_lab.java_smt.api; +import com.google.common.base.Preconditions; import java.util.Arrays; import java.util.List; import org.sosy_lab.java_smt.api.NumeralFormula.IntegerFormula; @@ -148,6 +149,13 @@ default StringFormula concat(StringFormula... parts) { * @see #range(StringFormula, StringFormula) */ default RegexFormula range(char start, char end) { + Preconditions.checkArgument( + start <= end, + "Range from start '%s' (%s) to end '%s' (%s) is empty.", + start, + (int) start, + end, + (int) end); return range(makeString(String.valueOf(start)), makeString(String.valueOf(end))); } diff --git a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java index 4c6b9eea33..07e1977027 100644 --- a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java +++ b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java @@ -137,7 +137,7 @@ public void testRegexAllChar() throws SolverException, InterruptedException { assertThatFormula(smgr.in(smgr.makeString(""), regexAllChar)).isUnsatisfiable(); assertThatFormula(smgr.in(smgr.makeString("ab"), smgr.times(regexAllChar, 2))).isSatisfiable(); assertThatFormula( - smgr.in(smgr.makeVariable("x"), smgr.intersection(smgr.range('a', '9'), regexAllChar))) + smgr.in(smgr.makeVariable("x"), smgr.intersection(smgr.range('9', 'a'), regexAllChar))) .isSatisfiable(); RegexFormula regexDot = smgr.makeRegex("."); From 0c32514a47023314c2a6e2004bbfebdbf952a012 Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Thu, 15 Sep 2022 23:44:09 +0200 Subject: [PATCH 5/6] fix allChar for CVC4. --- .../java_smt/solvers/cvc4/CVC4StringFormulaManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java b/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java index 8ac9aa6136..5701157f3f 100644 --- a/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java +++ b/src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java @@ -140,7 +140,7 @@ protected Expr allImpl() { @Override protected Expr allCharImpl() { - return exprManager.mkExpr(Kind.REGEXP_SIGMA, noneImpl()); + return exprManager.mkExpr(Kind.REGEXP_SIGMA, new vectorExpr()); } @Override From ae3a80b9d36e84b6dfa1cde5fdb3ac421cc88159 Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Thu, 15 Sep 2022 23:44:09 +0200 Subject: [PATCH 6/6] improve unit test for allChar. --- .../java_smt/test/StringFormulaManagerTest.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java index 07e1977027..6a0f946ae6 100644 --- a/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java +++ b/src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java @@ -148,23 +148,24 @@ public void testRegexAllChar() throws SolverException, InterruptedException { public void testRegexAllCharUnicode() throws SolverException, InterruptedException { RegexFormula regexAllChar = smgr.allChar(); - if (solverUnderTest == Solvers.CVC4) { - // CVC4 does not support Unicode characters - assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isUnsatisfiable(); + // Single characters. + assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isSatisfiable(); + assertThatFormula(smgr.in(smgr.makeString("\\u{1fa6a}"), regexAllChar)).isSatisfiable(); - } else { + // Combining characters are not matched as one character. + assertThatFormula(smgr.in(smgr.makeString("a\\u0336"), regexAllChar)).isUnsatisfiable(); + assertThatFormula(smgr.in(smgr.makeString("\\n"), regexAllChar)).isUnsatisfiable(); + + if (solverUnderTest != Solvers.CVC4) { + // CVC4 does not support Unicode characters. // Z3 and other solvers support Unicode characters in the theory of strings. - assertThatFormula(smgr.in(smgr.makeString("\\u0394"), regexAllChar)).isSatisfiable(); - assertThatFormula(smgr.in(smgr.makeString("\\u{1fa6a}"), regexAllChar)).isSatisfiable(); assertThatFormula( smgr.in( smgr.makeVariable("x"), smgr.intersection(smgr.range('a', 'Δ'), regexAllChar))) .isSatisfiable(); // Combining characters are not matched as one character. - assertThatFormula(smgr.in(smgr.makeString("a\\u0336"), regexAllChar)).isUnsatisfiable(); // Non-ascii non-printable characters should use the codepoint representation assertThatFormula(smgr.in(smgr.makeString("Δ"), regexAllChar)).isUnsatisfiable(); - assertThatFormula(smgr.in(smgr.makeString("\\n"), regexAllChar)).isUnsatisfiable(); } }