Skip to content

Commit

Permalink
Merge pull request #250 from serras/master
Browse files Browse the repository at this point in the history
Adding support for String and Regex theory. This theory is provided by the SMT solvers Z3 and CVC4.

For CVC4 we require to enable an additional option which seems to have a slightly negative impact when solving quantifier formulas.
Z3 provides String theory without using additional options.

The support for String and Regex theory is mostly stable and sufficient for further experiments.
  • Loading branch information
kfriedberger committed Nov 27, 2021
2 parents ce41852 + 284eb40 commit 354b8b9
Show file tree
Hide file tree
Showing 51 changed files with 3,793 additions and 44 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,13 @@ JavaSMT can express formulas in the following theories:
- Integer
- Rational
- Bitvector
- Floating point
- Floating Point
- Array
- Uninterpreted Function
- String and RegEx

The concrete support for a certain theory depends on the underlying SMT solver.
Only a few SMT solvers provide support for theories like Arrays, Floating Point, String or RegEx.

Currently JavaSMT supports several SMT solvers (see [Getting Started](doc/Getting-started.md) for installation):

Expand Down
7 changes: 7 additions & 0 deletions src/org/sosy_lab/java_smt/api/FormulaManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ public interface FormulaManager {
*/
QuantifiedFormulaManager getQuantifiedFormulaManager();

/**
* Returns the String Theory.
*
* @throws UnsupportedOperationException If the theory is not supported by the solver.
*/
StringFormulaManager getStringFormulaManager();

/**
* Create variable of the type equal to {@code formulaType}.
*
Expand Down
50 changes: 50 additions & 0 deletions src/org/sosy_lab/java_smt/api/FormulaType.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ public boolean isSLType() {
return false;
}

public boolean isStringType() {
return false;
}

public boolean isRegexType() {
return false;
}

@Override
public abstract String toString();

Expand Down Expand Up @@ -344,6 +352,44 @@ public String toSMTLIBString() {
}
}

public static final FormulaType<StringFormula> StringType =
new FormulaType<>() {

@Override
public boolean isStringType() {
return true;
}

@Override
public String toString() {
return "String";
}

@Override
public String toSMTLIBString() {
return "String";
}
};

public static final FormulaType<BooleanFormula> RegexType =
new FormulaType<>() {

@Override
public boolean isRegexType() {
return true;
}

@Override
public String toString() {
return "RegLan";
}

@Override
public String toSMTLIBString() {
return "RegLan";
}
};

/**
* Parse a string and return the corresponding type. This method is the counterpart of {@link
* #toString()}.
Expand All @@ -355,6 +401,10 @@ public static FormulaType<?> fromString(String t) {
return IntegerType;
} else if (RationalType.toString().equals(t)) {
return RationalType;
} else if (StringType.toString().equals(t)) {
return StringType;
} else if (RegexType.toString().equals(t)) {
return RegexType;
} else if (FloatingPointRoundingModeType.toString().equals(t)) {
return FloatingPointRoundingModeType;
} else if (t.startsWith("FloatingPoint<")) {
Expand Down
28 changes: 28 additions & 0 deletions src/org/sosy_lab/java_smt/api/FunctionDeclarationKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,34 @@ public enum FunctionDeclarationKind {
FP_AS_IEEEBV,
FP_FROM_IEEEBV,

// String and Regex theory

STR_CONCAT,
STR_PREFIX,
STR_SUFFIX,
STR_CONTAINS,
STR_SUBSTRING,
STR_REPLACE,
STR_REPLACE_ALL,
STR_CHAR_AT,
STR_LENGTH,
STR_INDEX_OF,
STR_TO_RE,
STR_IN_RE,
STR_TO_INT,
INT_TO_STR,
STR_LT,
STR_LE,
RE_PLUS,
RE_STAR,
RE_OPTIONAL,
RE_CONCAT,
RE_UNION,
RE_RANGE,
RE_INTERSECT,
RE_COMPLEMENT,
RE_DIFFERENCE,

// default case

/**
Expand Down
8 changes: 8 additions & 0 deletions src/org/sosy_lab/java_smt/api/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ public interface Model extends Iterable<ValueAssignment>, AutoCloseable {
@Nullable
BigInteger evaluate(BitvectorFormula f);

/**
* Type-safe evaluation for string formulas.
*
* <p>The formula does not need to be a variable, we also allow complex expression.
*/
@Nullable
String evaluate(StringFormula f);

/**
* Iterate over all values present in the model. Note that iterating multiple times may be
* inefficient for some solvers, it is recommended to use {@link
Expand Down
15 changes: 15 additions & 0 deletions src/org/sosy_lab/java_smt/api/RegexFormula.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// This file is part of JavaSMT,
// an API wrapper for a collection of SMT solvers:
// https://github.com/sosy-lab/java-smt
//
// SPDX-FileCopyrightText: 2021 Alejandro Serrano Mena
//
// SPDX-License-Identifier: Apache-2.0

package org.sosy_lab.java_smt.api;

import com.google.errorprone.annotations.Immutable;

/** A formula of the string sort. */
@Immutable
public interface RegexFormula extends Formula {}
15 changes: 15 additions & 0 deletions src/org/sosy_lab/java_smt/api/StringFormula.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// This file is part of JavaSMT,
// an API wrapper for a collection of SMT solvers:
// https://github.com/sosy-lab/java-smt
//
// SPDX-FileCopyrightText: 2021 Alejandro Serrano Mena
//
// SPDX-License-Identifier: Apache-2.0

package org.sosy_lab.java_smt.api;

import com.google.errorprone.annotations.Immutable;

/** A formula of the string sort. */
@Immutable
public interface StringFormula extends Formula {}
190 changes: 190 additions & 0 deletions src/org/sosy_lab/java_smt/api/StringFormulaManager.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// This file is part of JavaSMT,
// an API wrapper for a collection of SMT solvers:
// https://github.com/sosy-lab/java-smt
//
// SPDX-FileCopyrightText: 2021 Alejandro Serrano Mena
//
// SPDX-License-Identifier: Apache-2.0

package org.sosy_lab.java_smt.api;

import java.util.Arrays;
import java.util.List;
import org.sosy_lab.java_smt.api.NumeralFormula.IntegerFormula;

/**
* Manager for dealing with string formulas. Functions come from
* http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml.
*/
public interface StringFormulaManager {

/**
* Returns a {@link StringFormula} representing the given constant.
*
* @param value the string value the returned <code>Formula</code> should represent
* @return a Formula representing the given value
*/
StringFormula makeString(String value);

/**
* Creates a variable of type String with exactly the given name.
*
* <p>This variable (symbol) represents a "String" for which the SMT solver needs to find a model.
*
* <p>Please make sure that the given name is valid in SMT-LIB2. Take a look at {@link
* FormulaManager#isValidName} for further information.
*
* <p>This method does not quote or unquote the given name, but uses the plain name "AS IS".
* {@link Formula#toString} can return a different String than the given one.
*/
StringFormula makeVariable(String pVar);

// TODO: There is currently no way to use variables of type "Regex", i.e., that represent full
// regular expression for which the SMT solver need to find a model.
// The reason for this is that the current SMT solvers do not support this feature.
// However, we can build a RegexFormula from basic parts like Strings (including
// variables of type String) and operations like range, union, or complement.

BooleanFormula equal(StringFormula str1, StringFormula str2);

BooleanFormula greaterThan(StringFormula str1, StringFormula str2);

BooleanFormula greaterOrEquals(StringFormula str1, StringFormula str2);

BooleanFormula lessThan(StringFormula str1, StringFormula str2);

BooleanFormula lessOrEquals(StringFormula str1, StringFormula str2);

/** Check whether the given prefix is a real prefix of str. */
BooleanFormula prefix(StringFormula prefix, StringFormula str);

/** Check whether the given suffix is a real suffix of str. */
BooleanFormula suffix(StringFormula suffix, StringFormula str);

BooleanFormula contains(StringFormula str, StringFormula part);

/**
* Get the first index for a substring in a String, or -1 if the substring is not found.
* startIndex (inlcuded) denotes the start of the search for the index.
*/
IntegerFormula indexOf(StringFormula str, StringFormula part, IntegerFormula startIndex);

/**
* Get a substring of length 1 from the given String.
*
* <p>The result is underspecified, if the index is out of bounds for the given String.
*/
StringFormula charAt(StringFormula str, IntegerFormula index);

/**
* Get a substring from the given String.
*
* <p>The result is underspecified, if the start index is out of bounds for the given String or if
* the requested length is negative. The length of the result is the minimum of the requested
* length and the remaining length of the given String.
*/
StringFormula substring(StringFormula str, IntegerFormula index, IntegerFormula length);

/** Replace the first appearances of target in fullStr with the replacement. */
StringFormula replace(StringFormula fullStr, StringFormula target, StringFormula replacement);

/** Replace all appearances of target in fullStr with the replacement. */
StringFormula replaceAll(StringFormula fullStr, StringFormula target, StringFormula replacement);

IntegerFormula length(StringFormula str);

default StringFormula concat(StringFormula... parts) {
return concat(Arrays.asList(parts));
}

StringFormula concat(List<StringFormula> parts);

/**
* @param str formula representing the string to match
* @param regex formula representing the regular expression
* @return a formula representing the acceptance of the string by the regular expression
*/
BooleanFormula in(StringFormula str, RegexFormula regex);

/**
* Returns a {@link RegexFormula} representing the given constant.
*
* <p>This method does not parse an existing regex from String, but uses the String directly as a
* constant.
*
* @param value the regular expression the returned <code>Formula</code> should represent
*/
RegexFormula makeRegex(String value);

// basic regex operations

/** @return formula denoting the empty set of strings */
RegexFormula none();

/** @return formula denoting the set of all strings, also known as Regex <code>".*"</code>. */
RegexFormula all();

/** @return formula denoting the range regular expression over two sequences of length 1. */
RegexFormula range(StringFormula start, StringFormula end);

/**
* @return formula denoting the range regular expression over two chars.
* @see #range(StringFormula, StringFormula)
*/
default RegexFormula range(char start, char end) {
return range(makeString(String.valueOf(start)), makeString(String.valueOf(end)));
}

/** @return formula denoting the concatenation */
default RegexFormula concat(RegexFormula... parts) {
return concatRegex(Arrays.asList(parts));
}

/** @return formula denoting the concatenation */
// TODO the naming of this function collides with #concat(List<StringFormula>).
// Maybe we should split String and Regex manager.
RegexFormula concatRegex(List<RegexFormula> parts);

/** @return formula denoting the union */
RegexFormula union(RegexFormula regex1, RegexFormula regex2);

/** @return formula denoting the intersection */
RegexFormula intersection(RegexFormula regex1, RegexFormula regex2);

/** @return formula denoting the Kleene closure */
RegexFormula complement(RegexFormula regex);

/** @return formula denoting the Kleene closure (0 or more), also known as STAR operand. */
RegexFormula closure(RegexFormula regex);

// derived regex operations

/** @return formula denoting the difference */
RegexFormula difference(RegexFormula regex1, RegexFormula regex2);

/** @return formula denoting the Kleene cross (1 or more), also known as PLUS operand. */
RegexFormula cross(RegexFormula regex);

/** @return formula denoting the optionality, also known as QUESTIONMARK operand. */
RegexFormula optional(RegexFormula regex);

/** @return formula denoting the concatenation n times */
RegexFormula times(RegexFormula regex, int repetitions);

/**
* Interpret a String formula as an Integer formula.
*
* <p>The number is interpreted in base 10 and has no leading zeros. The method works as expected
* for positive numbers, including zero. It returns the constant value of <code>-1</code> for
* negative numbers or invalid number representations, for example if any char is no digit.
*/
IntegerFormula toIntegerFormula(StringFormula str);

/**
* Interpret an Integer formula as a String formula.
*
* <p>The number is in base 10. The method works as expected for positive numbers, including zero.
* It returns the empty string <code>""</code> for negative numbers.
*/
StringFormula toStringFormula(IntegerFormula number);
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ protected final TType toSolverType(FormulaType<?> formulaType) {
t = getFormulaCreator().getIntegerType();
} else if (formulaType.isRationalType()) {
t = getFormulaCreator().getRationalType();
} else if (formulaType.isStringType()) {
t = getFormulaCreator().getStringType();
} else if (formulaType.isBitvectorType()) {
FormulaType.BitvectorType bitPreciseType = (FormulaType.BitvectorType) formulaType;
t = getFormulaCreator().getBitvectorType(bitPreciseType.getSize());
Expand Down
Loading

0 comments on commit 354b8b9

Please sign in to comment.