Skip to content

Commit

Permalink
compile --rule-file pattern only once / extracted regular expressio…
Browse files Browse the repository at this point in the history
…ns code to separate file [skip ci]
  • Loading branch information
firewave committed Jul 2, 2024
1 parent e9d3d8d commit 1fb855b
Show file tree
Hide file tree
Showing 10 changed files with 549 additions and 380 deletions.
250 changes: 127 additions & 123 deletions Makefile

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions cli/cmdlineparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,13 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
return Result::Fail;
}

auto regex = std::make_shared<Regex>(rule.pattern);
const std::string regex_err = regex->compile();
if (!regex_err.empty()) {
mLogger.printError("failed to compile rule pattern '" + rule.pattern + "' (" + regex_err + ").");
return Result::Fail;
}
rule.regex = std::move(regex);
mSettings.rules.emplace_back(std::move(rule));
#else
mLogger.printError("Option --rule cannot be used as Cppcheck has not been built with rules support.");
Expand Down Expand Up @@ -1178,6 +1185,14 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
return Result::Fail;
}

auto regex = std::make_shared<Regex>(rule.pattern);
const std::string regex_err = regex->compile();
if (!regex_err.empty()) {
mLogger.printError("unable to load rule-file '" + ruleFile + "' - pattern '" + rule.pattern + "' failed to compile (" + regex_err + ").");
return Result::Fail;
}
rule.regex = std::move(regex);

if (rule.severity == Severity::none) {
mLogger.printError("unable to load rule-file '" + ruleFile + "' - a rule has an invalid severity.");
return Result::Fail;
Expand Down
224 changes: 14 additions & 210 deletions lib/cppcheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,6 @@

#include "xml.h"

#ifdef HAVE_RULES
#ifdef _WIN32
#define PCRE_STATIC
#endif
#include <pcre.h>
#endif

class SymbolDatabase;

static constexpr char Version[] = CPPCHECK_VERSION_STRING;
Expand Down Expand Up @@ -1121,135 +1114,6 @@ bool CppCheck::hasRule(const std::string &tokenlist) const
});
}

static const char * pcreErrorCodeToString(const int pcreExecRet)
{
switch (pcreExecRet) {
case PCRE_ERROR_NULL:
return "Either code or subject was passed as NULL, or ovector was NULL "
"and ovecsize was not zero (PCRE_ERROR_NULL)";
case PCRE_ERROR_BADOPTION:
return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)";
case PCRE_ERROR_BADMAGIC:
return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, "
"to catch the case when it is passed a junk pointer and to detect when a "
"pattern that was compiled in an environment of one endianness is run in "
"an environment with the other endianness. This is the error that PCRE "
"gives when the magic number is not present (PCRE_ERROR_BADMAGIC)";
case PCRE_ERROR_UNKNOWN_NODE:
return "While running the pattern match, an unknown item was encountered in the "
"compiled pattern. This error could be caused by a bug in PCRE or by "
"overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)";
case PCRE_ERROR_NOMEMORY:
return "If a pattern contains back references, but the ovector that is passed "
"to pcre_exec() is not big enough to remember the referenced substrings, "
"PCRE gets a block of memory at the start of matching to use for this purpose. "
"If the call via pcre_malloc() fails, this error is given. The memory is "
"automatically freed at the end of matching. This error is also given if "
"pcre_stack_malloc() fails in pcre_exec(). "
"This can happen only when PCRE has been compiled with "
"--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)";
case PCRE_ERROR_NOSUBSTRING:
return "This error is used by the pcre_copy_substring(), pcre_get_substring(), "
"and pcre_get_substring_list() functions (see below). "
"It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)";
case PCRE_ERROR_MATCHLIMIT:
return "The backtracking limit, as specified by the match_limit field in a pcre_extra "
"structure (or defaulted) was reached. "
"See the description above (PCRE_ERROR_MATCHLIMIT)";
case PCRE_ERROR_CALLOUT:
return "This error is never generated by pcre_exec() itself. "
"It is provided for use by callout functions that want to yield a distinctive "
"error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)";
case PCRE_ERROR_BADUTF8:
return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, "
"and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector "
"(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 "
"character is placed in the first element, and a reason code is placed in the "
"second element. The reason codes are listed in the following section. For "
"backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated "
"UTF-8 character at the end of the subject (reason codes 1 to 5), "
"PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8";
case PCRE_ERROR_BADUTF8_OFFSET:
return "The UTF-8 byte sequence that was passed as a subject was checked and found to "
"be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of "
"startoffset did not point to the beginning of a UTF-8 character or the end of "
"the subject (PCRE_ERROR_BADUTF8_OFFSET)";
case PCRE_ERROR_PARTIAL:
return "The subject string did not match, but it did match partially. See the "
"pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)";
case PCRE_ERROR_BADPARTIAL:
return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL "
"option was used with a compiled pattern containing items that were not supported "
"for partial matching. From release 8.00 onwards, there are no restrictions on "
"partial matching (PCRE_ERROR_BADPARTIAL)";
case PCRE_ERROR_INTERNAL:
return "An unexpected internal error has occurred. This error could be caused by a bug "
"in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)";
case PCRE_ERROR_BADCOUNT:
return "This error is given if the value of the ovecsize argument is negative "
"(PCRE_ERROR_BADCOUNT)";
case PCRE_ERROR_RECURSIONLIMIT:
return "The internal recursion limit, as specified by the match_limit_recursion "
"field in a pcre_extra structure (or defaulted) was reached. "
"See the description above (PCRE_ERROR_RECURSIONLIMIT)";
case PCRE_ERROR_DFA_UITEM:
return "PCRE_ERROR_DFA_UITEM";
case PCRE_ERROR_DFA_UCOND:
return "PCRE_ERROR_DFA_UCOND";
case PCRE_ERROR_DFA_WSSIZE:
return "PCRE_ERROR_DFA_WSSIZE";
case PCRE_ERROR_DFA_RECURSE:
return "PCRE_ERROR_DFA_RECURSE";
case PCRE_ERROR_NULLWSLIMIT:
return "PCRE_ERROR_NULLWSLIMIT";
case PCRE_ERROR_BADNEWLINE:
return "An invalid combination of PCRE_NEWLINE_xxx options was "
"given (PCRE_ERROR_BADNEWLINE)";
case PCRE_ERROR_BADOFFSET:
return "The value of startoffset was negative or greater than the length "
"of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)";
case PCRE_ERROR_SHORTUTF8:
return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject "
"string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. "
"Information about the failure is returned as for PCRE_ERROR_BADUTF8. "
"It is in fact sufficient to detect this case, but this special error code for "
"PCRE_PARTIAL_HARD precedes the implementation of returned information; "
"it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)";
case PCRE_ERROR_RECURSELOOP:
return "This error is returned when pcre_exec() detects a recursion loop "
"within the pattern. Specifically, it means that either the whole pattern "
"or a subpattern has been called recursively for the second time at the same "
"position in the subject string. Some simple patterns that might do this "
"are detected and faulted at compile time, but more complicated cases, "
"in particular mutual recursions between two different subpatterns, "
"cannot be detected until run time (PCRE_ERROR_RECURSELOOP)";
case PCRE_ERROR_JIT_STACKLIMIT:
return "This error is returned when a pattern that was successfully studied "
"using a JIT compile option is being matched, but the memory available "
"for the just-in-time processing stack is not large enough. See the pcrejit "
"documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)";
case PCRE_ERROR_BADMODE:
return "This error is given if a pattern that was compiled by the 8-bit library "
"is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)";
case PCRE_ERROR_BADENDIANNESS:
return "This error is given if a pattern that was compiled and saved is reloaded on a "
"host with different endianness. The utility function pcre_pattern_to_host_byte_order() "
"can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)";
case PCRE_ERROR_DFA_BADRESTART:
return "PCRE_ERROR_DFA_BADRESTART";
#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
case PCRE_ERROR_BADLENGTH:
return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)";
case PCRE_ERROR_JIT_BADOPTION:
return "This error is returned when a pattern that was successfully studied using a JIT compile "
"option is being matched, but the matching mode (partial or complete match) does not correspond "
"to any JIT compilation mode. When the JIT fast path function is used, this error may be "
"also given for invalid options. See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
#endif
}
return "";
}

void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
{
// There is no rule to execute
Expand All @@ -1271,73 +1135,7 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
reportOut("Processing rule: " + rule.pattern, Color::FgGreen);
}

const char *pcreCompileErrorStr = nullptr;
int erroffset = 0;
pcre * const re = pcre_compile(rule.pattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
if (!re) {
if (pcreCompileErrorStr) {
const std::string msg = "pcre_compile failed: " + std::string(pcreCompileErrorStr);
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
emptyString,
Severity::error,
msg,
"pcre_compile",
Certainty::normal);

reportErr(errmsg);
}
continue;
}

// Optimize the regex, but only if PCRE_CONFIG_JIT is available
#ifdef PCRE_CONFIG_JIT
const char *pcreStudyErrorStr = nullptr;
pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
// pcre_study() returns NULL for both errors and when it can not optimize the regex.
// The last argument is how one checks for errors.
// It is NULL if everything works, and points to an error string otherwise.
if (pcreStudyErrorStr) {
const std::string msg = "pcre_study failed: " + std::string(pcreStudyErrorStr);
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
emptyString,
Severity::error,
msg,
"pcre_study",
Certainty::normal);

reportErr(errmsg);
// pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
pcre_free(re);
continue;
}
#else
const pcre_extra * const pcreExtra = nullptr;
#endif

int pos = 0;
int ovector[30]= {0};
while (pos < (int)str.size()) {
const int pcreExecRet = pcre_exec(re, pcreExtra, str.c_str(), (int)str.size(), pos, 0, ovector, 30);
if (pcreExecRet < 0) {
const std::string errorMessage = pcreErrorCodeToString(pcreExecRet);
if (!errorMessage.empty()) {
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
emptyString,
Severity::error,
std::string("pcre_exec failed: ") + errorMessage,
"pcre_exec",
Certainty::normal);

reportErr(errmsg);
}
break;
}
const auto pos1 = (unsigned int)ovector[0];
const auto pos2 = (unsigned int)ovector[1];

// jump to the end of the match for the next pcre_exec
pos = (int)pos2;

auto f = [&](int pos1, int pos2) {
// determine location..
int fileIndex = 0;
int line = 0;
Expand Down Expand Up @@ -1366,15 +1164,21 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)

// Report error
reportErr(errmsg);
}
};

assert(rule.regex);

pcre_free(re);
#ifdef PCRE_CONFIG_JIT
// Free up the EXTRA PCRE value (may be NULL at this point)
if (pcreExtra) {
pcre_free_study(pcreExtra);
const std::string err = rule.regex->match(str, f);
if (!err.empty()) {
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
emptyString,
Severity::error,
err,
"pcre_exec",
Certainty::normal);

reportErr(errmsg);
}
#endif
}
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions lib/cppcheck.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
<ClCompile Include="platform.cpp" />
<ClCompile Include="preprocessor.cpp" />
<ClCompile Include="programmemory.cpp" />
<ClCompile Include="regex.cpp" />
<ClCompile Include="reverseanalyzer.cpp" />
<ClCompile Include="settings.cpp" />
<ClCompile Include="summaries.cpp" />
Expand Down Expand Up @@ -153,6 +154,7 @@
<ClInclude Include="precompiled.h" />
<ClInclude Include="preprocessor.h" />
<ClInclude Include="programmemory.h" />
<ClInclude Include="regex.h" />
<ClInclude Include="reverseanalyzer.h" />
<ClInclude Include="settings.h" />
<ClInclude Include="smallvector.h" />
Expand Down
2 changes: 2 additions & 0 deletions lib/lib.pri
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ HEADERS += $${PWD}/addoninfo.h \
$${PWD}/precompiled.h \
$${PWD}/preprocessor.h \
$${PWD}/programmemory.h \
$${PWD}/regex.h \
$${PWD}/reverseanalyzer.h \
$${PWD}/settings.h \
$${PWD}/smallvector.h \
Expand Down Expand Up @@ -138,6 +139,7 @@ SOURCES += $${PWD}/valueflow.cpp \
$${PWD}/platform.cpp \
$${PWD}/preprocessor.cpp \
$${PWD}/programmemory.cpp \
$${PWD}/regex.cpp \
$${PWD}/reverseanalyzer.cpp \
$${PWD}/settings.cpp \
$${PWD}/summaries.cpp \
Expand Down
Loading

0 comments on commit 1fb855b

Please sign in to comment.