From b51a4582345c3a7b9e6931db8631f98ef3be14af Mon Sep 17 00:00:00 2001
From: firewave
Date: Wed, 27 Mar 2024 21:38:18 +0100
Subject: [PATCH] make it possible to pass char buffer to simplecpp

---
Review notes (text placed between the three-dash marker and the first
"diff --git" line is not part of the commit message):
 * Line breaks, indentation and template argument lists in this patch were
   reconstructed from a whitespace-collapsed copy. If a hunk is rejected
   because of context whitespace, retry with `git apply --ignore-whitespace`.
 * LLVMFuzzerTestOneInput() casts code.data() to `const uint8_t*`, matching
   the call in main(); std::string::data() returns `const char*`, which does
   not convert implicitly to the new doCheck() parameter type.
 * The @brief of CppCheck::checkInternal() describes the callbacks instead of
   repeating the wording used for checkStream().
 * NOTE(review): the doc comment of CppCheck::check(path, data, size) still
   mentions `@p content` in an unchanged context line - consider a follow-up.

 externals/simplecpp/simplecpp.cpp | 53 +++++++++++++++++++++++++++++--
 externals/simplecpp/simplecpp.h   |  4 +++
 lib/cppcheck.cpp                  | 51 +++++++++++++++++++----------
 lib/cppcheck.h                    | 45 +++++++++++++++++++++++---
 lib/tokenlist.cpp                 | 37 +++++++++++++++++++++
 lib/tokenlist.h                   | 17 ++++++++++
 oss-fuzz/main.cpp                 |  8 ++---
 7 files changed, 189 insertions(+), 26 deletions(-)

diff --git a/externals/simplecpp/simplecpp.cpp b/externals/simplecpp/simplecpp.cpp
index 812600bfbd08..a15fcb9e542b 100755
--- a/externals/simplecpp/simplecpp.cpp
+++ b/externals/simplecpp/simplecpp.cpp
@@ -377,6 +377,42 @@ class StdIStream : public simplecpp::TokenList::Stream {
     std::istream &istr;
 };
 
+class StdCharBufStream : public simplecpp::TokenList::Stream {
+public:
+    // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members
+    StdCharBufStream(const unsigned char* str, std::size_t size)
+        : str(str)
+        , size(size)
+        , pos(0)
+        , lastStatus(0)
+    {
+        init();
+    }
+
+    virtual int get() OVERRIDE {
+        if (pos >= size)
+            return lastStatus = EOF;
+        return str[pos++];
+    }
+    virtual int peek() OVERRIDE {
+        if (pos >= size)
+            return lastStatus = EOF;
+        return str[pos];
+    }
+    virtual void unget() OVERRIDE {
+        --pos;
+    }
+    virtual bool good() OVERRIDE {
+        return lastStatus != EOF;
+    }
+
+private:
+    const unsigned char *str;
+    const std::size_t size;
+    std::size_t pos;
+    int lastStatus;
+};
+
 class FileStream : public simplecpp::TokenList::Stream {
 public:
     // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members
@@ -442,6 +478,20 @@ simplecpp::TokenList::TokenList(std::istream &istr, std::vector<std::string> &fi
     readfile(stream,filename,outputList);
 }
 
+simplecpp::TokenList::TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
+    : frontToken(nullptr), backToken(nullptr), files(filenames)
+{
+    StdCharBufStream stream(data, size);
+    readfile(stream,filename,outputList);
+}
+
+simplecpp::TokenList::TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
+    : frontToken(nullptr), backToken(nullptr), files(filenames)
+{
+    StdCharBufStream stream(reinterpret_cast<const unsigned char*>(data), size);
+    readfile(stream,filename,outputList);
+}
+
 simplecpp::TokenList::TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList)
     : frontToken(nullptr), backToken(nullptr), files(filenames)
 {
@@ -1447,8 +1497,7 @@ namespace simplecpp {
 
         Macro(const std::string &name, const std::string &value, std::vector<std::string> &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(false) {
             const std::string def(name + ' ' + value);
-            std::istringstream istr(def);
-            StdIStream stream(istr);
+            StdCharBufStream stream(reinterpret_cast<const unsigned char*>(def.data()), def.size());
             tokenListDefine.readfile(stream);
             if (!parseDefine(tokenListDefine.cfront()))
                 throw std::runtime_error("bad macro syntax. macroname=" + name + " value=" + value);
diff --git a/externals/simplecpp/simplecpp.h b/externals/simplecpp/simplecpp.h
index c72e4c655cf5..71a54e81e1c1 100755
--- a/externals/simplecpp/simplecpp.h
+++ b/externals/simplecpp/simplecpp.h
@@ -198,6 +198,10 @@ namespace simplecpp {
         explicit TokenList(std::vector<std::string> &filenames);
         /** generates a token list from the given std::istream parameter */
         TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
+        /** generates a token list from the given buffer */
+        TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
+        /** generates a token list from the given buffer */
+        TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
         /** generates a token list from the given filename parameter */
         TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList = nullptr);
         TokenList(const TokenList &other);
diff --git a/lib/cppcheck.cpp b/lib/cppcheck.cpp
index 60eb88951d2d..52fb9bf1cff1 100644
--- a/lib/cppcheck.cpp
+++ b/lib/cppcheck.cpp
@@ -547,10 +547,9 @@ unsigned int CppCheck::check(const std::string &path)
     return checkFile(Path::simplifyPath(path), emptyString);
 }
 
-unsigned int CppCheck::check(const std::string &path, const std::string &content)
+unsigned int CppCheck::check(const std::string &path, const uint8_t* data, std::size_t size)
 {
-    std::istringstream iss(content);
-    return checkFile(Path::simplifyPath(path), emptyString, &iss);
+    return checkBuffer(Path::simplifyPath(path), emptyString, data, size);
 }
 
 unsigned int CppCheck::check(const FileSettings &fs)
@@ -590,15 +589,41 @@ unsigned int CppCheck::check(const FileSettings &fs)
     return returnValue;
 }
 
-static simplecpp::TokenList createTokenList(const std::string& filename, std::vector<std::string>& files, simplecpp::OutputList* outputList, std::istream* fileStream)
+unsigned int CppCheck::checkBuffer(const std::string& filename, const std::string &cfgname, const uint8_t* data, std::size_t size)
 {
-    if (fileStream)
-        return {*fileStream, files, filename, outputList};
+    return checkInternal(filename, cfgname,
+                         [&filename, data, size](TokenList& list) {
+        list.createTokens(data, size, filename);
+    },
+                         [&filename, data, size](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
+        return simplecpp::TokenList{data, size, files, filename, outputList};
+    });
+}
+
+unsigned int CppCheck::checkStream(const std::string& filename, const std::string &cfgname, std::istream& fileStream)
+{
+    return checkInternal(filename, cfgname,
+                         [&filename, &fileStream](TokenList& list) {
+        list.createTokens(fileStream, filename);
+    },
+                         [&filename, &fileStream](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
+        return simplecpp::TokenList{fileStream, files, filename, outputList};
+    });
+}
 
-    return {filename, files, outputList};
+unsigned int CppCheck::checkFile(const std::string& filename, const std::string &cfgname)
+{
+    return checkInternal(filename, cfgname,
+                         [&filename](TokenList& list) {
+        std::ifstream in(filename);
+        list.createTokens(in, filename);
+    },
+                         [&filename](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
+        return simplecpp::TokenList{filename, files, outputList};
+    });
 }
 
-unsigned int CppCheck::checkFile(const std::string& filename, const std::string &cfgname, std::istream* fileStream)
+unsigned int CppCheck::checkInternal(const std::string& filename, const std::string &cfgname, const CreateTokensFn& createTokens, const CreateTokenListFn& createTokenList)
 {
     // TODO: move to constructor when CppCheck no longer owns the settings
     if (mSettings.checks.isEnabled(Checks::unusedFunction) && !mUnusedFunctionsCheck)
@@ -644,13 +669,7 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string
                 // this is not a real source file - we just want to tokenize it. treat it as C anyways as the language needs to be determined.
                 Tokenizer tokenizer(mSettings, *this);
                 tokenizer.list.setLang(Standards::Language::C);
-                if (fileStream) {
-                    tokenizer.list.createTokens(*fileStream, filename);
-                }
-                else {
-                    std::ifstream in(filename);
-                    tokenizer.list.createTokens(in, filename);
-                }
+                createTokens(tokenizer.list);
                 mUnusedFunctionsCheck->parseTokens(tokenizer, mSettings);
             }
             return EXIT_SUCCESS;
@@ -658,7 +677,7 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string
 
         simplecpp::OutputList outputList;
         std::vector<std::string> files;
-        simplecpp::TokenList tokens1 = createTokenList(filename, files, &outputList, fileStream);
+        simplecpp::TokenList tokens1 = createTokenList(files, &outputList);
 
         // If there is a syntax error, report it and stop
         const auto output_it = std::find_if(outputList.cbegin(), outputList.cend(), [](const simplecpp::Output &output){
diff --git a/lib/cppcheck.h b/lib/cppcheck.h
index 2ca1f0238e94..cb96aaa503b9 100644
--- a/lib/cppcheck.h
+++ b/lib/cppcheck.h
@@ -46,7 +46,10 @@ class CheckUnusedFunctions;
 class Tokenizer;
 class FileWithDetails;
 
-namespace simplecpp { class TokenList; }
+namespace simplecpp {
+    class TokenList;
+    struct Output;
+}
 
 /// @addtogroup Core
 /// @{
@@ -96,12 +99,13 @@ class CPPCHECKLIB CppCheck : ErrorLogger {
      * the disk but the content is given in @p content. In errors the @p path
      * is used as a filename.
      * @param path Path to the file to check.
-     * @param content File content as a string.
+     * @param data File content as a buffer.
+     * @param size Size of buffer.
      * @return amount of errors found or 0 if none were found.
      * @note You must set settings before calling this function (by calling
      * settings()).
      */
-    unsigned int check(const std::string &path, const std::string &content);
+    unsigned int check(const std::string &path, const uint8_t* data, std::size_t size);
 
     /**
      * @brief Get reference to current settings.
@@ -176,7 +180,40 @@ class CPPCHECKLIB CppCheck : ErrorLogger {
      * @param fileStream stream the file content can be read from
      * @return number of errors found
      */
-    unsigned int checkFile(const std::string& filename, const std::string &cfgname, std::istream* fileStream = nullptr);
+    unsigned int checkStream(const std::string& filename, const std::string &cfgname, std::istream& fileStream);
+
+
+    /**
+     * @brief Check a file
+     * @param filename the file name to be read from
+     * @param cfgname  cfg name
+     * @return number of errors found
+     */
+    unsigned int checkFile(const std::string& filename, const std::string &cfgname);
+
+    /**
+     * @brief Check a file using buffer
+     * @param filename file name
+     * @param cfgname  cfg name
+     * @param data the data to be read
+     * @param size the size of the data to be read
+     * @return number of errors found
+     */
+    unsigned int checkBuffer(const std::string& filename, const std::string &cfgname, const uint8_t* data, std::size_t size);
+
+    using CreateTokensFn = std::function<void (TokenList&)>;
+    // TODO: should use simplecpp::OutputList
+    using CreateTokenListFn = std::function<simplecpp::TokenList (std::vector<std::string>&, std::list<simplecpp::Output>*)>;
+
+    /**
+     * @brief Check a file using the given token creation callbacks
+     * @param filename file name
+     * @param cfgname  cfg name
+     * @param createTokens a function to create the tokens with
+     * @param createTokenList a function to create the TokenList with
+     * @return number of errors found
+     */
+    unsigned int checkInternal(const std::string& filename, const std::string &cfgname, const CreateTokensFn& createTokens, const CreateTokenListFn& createTokenList);
 
     /**
      * @brief Check normal tokens
diff --git a/lib/tokenlist.cpp b/lib/tokenlist.cpp
index 786647aa4ab7..5dae684fb291 100644
--- a/lib/tokenlist.cpp
+++ b/lib/tokenlist.cpp
@@ -344,6 +344,17 @@ bool TokenList::createTokens(std::istream &code, const std::string& file0)
 
 //---------------------------------------------------------------------------
 
+bool TokenList::createTokens(const uint8_t* data, size_t size, const std::string& file0)
+{
+    ASSERT_LANG(!file0.empty());
+
+    appendFileIfNew(file0);
+
+    return createTokensInternal(data, size, file0);
+}
+
+//---------------------------------------------------------------------------
+
 bool TokenList::createTokens(std::istream &code, Standards::Language lang)
 {
     ASSERT_LANG(lang != Standards::Language::None);
@@ -358,6 +369,20 @@ bool TokenList::createTokens(std::istream &code, Standards::Language lang)
 
 //---------------------------------------------------------------------------
 
+bool TokenList::createTokens(const uint8_t* data, size_t size, Standards::Language lang)
+{
+    ASSERT_LANG(lang != Standards::Language::None);
+    if (mLang == Standards::Language::None) {
+        mLang = lang;
+    } else {
+        ASSERT_LANG(lang == mLang);
+    }
+
+    return createTokensInternal(data, size, "");
+}
+
+//---------------------------------------------------------------------------
+
 bool TokenList::createTokensInternal(std::istream &code, const std::string& file0)
 {
     simplecpp::OutputList outputList;
@@ -370,6 +395,18 @@ bool TokenList::createTokensInternal(std::istream &code, const std::string& file
 
 //---------------------------------------------------------------------------
 
+bool TokenList::createTokensInternal(const uint8_t* data, size_t size, const std::string& file0)
+{
+    simplecpp::OutputList outputList;
+    simplecpp::TokenList tokens(data, size, mFiles, file0, &outputList);
+
+    createTokens(std::move(tokens));
+
+    return outputList.empty();
+}
+
+//---------------------------------------------------------------------------
+
 // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
 void TokenList::createTokens(simplecpp::TokenList&& tokenList)
 {
diff --git a/lib/tokenlist.h b/lib/tokenlist.h
index c03cf3b6382e..b0345fa9ada0 100644
--- a/lib/tokenlist.h
+++ b/lib/tokenlist.h
@@ -105,7 +105,23 @@ class CPPCHECKLIB TokenList {
      * @param file0 source file name
      */
     bool createTokens(std::istream &code, const std::string& file0);
+    bool createTokens(const uint8_t* data, size_t size, const std::string& file0);
+    bool createTokens(const char* data, size_t size, const std::string& file0) {
+        return createTokens(reinterpret_cast<const uint8_t*>(data), size, file0);
+    }
+    template<size_t size>
+    bool createTokens(const char (&data)[size], const std::string& file0) {
+        return createTokens(reinterpret_cast<const uint8_t*>(data), size-1, file0);
+    }
     bool createTokens(std::istream &code, Standards::Language lang);
+    bool createTokens(const uint8_t* data, size_t size, Standards::Language lang);
+    bool createTokens(const char* data, size_t size, Standards::Language lang) {
+        return createTokens(reinterpret_cast<const uint8_t*>(data), size, lang);
+    }
+    template<size_t size>
+    bool createTokens(const char (&data)[size], Standards::Language lang) {
+        return createTokens(reinterpret_cast<const uint8_t*>(data), size-1, lang);
+    }
 
     void createTokens(simplecpp::TokenList&& tokenList);
 
@@ -204,6 +220,7 @@ class CPPCHECKLIB TokenList {
     void determineCppC();
 
     bool createTokensInternal(std::istream &code, const std::string& file0);
+    bool createTokensInternal(const uint8_t* data, std::size_t size, const std::string& file0);
 
     /** Token list */
     TokensFrontBack mTokensFrontBack;
diff --git a/oss-fuzz/main.cpp b/oss-fuzz/main.cpp
index ee419d584b37..548ab0c808e5 100644
--- a/oss-fuzz/main.cpp
+++ b/oss-fuzz/main.cpp
@@ -38,12 +38,12 @@ class DummyErrorLogger : public ErrorLogger {
 
 static DummyErrorLogger s_errorLogger;
 
-static void doCheck(const std::string& code)
+static void doCheck(const uint8_t *data, size_t dataSize)
 {
     CppCheck cppcheck(s_errorLogger, false, nullptr);
     cppcheck.settings().addEnabled("all");
     cppcheck.settings().certainty.setEnabled(Certainty::inconclusive, true);
-    cppcheck.check("test.cpp", code);
+    cppcheck.check("test.cpp", data, dataSize);
 }
 
 #ifndef NO_FUZZ
@@ -53,7 +53,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t dataSize)
 {
     if (dataSize < 10000) {
         const std::string code = generateCode2(data, dataSize);
-        doCheck(code);
+        doCheck(reinterpret_cast<const uint8_t*>(code.data()), code.size());
     }
     return 0;
 }
@@ -77,7 +77,7 @@ int main(int argc, char * argv[])
 
     const std::string code = oss.str();
     for (int i = 0; i < cnt; ++i)
-        doCheck(code);
+        doCheck(reinterpret_cast<const uint8_t*>(code.data()), code.size());
 
     return EXIT_SUCCESS;
 }