Skip to content

Commit

Permalink
make it possible to pass char buffer to simplecpp
Browse files Browse the repository at this point in the history
  • Loading branch information
firewave committed May 3, 2024
1 parent 34a202d commit b51a458
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 26 deletions.
53 changes: 51 additions & 2 deletions externals/simplecpp/simplecpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,42 @@ class StdIStream : public simplecpp::TokenList::Stream {
std::istream &istr;
};

/**
 * Stream implementation that reads from a caller-supplied byte buffer.
 * Only the pointer and the length are stored; the bytes are not copied.
 */
class StdCharBufStream : public simplecpp::TokenList::Stream {
public:
    // cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members
    StdCharBufStream(const unsigned char* str, std::size_t size)
        : str(str)
        , size(size)
        , pos(0)
        , lastStatus(0)
    {
        init();
    }

    /** Returns the byte at the read position and advances; once the buffer is exhausted, records and returns EOF. */
    virtual int get() OVERRIDE {
        if (pos < size) {
            const unsigned char current = str[pos];
            ++pos;
            return current;
        }
        lastStatus = EOF;
        return EOF;
    }

    /** Returns the byte at the read position without advancing; once the buffer is exhausted, records and returns EOF. */
    virtual int peek() OVERRIDE {
        if (pos < size)
            return str[pos];
        lastStatus = EOF;
        return EOF;
    }

    /** Steps the read position back by one. There is no bounds check: at position 0 the unsigned index wraps around. */
    virtual void unget() OVERRIDE {
        pos -= 1;
    }

    /** True until get() or peek() has hit the end of the buffer. */
    virtual bool good() OVERRIDE {
        return !(lastStatus == EOF);
    }

private:
    const unsigned char *str;   // start of the buffer
    const std::size_t size;     // number of bytes in the buffer
    std::size_t pos;            // index of the next byte to read
    int lastStatus;             // set to EOF once a read past the end was attempted
};

class FileStream : public simplecpp::TokenList::Stream {
public:
// cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members
Expand Down Expand Up @@ -442,6 +478,20 @@ simplecpp::TokenList::TokenList(std::istream &istr, std::vector<std::string> &fi
readfile(stream,filename,outputList);
}

// Builds the token list by reading `size` bytes from the buffer `data`.
simplecpp::TokenList::TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
    : frontToken(nullptr)
    , backToken(nullptr)
    , files(filenames)
{
    StdCharBufStream bufferStream(data, size);
    readfile(bufferStream, filename, outputList);
}

// Builds the token list from a char buffer; the bytes are reinterpreted as unsigned char before reading.
simplecpp::TokenList::TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
    : frontToken(nullptr)
    , backToken(nullptr)
    , files(filenames)
{
    const unsigned char* const bytes = reinterpret_cast<const unsigned char*>(data);
    StdCharBufStream bufferStream(bytes, size);
    readfile(bufferStream, filename, outputList);
}

simplecpp::TokenList::TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList)
: frontToken(nullptr), backToken(nullptr), files(filenames)
{
Expand Down Expand Up @@ -1447,8 +1497,7 @@ namespace simplecpp {

Macro(const std::string &name, const std::string &value, std::vector<std::string> &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(false) {
const std::string def(name + ' ' + value);
std::istringstream istr(def);
StdIStream stream(istr);
StdCharBufStream stream(reinterpret_cast<const unsigned char*>(def.data()), def.size());
tokenListDefine.readfile(stream);
if (!parseDefine(tokenListDefine.cfront()))
throw std::runtime_error("bad macro syntax. macroname=" + name + " value=" + value);
Expand Down
4 changes: 4 additions & 0 deletions externals/simplecpp/simplecpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ namespace simplecpp {
explicit TokenList(std::vector<std::string> &filenames);
/** generates a token list from the given std::istream parameter */
TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
/** generates a token list from the given buffer */
TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
/** generates a token list from the given buffer */
TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
/** generates a token list from the given filename parameter */
TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList = nullptr);
TokenList(const TokenList &other);
Expand Down
51 changes: 35 additions & 16 deletions lib/cppcheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,10 +547,9 @@ unsigned int CppCheck::check(const std::string &path)
return checkFile(Path::simplifyPath(path), emptyString);
}

unsigned int CppCheck::check(const std::string &path, const std::string &content)
unsigned int CppCheck::check(const std::string &path, const uint8_t* data, std::size_t size)
{
std::istringstream iss(content);
return checkFile(Path::simplifyPath(path), emptyString, &iss);
return checkBuffer(Path::simplifyPath(path), emptyString, data, size);
}

unsigned int CppCheck::check(const FileSettings &fs)
Expand Down Expand Up @@ -590,15 +589,41 @@ unsigned int CppCheck::check(const FileSettings &fs)
return returnValue;
}

static simplecpp::TokenList createTokenList(const std::string& filename, std::vector<std::string>& files, simplecpp::OutputList* outputList, std::istream* fileStream)
unsigned int CppCheck::checkBuffer(const std::string& filename, const std::string &cfgname, const uint8_t* data, std::size_t size)
{
if (fileStream)
return {*fileStream, files, filename, outputList};
return checkInternal(filename, cfgname,
[&filename, data, size](TokenList& list) {
list.createTokens(data, size, filename);
},
[&filename, data, size](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
return simplecpp::TokenList{data, size, files, filename, outputList};
});
}

unsigned int CppCheck::checkStream(const std::string& filename, const std::string &cfgname, std::istream& fileStream)
{
return checkInternal(filename, cfgname,
[&filename, &fileStream](TokenList& list) {
list.createTokens(fileStream, filename);
},
[&filename, &fileStream](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
return simplecpp::TokenList{fileStream, files, filename, outputList};
});
}

return {filename, files, outputList};
unsigned int CppCheck::checkFile(const std::string& filename, const std::string &cfgname)
{
return checkInternal(filename, cfgname,
[&filename](TokenList& list) {
std::ifstream in(filename);
list.createTokens(in, filename);
},
[&filename](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
return simplecpp::TokenList{filename, files, outputList};
});
}

unsigned int CppCheck::checkFile(const std::string& filename, const std::string &cfgname, std::istream* fileStream)
unsigned int CppCheck::checkInternal(const std::string& filename, const std::string &cfgname, const CreateTokensFn& createTokens, const CreateTokenListFn& createTokenList)
{
// TODO: move to constructor when CppCheck no longer owns the settings
if (mSettings.checks.isEnabled(Checks::unusedFunction) && !mUnusedFunctionsCheck)
Expand Down Expand Up @@ -644,21 +669,15 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string
// this is not a real source file - we just want to tokenize it. treat it as C anyways as the language needs to be determined.
Tokenizer tokenizer(mSettings, *this);
tokenizer.list.setLang(Standards::Language::C);
if (fileStream) {
tokenizer.list.createTokens(*fileStream, filename);
}
else {
std::ifstream in(filename);
tokenizer.list.createTokens(in, filename);
}
createTokens(tokenizer.list);
mUnusedFunctionsCheck->parseTokens(tokenizer, mSettings);
}
return EXIT_SUCCESS;
}

simplecpp::OutputList outputList;
std::vector<std::string> files;
simplecpp::TokenList tokens1 = createTokenList(filename, files, &outputList, fileStream);
simplecpp::TokenList tokens1 = createTokenList(files, &outputList);

// If there is a syntax error, report it and stop
const auto output_it = std::find_if(outputList.cbegin(), outputList.cend(), [](const simplecpp::Output &output){
Expand Down
45 changes: 41 additions & 4 deletions lib/cppcheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ class CheckUnusedFunctions;
class Tokenizer;
class FileWithDetails;

namespace simplecpp { class TokenList; }
namespace simplecpp {
class TokenList;
struct Output;
}

/// @addtogroup Core
/// @{
Expand Down Expand Up @@ -96,12 +99,13 @@ class CPPCHECKLIB CppCheck : ErrorLogger {
* the disk but the content is given in @p data (of @p size bytes). In errors the @p path
* is used as a filename.
* @param path Path to the file to check.
* @param content File content as a string.
* @param data File content as a buffer.
* @param size Size of buffer.
* @return amount of errors found or 0 if none were found.
* @note You must set settings before calling this function (by calling
* settings()).
*/
unsigned int check(const std::string &path, const std::string &content);
unsigned int check(const std::string &path, const uint8_t* data, std::size_t size);

/**
* @brief Get reference to current settings.
Expand Down Expand Up @@ -176,7 +180,40 @@ class CPPCHECKLIB CppCheck : ErrorLogger {
* @param fileStream stream the file content can be read from
* @return number of errors found
*/
unsigned int checkFile(const std::string& filename, const std::string &cfgname, std::istream* fileStream = nullptr);
unsigned int checkStream(const std::string& filename, const std::string &cfgname, std::istream& fileStream);


/**
* @brief Check a file
* @param filename the file name to be read from
* @param cfgname cfg name
* @return number of errors found
*/
unsigned int checkFile(const std::string& filename, const std::string &cfgname);

/**
* @brief Check a file using buffer
* @param filename file name
* @param cfgname cfg name
* @param data the data to be read
* @param size the size of the data to be read
* @return number of errors found
*/
unsigned int checkBuffer(const std::string& filename, const std::string &cfgname, const uint8_t* data, std::size_t size);

using CreateTokensFn = std::function<void(TokenList&)>;
// TODO: should use simplecpp::OutputList
using CreateTokenListFn = std::function<simplecpp::TokenList(std::vector<std::string>&, std::list<simplecpp::Output>*)>;

/**
* @brief Check a file using the given token-creation callbacks
* @param filename file name
* @param cfgname cfg name
* @param createTokens a function to create the tokens with
* @param createTokenList a function to create the TokenList with
* @return number of errors found
*/
unsigned int checkInternal(const std::string& filename, const std::string &cfgname, const CreateTokensFn& createTokens, const CreateTokenListFn& createTokenList);

/**
* @brief Check normal tokens
Expand Down
37 changes: 37 additions & 0 deletions lib/tokenlist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,17 @@ bool TokenList::createTokens(std::istream &code, const std::string& file0)

//---------------------------------------------------------------------------

// Tokenizes the buffer `data` of `size` bytes under the file name `file0`.
// `file0` is registered through appendFileIfNew() before tokenizing.
// Returns the result of createTokensInternal().
bool TokenList::createTokens(const uint8_t* data, size_t size, const std::string& file0)
{
    ASSERT_LANG(!file0.empty());

    appendFileIfNew(file0);

    const bool tokenized = createTokensInternal(data, size, file0);
    return tokenized;
}

//---------------------------------------------------------------------------

bool TokenList::createTokens(std::istream &code, Standards::Language lang)
{
ASSERT_LANG(lang != Standards::Language::None);
Expand All @@ -358,6 +369,20 @@ bool TokenList::createTokens(std::istream &code, Standards::Language lang)

//---------------------------------------------------------------------------

// Tokenizes the buffer `data` of `size` bytes for the given language, with an empty file name.
// If no language has been set on this list yet, `lang` becomes the list's language.
bool TokenList::createTokens(const uint8_t* data, size_t size, Standards::Language lang)
{
    ASSERT_LANG(lang != Standards::Language::None);
    if (mLang != Standards::Language::None) {
        // a language is already set - the requested one has to agree with it
        ASSERT_LANG(lang == mLang);
    } else {
        mLang = lang;
    }

    return createTokensInternal(data, size, "");
}

//---------------------------------------------------------------------------

bool TokenList::createTokensInternal(std::istream &code, const std::string& file0)
{
simplecpp::OutputList outputList;
Expand All @@ -370,6 +395,18 @@ bool TokenList::createTokensInternal(std::istream &code, const std::string& file

//---------------------------------------------------------------------------

bool TokenList::createTokensInternal(const uint8_t* data, size_t size, const std::string& file0)
{
simplecpp::OutputList outputList;
simplecpp::TokenList tokens(data, size, mFiles, file0, &outputList);

createTokens(std::move(tokens));

return outputList.empty();
}

//---------------------------------------------------------------------------

// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
void TokenList::createTokens(simplecpp::TokenList&& tokenList)
{
Expand Down
17 changes: 17 additions & 0 deletions lib/tokenlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,23 @@ class CPPCHECKLIB TokenList {
* @param file0 source file name
*/
bool createTokens(std::istream &code, const std::string& file0);
bool createTokens(const uint8_t* data, size_t size, const std::string& file0);
/** Convenience overload: reinterprets the char buffer as uint8_t and forwards to the uint8_t overload. */
bool createTokens(const char* data, size_t size, const std::string& file0) {
    const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(data);
    return createTokens(bytes, size, file0);
}
template<size_t size>
bool createTokens(const char (&data)[size], const std::string& file0) {
return createTokens(reinterpret_cast<const uint8_t*>(data), size-1, file0);
}
bool createTokens(std::istream &code, Standards::Language lang);
bool createTokens(const uint8_t* data, size_t size, Standards::Language lang);
/** Convenience overload: reinterprets the char buffer as uint8_t and forwards to the uint8_t overload. */
bool createTokens(const char* data, size_t size, Standards::Language lang) {
    const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(data);
    return createTokens(bytes, size, lang);
}
/**
 * Convenience overload for char arrays. The last array element is not passed on
 * (size-1 bytes are forwarded) - presumably meant for string literals, where that
 * element is the terminating NUL.
 */
template<size_t size>
bool createTokens(const char (&data)[size], Standards::Language lang) {
    const size_t length = size - 1;
    return createTokens(reinterpret_cast<const uint8_t*>(data), length, lang);
}

void createTokens(simplecpp::TokenList&& tokenList);

Expand Down Expand Up @@ -204,6 +220,7 @@ class CPPCHECKLIB TokenList {
void determineCppC();

bool createTokensInternal(std::istream &code, const std::string& file0);
bool createTokensInternal(const uint8_t* data, std::size_t size, const std::string& file0);

/** Token list */
TokensFrontBack mTokensFrontBack;
Expand Down
8 changes: 4 additions & 4 deletions oss-fuzz/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ class DummyErrorLogger : public ErrorLogger {

static DummyErrorLogger s_errorLogger;

static void doCheck(const std::string& code)
static void doCheck(const uint8_t *data, size_t dataSize)
{
CppCheck cppcheck(s_errorLogger, false, nullptr);
cppcheck.settings().addEnabled("all");
cppcheck.settings().certainty.setEnabled(Certainty::inconclusive, true);
cppcheck.check("test.cpp", code);
cppcheck.check("test.cpp", data, dataSize);
}

#ifndef NO_FUZZ
Expand All @@ -53,7 +53,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t dataSize)
{
    // Inputs of 10000 bytes or more are not checked.
    if (dataSize < 10000) {
        const std::string code = generateCode2(data, dataSize);
        // doCheck() takes a const uint8_t* buffer, while std::string::data() yields
        // const char*. There is no implicit conversion between the two, so cast explicitly.
        doCheck(reinterpret_cast<const uint8_t*>(code.data()), code.size());
    }
    return 0;
}
Expand All @@ -77,7 +77,7 @@ int main(int argc, char * argv[])

const std::string code = oss.str();
for (int i = 0; i < cnt; ++i)
doCheck(code);
doCheck(reinterpret_cast<const unsigned char*>(code.data()), code.size());

return EXIT_SUCCESS;
}
Expand Down

0 comments on commit b51a458

Please sign in to comment.