From 516107a804269bbaf94229ab1ad97bc95acc5735 Mon Sep 17 00:00:00 2001 From: firewave Date: Wed, 24 Jan 2024 09:21:21 +0100 Subject: [PATCH] added command-line option `--cpp-probe` to probe headers and extension-less files for Emacs marker [skip ci] --- cli/cmdlineparser.cpp | 8 +++ lib/cppcheck.cpp | 6 +- lib/path.cpp | 71 ++++++++++++++++++-- lib/path.h | 3 +- lib/preprocessor.cpp | 2 +- lib/settings.h | 3 + lib/tokenlist.cpp | 2 +- test/cli/other_test.py | 31 ++++++++- test/cli/testutils.py | 4 +- test/testcmdlineparser.cpp | 32 +++++++++ test/testpath.cpp | 134 ++++++++++++++++++++++++++----------- 11 files changed, 245 insertions(+), 51 deletions(-) diff --git a/cli/cmdlineparser.cpp b/cli/cmdlineparser.cpp index 4f2209e11f65..25052a67edae 100644 --- a/cli/cmdlineparser.cpp +++ b/cli/cmdlineparser.cpp @@ -544,6 +544,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a } } + else if (std::strcmp(argv[i], "--cpp-probe") == 0) { + mSettings.cppProbe = true; + } + // Show --debug output after the first simplifications else if (std::strcmp(argv[i], "--debug") == 0 || std::strcmp(argv[i], "--debug-normal") == 0) @@ -887,6 +891,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a return Result::Fail; } + else if (std::strcmp(argv[i], "--no-cpp-probe") == 0) { + mSettings.cppProbe = false; + } + // Write results in file else if (std::strncmp(argv[i], "--output-file=", 14) == 0) mSettings.outputFile = Path::simplifyPath(Path::fromNativeSeparators(argv[i] + 14)); diff --git a/lib/cppcheck.cpp b/lib/cppcheck.cpp index 60eb88951d2d..1f02bf38fe6b 100644 --- a/lib/cppcheck.cpp +++ b/lib/cppcheck.cpp @@ -182,7 +182,7 @@ static void createDumpFile(const Settings& settings, case Standards::Language::None: { // TODO: error out on unknown language? 
- const Standards::Language lang = Path::identify(filename); + const Standards::Language lang = Path::identify(filename, settings.cppProbe); if (lang == Standards::Language::CPP) language = " language=\"cpp\""; else if (lang == Standards::Language::C) @@ -420,7 +420,7 @@ unsigned int CppCheck::checkClang(const std::string &path) mErrorLogger.reportOut(std::string("Checking ") + path + " ...", Color::FgGreen); // TODO: this ignores the configured language - const bool isCpp = Path::identify(path) == Standards::Language::CPP; + const bool isCpp = Path::identify(path, mSettings.cppProbe) == Standards::Language::CPP; const std::string langOpt = isCpp ? "-x c++" : "-x c"; const std::string analyzerInfo = mSettings.buildDir.empty() ? std::string() : AnalyzerInformation::getAnalyzerInfoFile(mSettings.buildDir, path, emptyString); const std::string clangcmd = analyzerInfo + ".clang-cmd"; @@ -783,7 +783,7 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string TokenList tokenlist(&mSettings); std::istringstream istr2(code); // TODO: asserts when file has unknown extension - tokenlist.createTokens(istr2, Path::identify(*files.begin())); // TODO: check result? + tokenlist.createTokens(istr2, Path::identify(*files.begin(), false)); // TODO: check result? 
executeRules("define", tokenlist); } #endif diff --git a/lib/path.cpp b/lib/path.cpp index f7d698034629..12ec5b78b4fd 100644 --- a/lib/path.cpp +++ b/lib/path.cpp @@ -24,7 +24,9 @@ #include "utils.h" #include +#include #include +#include #include #include #include @@ -235,7 +237,7 @@ bool Path::isCPP(const std::string &path) bool Path::acceptFile(const std::string &path, const std::set &extra) { bool header = false; - return (identify(path, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end(); + return (identify(path, false, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end(); } // cppcheck-suppress unusedFunction @@ -245,13 +247,72 @@ bool Path::isHeader(const std::string &path) return startsWith(extension, ".h"); } -Standards::Language Path::identify(const std::string &path, bool *header) +#include + +static bool hasEmacsCppMarker(const char* path) +{ + FILE *fp = fopen(path, "rt"); + if (!fp) + return false; + std::unique_ptr fp_deleter(fp, fclose); + std::string buf(1024, '\0'); + // TODO: read first line only + if (fgets(const_cast(buf.data()), buf.size(), fp) == nullptr) + return false; // failed to read file + // TODO: replace with regular expression + const auto pos1 = buf.find("-*-"); + if (pos1 == std::string::npos) + return false; // no start marker + const auto pos_nl = buf.find_first_of("\r\n"); + if (pos_nl != std::string::npos && (pos_nl < pos1)) + return false; // not on first line + const auto pos2 = buf.find("-*-", pos1 + 3); + // TODO: make sure we have read the whole line before bailing out + if (pos2 == std::string::npos) + return false; // no end marker + const std::string buf_trim = trim(buf); // trim whitespaces + if (buf_trim[0] != '/' || buf_trim[1] != '/') + return false; // not a comment + + std::cout /*<< path << " -*/ << "Emacs marker: '" << buf.substr(pos1, (pos2 + 3) - pos1) << "'" << std::endl; + + // there are more 
variations with lowercase and no whitespaces + // -*- C++ -*- + // -*- Mode: C++; -*- + // -*- Mode: C++; c-basic-offset: 8 -*- + std::string marker = trim(buf.substr(pos1 + 3, pos2 - pos1 - 3), " ;"); + // cut off additional attributes + const auto pos_semi = marker.find(';'); + if (pos_semi != std::string::npos) + marker.resize(pos_semi); + findAndReplace(marker, "mode:", ""); + findAndReplace(marker, "Mode:", ""); + marker = trim(marker); + if (marker == "C++" || marker == "c++") + return true; // C++ marker found + + //if (marker == "C" || marker == "c") + // return false; + std::cout << path << " - unmatched Emacs marker: '" << marker << "'" << std::endl; + + return false; // marker is not a C++ one +} + +Standards::Language Path::identify(const std::string &path, bool cppProbe, bool *header) { // cppcheck-suppress uninitvar - TODO: FP if (header) *header = false; std::string ext = getFilenameExtension(path); + // standard library headers have no extension + if (cppProbe && ext.empty()) { + if (hasEmacsCppMarker(path.c_str())) { + if (header) + *header = true; + return Standards::Language::CPP; + } + } if (ext == ".C") return Standards::Language::CPP; if (c_src_exts.find(ext) != c_src_exts.end()) @@ -262,7 +323,9 @@ Standards::Language Path::identify(const std::string &path, bool *header) if (ext == ".h") { if (header) *header = true; - return Standards::Language::C; // treat as C for now + if (cppProbe && hasEmacsCppMarker(path.c_str())) + return Standards::Language::CPP; + return Standards::Language::C; } if (cpp_src_exts.find(ext) != cpp_src_exts.end()) return Standards::Language::CPP; @@ -277,7 +340,7 @@ Standards::Language Path::identify(const std::string &path, bool *header) bool Path::isHeader2(const std::string &path) { bool header; - (void)Path::identify(path, &header); + (void)identify(path, false, &header); return header; } diff --git a/lib/path.h b/lib/path.h index 9e0e733046cc..a468134c8e34 100644 --- a/lib/path.h +++ b/lib/path.h @@ -187,10 
+187,11 @@ class CPPCHECKLIB Path { /** * @brief Identify the language based on the file extension * @param path filename to check. path info is optional + * @param cppProbe check optional Emacs marker to identify headers as C++ * @param header if provided indicates if the file is a header * @return the language type */ - static Standards::Language identify(const std::string &path, bool *header = nullptr); + static Standards::Language identify(const std::string &path, bool cppProbe, bool *header = nullptr); /** * @brief Get filename without a directory path part. diff --git a/lib/preprocessor.cpp b/lib/preprocessor.cpp index 70e2e560770c..c87898718ba7 100644 --- a/lib/preprocessor.cpp +++ b/lib/preprocessor.cpp @@ -684,7 +684,7 @@ static simplecpp::DUI createDUI(const Settings &mSettings, const std::string &cf dui.includes = mSettings.userIncludes; // --include // TODO: use mSettings.standards.stdValue instead // TODO: error out on unknown language? - const Standards::Language lang = Path::identify(filename); + const Standards::Language lang = Path::identify(filename, mSettings.cppProbe); if (lang == Standards::Language::CPP) { dui.std = mSettings.standards.getCPP(); splitcfg(mSettings.platform.getLimitsDefines(Standards::getCPP(dui.std)), dui.defines, ""); diff --git a/lib/settings.h b/lib/settings.h index 6cbceeefbaf7..3935a2f34970 100644 --- a/lib/settings.h +++ b/lib/settings.h @@ -167,6 +167,9 @@ class CPPCHECKLIB WARN_UNUSED Settings { /** cppcheck.cfg: About text */ std::string cppcheckCfgAbout; + /** @brief check Emacs marker to detect header files as C++ */ + bool cppProbe{}; + /** @brief Are we running from DACA script? 
*/ bool daca{}; diff --git a/lib/tokenlist.cpp b/lib/tokenlist.cpp index 25d1764f8303..29782e6c751a 100644 --- a/lib/tokenlist.cpp +++ b/lib/tokenlist.cpp @@ -95,7 +95,7 @@ void TokenList::determineCppC() { // only try to determine if it wasn't enforced if (mLang == Standards::Language::None) { - mLang = Path::identify(getSourceFilePath()); + mLang = Path::identify(getSourceFilePath(), mSettings ? mSettings->cppProbe : false); // TODO: cannot enable assert as this might occur for unknown extensions //ASSERT_LANG(mLang != Standards::Language::None); if (mLang == Standards::Language::None) { diff --git a/test/cli/other_test.py b/test/cli/other_test.py index 1ac8e4a341ec..428a8cccdaeb 100644 --- a/test/cli/other_test.py +++ b/test/cli/other_test.py @@ -1357,4 +1357,33 @@ def test_rule(tmpdir): lines = stderr.splitlines() assert lines == [ "{}:4:0: style: found 'f' [rule]".format(test_file) - ] \ No newline at end of file + ] + + +def test_cpp_probe(tmpdir): + test_file = os.path.join(tmpdir, 'test.h') + with open(test_file, 'wt') as f: + f.writelines([ + 'class A {};' + ]) + + args = ['-q', '--template=simple', '--cpp-probe', test_file] + err_lines = [ + "{}:1:1: error: Code 'classA{{' is invalid C code. Use --std or --language to configure the language. 
[syntaxError]".format(test_file) + ] + + assert_cppcheck(args, ec_exp=0, err_exp=err_lines, out_exp=[]) + + +def test_cpp_probe_2(tmpdir): + test_file = os.path.join(tmpdir, 'test.h') + with open(test_file, 'wt') as f: + f.writelines([ + '// -*- C++ -*-', + 'class A {};' + ]) + + # TODO: the probing is performed twice + args = ['-q', '--template=simple', '--cpp-probe', test_file] + + assert_cppcheck(args, ec_exp=0, err_exp=[], out_exp=[]) diff --git a/test/cli/testutils.py b/test/cli/testutils.py index a6692bde8bc5..8f1bde408708 100644 --- a/test/cli/testutils.py +++ b/test/cli/testutils.py @@ -163,7 +163,7 @@ def assert_cppcheck(args, ec_exp=None, out_exp=None, err_exp=None, env=None): assert exitcode == ec_exp, stdout if out_exp is not None: out_lines = stdout.splitlines() - assert out_lines == out_exp, stdout + assert out_lines == out_exp, out_lines if err_exp is not None: err_lines = stderr.splitlines() - assert err_lines == err_exp, stderr + assert err_lines == err_exp, err_lines diff --git a/test/testcmdlineparser.cpp b/test/testcmdlineparser.cpp index cde8d2381de6..017ddb74c5a3 100644 --- a/test/testcmdlineparser.cpp +++ b/test/testcmdlineparser.cpp @@ -387,6 +387,10 @@ class TestCmdlineParser : public TestFixture { TEST_CASE(checkLevelNormal); TEST_CASE(checkLevelExhaustive); TEST_CASE(checkLevelUnknown); + TEST_CASE(cppProbe); + TEST_CASE(cppProbe2); + TEST_CASE(noCppProbe); + TEST_CASE(noCppProbe2); TEST_CASE(ignorepaths1); TEST_CASE(ignorepaths2); @@ -2603,6 +2607,34 @@ class TestCmdlineParser : public TestFixture { ASSERT_EQUALS("cppcheck: error: unknown '--check-level' value 'default'.\n", logger->str()); } + void cppProbe() { + REDIRECT; + const char * const argv[] = {"cppcheck", "--cpp-probe", "file.cpp"}; + ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(3, argv)); + ASSERT_EQUALS(true, settings->cppProbe); + } + + void cppProbe2() { + REDIRECT; + const char * const argv[] = {"cppcheck", "--no-cpp-probe", "--cpp-probe", 
"file.cpp"}; + ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(4, argv)); + ASSERT_EQUALS(true, settings->cppProbe); + } + + void noCppProbe() { + REDIRECT; + const char * const argv[] = {"cppcheck", "--no-cpp-probe", "file.cpp"}; + ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(3, argv)); + ASSERT_EQUALS(false, settings->cppProbe); + } + + void noCppProbe2() { + REDIRECT; + const char * const argv[] = {"cppcheck", "--cpp-probe", "--no-cpp-probe", "file.cpp"}; + ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(4, argv)); + ASSERT_EQUALS(false, settings->cppProbe); + } + void ignorepaths1() { REDIRECT; const char * const argv[] = {"cppcheck", "-isrc", "file.cpp"}; diff --git a/test/testpath.cpp b/test/testpath.cpp index 0bd3ae2f5f92..e8065f1f7506 100644 --- a/test/testpath.cpp +++ b/test/testpath.cpp @@ -48,6 +48,7 @@ class TestPath : public TestFixture { TEST_CASE(sameFileName); TEST_CASE(getFilenameExtension); TEST_CASE(identify); + TEST_CASE(identifyWithCppProbe); TEST_CASE(is_header_2); } @@ -292,80 +293,137 @@ class TestPath : public TestFixture { Standards::Language lang; bool header; - ASSERT_EQUALS(Standards::Language::None, Path::identify("")); - ASSERT_EQUALS(Standards::Language::None, Path::identify("c")); - ASSERT_EQUALS(Standards::Language::None, Path::identify("cpp")); - ASSERT_EQUALS(Standards::Language::None, Path::identify("h")); - ASSERT_EQUALS(Standards::Language::None, Path::identify("hpp")); + ASSERT_EQUALS(Standards::Language::None, Path::identify("", false)); + ASSERT_EQUALS(Standards::Language::None, Path::identify("c", false)); + ASSERT_EQUALS(Standards::Language::None, Path::identify("cpp", false)); + ASSERT_EQUALS(Standards::Language::None, Path::identify("h", false)); + ASSERT_EQUALS(Standards::Language::None, Path::identify("hpp", false)); // TODO: what about files starting with a "."? 
- //ASSERT_EQUALS(Standards::Language::None, Path::identify(".c")); - //ASSERT_EQUALS(Standards::Language::None, Path::identify(".cpp")); - //ASSERT_EQUALS(Standards::Language::None, Path::identify(".h")); - //ASSERT_EQUALS(Standards::Language::None, Path::identify(".hpp")); + //ASSERT_EQUALS(Standards::Language::None, Path::identify(".c", false)); + //ASSERT_EQUALS(Standards::Language::None, Path::identify(".cpp", false)); + //ASSERT_EQUALS(Standards::Language::None, Path::identify(".h", false)); + //ASSERT_EQUALS(Standards::Language::None, Path::identify(".hpp", false)); // C - ASSERT_EQUALS(Standards::Language::C, Path::identify("index.c")); - ASSERT_EQUALS(Standards::Language::C, Path::identify("index.cl")); - ASSERT_EQUALS(Standards::Language::C, Path::identify("C:\\foo\\index.c")); - ASSERT_EQUALS(Standards::Language::C, Path::identify("/mnt/c/foo/index.c")); + ASSERT_EQUALS(Standards::Language::C, Path::identify("index.c", false)); + ASSERT_EQUALS(Standards::Language::C, Path::identify("index.cl", false)); + ASSERT_EQUALS(Standards::Language::C, Path::identify("C:\\foo\\index.c", false)); + ASSERT_EQUALS(Standards::Language::C, Path::identify("/mnt/c/foo/index.c", false)); // In unix .C is considered C++ #ifdef _WIN32 - ASSERT_EQUALS(Standards::Language::C, Path::identify("C:\\foo\\index.C")); + ASSERT_EQUALS(Standards::Language::C, Path::identify("C:\\foo\\index.C", false)); #endif - lang = Path::identify("index.c", &header); + lang = Path::identify("index.c", false, &header); ASSERT_EQUALS(Standards::Language::C, lang); ASSERT_EQUALS(false, header); // C++ - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.cpp")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.cxx")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.cc")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.c++")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.tpp")); - ASSERT_EQUALS(Standards::Language::CPP, 
Path::identify("index.txx")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.ipp")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.ixx")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("C:\\foo\\index.cpp")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("C:\\foo\\index.Cpp")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("/mnt/c/foo/index.cpp")); - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("/mnt/c/foo/index.Cpp")); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.cpp", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.cxx", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.cc", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.c++", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.tpp", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.txx", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.ipp", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.ixx", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("C:\\foo\\index.cpp", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("C:\\foo\\index.Cpp", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("/mnt/c/foo/index.cpp", false)); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("/mnt/c/foo/index.Cpp", false)); // TODO: check for case-insenstive filesystem instead // In unix .C is considered C++ #if !defined(_WIN32) && !(defined(__APPLE__) && defined(__MACH__)) - ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.C")); + ASSERT_EQUALS(Standards::Language::CPP, Path::identify("index.C", false)); #else - ASSERT_EQUALS(Standards::Language::C, Path::identify("index.C")); + ASSERT_EQUALS(Standards::Language::C, Path::identify("index.C", false)); #endif - lang = Path::identify("index.cpp", &header); + lang 
= Path::identify("index.cpp", false, &header); ASSERT_EQUALS(Standards::Language::CPP, lang); ASSERT_EQUALS(false, header); // headers - lang = Path::identify("index.h", &header); + lang = Path::identify("index.h", false, &header); ASSERT_EQUALS(Standards::Language::C, lang); ASSERT_EQUALS(true, header); - lang = Path::identify("index.hpp", &header); + lang = Path::identify("index.hpp", false, &header); ASSERT_EQUALS(Standards::Language::CPP, lang); ASSERT_EQUALS(true, header); - lang = Path::identify("index.hxx", &header); + lang = Path::identify("index.hxx", false, &header); ASSERT_EQUALS(Standards::Language::CPP, lang); ASSERT_EQUALS(true, header); - lang = Path::identify("index.h++", &header); + lang = Path::identify("index.h++", false, &header); ASSERT_EQUALS(Standards::Language::CPP, lang); ASSERT_EQUALS(true, header); - lang = Path::identify("index.hh", &header); + lang = Path::identify("index.hh", false, &header); ASSERT_EQUALS(Standards::Language::CPP, lang); ASSERT_EQUALS(true, header); - ASSERT_EQUALS(Standards::Language::None, Path::identify("index.header")); - ASSERT_EQUALS(Standards::Language::None, Path::identify("index.htm")); - ASSERT_EQUALS(Standards::Language::None, Path::identify("index.html")); + ASSERT_EQUALS(Standards::Language::None, Path::identify("index.header", false)); + ASSERT_EQUALS(Standards::Language::None, Path::identify("index.htm", false)); + ASSERT_EQUALS(Standards::Language::None, Path::identify("index.html", false)); + } + + void identifyWithCppProbeInternal(const std::string& marker, Standards::Language std) const + { + const ScopedFile file("cppprobe.h", marker); + ASSERT_EQUALS_MSG(std, Path::identify(file.path(), true), marker); + } + + void identifyWithCppProbe() const + { + const std::list markers_cpp = { + "// -*- C++ -*-", + "// -*-C++-*-", + "// -*- Mode: C++; -*-", + "// -*-Mode: C++;-*-", + "// -*- Mode:C++; -*-", + "// -*-Mode:C++;-*-", + "// -*- Mode: C++ -*-", + "// -*-Mode: C++-*-", + "// -*- Mode:C++ -*-", + "// 
-*-Mode:C++-*-", + "// -*- Mode: C++; c-basic-offset: 8 -*-", + + "// -*- c++ -*-", + "// -*- mode: c++; -*-", + + "//-*- C++ -*-", + " //-*- C++ -*-", + "\t//-*- C++ -*-", + "\t //-*- C++ -*-", + " \t//-*- C++ -*-", + "// -----*- C++ -*-----", + "// comment-*- C++ -*-comment", + "//-*- C++ -*-\r// comment", + "//-*- C++ -*-\n// comment", + "//-*- C++ -*-\r\n// comment" + }; + + for (const auto& m : markers_cpp) { + identifyWithCppProbeInternal(m, Standards::Language::CPP); + } + + const std::list markers_c = { + "-*- C++ -*-", // needs to be in comment + "// -*-C", // no end marker + "// comment\n// -*-C", // not on the first line + "// comment\r// -*-C", // not on the first line + "// comment\r\n// -*-C", // not on the first line + "// -*- C -*-", + "// -*- Mode: C; -*-", + "// -*- f90 -*-", + "// -*- fortran -*-" + }; + + for (const auto& m : markers_c) { + identifyWithCppProbeInternal(m, Standards::Language::C); + } } void is_header_2() const {