Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refs #10692 - added command-line option --cpp-header-probe to probe headers and extension-less files for Emacs C++ marker #6324

Merged
merged 2 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cli/cmdlineparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
}
}

else if (std::strcmp(argv[i], "--cpp-header-probe") == 0) {
danmar marked this conversation as resolved.
Show resolved Hide resolved
mSettings.cppHeaderProbe = true;
}

// Show --debug output after the first simplifications
else if (std::strcmp(argv[i], "--debug") == 0 ||
std::strcmp(argv[i], "--debug-normal") == 0)
Expand Down Expand Up @@ -887,6 +891,10 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
return Result::Fail;
}

else if (std::strcmp(argv[i], "--no-cpp-header-probe") == 0) {
mSettings.cppHeaderProbe = false;
}

// Write results in file
else if (std::strncmp(argv[i], "--output-file=", 14) == 0)
mSettings.outputFile = Path::simplifyPath(Path::fromNativeSeparators(argv[i] + 14));
Expand Down
6 changes: 3 additions & 3 deletions lib/cppcheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ static void createDumpFile(const Settings& settings,
case Standards::Language::None:
{
// TODO: error out on unknown language?
const Standards::Language lang = Path::identify(filename);
const Standards::Language lang = Path::identify(filename, settings.cppHeaderProbe);
if (lang == Standards::Language::CPP)
language = " language=\"cpp\"";
else if (lang == Standards::Language::C)
Expand Down Expand Up @@ -420,7 +420,7 @@ unsigned int CppCheck::checkClang(const std::string &path)
mErrorLogger.reportOut(std::string("Checking ") + path + " ...", Color::FgGreen);

// TODO: this ignores the configured language
const bool isCpp = Path::identify(path) == Standards::Language::CPP;
const bool isCpp = Path::identify(path, mSettings.cppHeaderProbe) == Standards::Language::CPP;
const std::string langOpt = isCpp ? "-x c++" : "-x c";
const std::string analyzerInfo = mSettings.buildDir.empty() ? std::string() : AnalyzerInformation::getAnalyzerInfoFile(mSettings.buildDir, path, emptyString);
const std::string clangcmd = analyzerInfo + ".clang-cmd";
Expand Down Expand Up @@ -784,7 +784,7 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string
TokenList tokenlist(&mSettings);
std::istringstream istr2(code);
// TODO: asserts when file has unknown extension
tokenlist.createTokens(istr2, Path::identify(*files.begin())); // TODO: check result?
tokenlist.createTokens(istr2, Path::identify(*files.begin(), false)); // TODO: check result?
executeRules("define", tokenlist);
}
#endif
Expand Down
104 changes: 100 additions & 4 deletions lib/path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@
#undef __STRICT_ANSI__
#endif

//#define LOG_EMACS_MARKER

#include "path.h"
#include "utils.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#ifdef LOG_EMACS_MARKER
#include <iostream>
#endif
#include <sys/stat.h>
#include <unordered_set>
#include <utility>
Expand Down Expand Up @@ -210,16 +216,104 @@ static const std::unordered_set<std::string> header_exts = {
bool Path::acceptFile(const std::string &path, const std::set<std::string> &extra)
{
bool header = false;
return (identify(path, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end();
return (identify(path, false, &header) != Standards::Language::None && !header) || extra.find(getFilenameExtension(path)) != extra.end();
}

Standards::Language Path::identify(const std::string &path, bool *header)
static bool hasEmacsCppMarker(const char* path)
{
// TODO: identify is called three times for each file
// Preprocessor::loadFiles() -> createDUI()
// Preprocessor::preprocess() -> createDUI()
// TokenList::createTokens() -> TokenList::determineCppC()
#ifdef LOG_EMACS_MARKER
std::cout << path << '\n';
#endif

FILE *fp = fopen(path, "rt");
danmar marked this conversation as resolved.
Show resolved Hide resolved
if (!fp)
return false;
std::string buf(128, '\0');
{
// TODO: read the whole first line only
const char * const res = fgets(const_cast<char*>(buf.data()), buf.size(), fp);
fclose(fp);
fp = nullptr;
if (!res)
return false; // failed to read file
}
// TODO: replace with regular expression
const auto pos1 = buf.find("-*-");
if (pos1 == std::string::npos)
return false; // no start marker
const auto pos_nl = buf.find_first_of("\r\n");
if (pos_nl != std::string::npos && (pos_nl < pos1)) {
#ifdef LOG_EMACS_MARKER
std::cout << path << " - Emacs marker not on the first line" << '\n';
#endif
return false; // not on first line
}
const auto pos2 = buf.find("-*-", pos1 + 3);
// TODO: make sure we have read the whole line before bailing out
if (pos2 == std::string::npos) {
#ifdef LOG_EMACS_MARKER
std::cout << path << " - Emacs marker not terminated" << '\n';
#endif
return false; // no end marker
}
#ifdef LOG_EMACS_MARKER
std::cout << "Emacs marker: '" << buf.substr(pos1, (pos2 + 3) - pos1) << "'" << '\n';
#endif
// TODO: support /* */ comments
const std::string buf_trim = trim(buf); // trim whitespaces
if (buf_trim[0] != '/' || buf_trim[1] != '/') {
#ifdef LOG_EMACS_MARKER
std::cout << path << " - Emacs marker not in a comment: '" << buf.substr(pos1, (pos2 + 3) - pos1) << "'" << '\n';
#endif
return false; // not a comment
}

// there are more variations with lowercase and no whitespaces
// -*- C++ -*-
// -*- Mode: C++; -*-
// -*- Mode: C++; c-basic-offset: 8 -*-
std::string marker = trim(buf.substr(pos1 + 3, pos2 - pos1 - 3), " ;");
// cut off additional attributes
const auto pos_semi = marker.find(';');
if (pos_semi != std::string::npos)
marker.resize(pos_semi);
findAndReplace(marker, "mode:", "");
findAndReplace(marker, "Mode:", "");
marker = trim(marker);
if (marker == "C++" || marker == "c++") {
// NOLINTNEXTLINE(readability-simplify-boolean-expr) - TODO: FP
Copy link
Collaborator Author

@firewave firewave May 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Upstream issue: llvm/llvm-project#93157.

return true; // C++ marker found
}

//if (marker == "C" || marker == "c")
// return false;
#ifdef LOG_EMACS_MARKER
std::cout << path << " - unmatched Emacs marker: '" << marker << "'" << '\n';
#endif

return false; // marker is not a C++ one
}

Standards::Language Path::identify(const std::string &path, bool cppHeaderProbe, bool *header)
{
// cppcheck-suppress uninitvar - TODO: FP
if (header)
*header = false;

std::string ext = getFilenameExtension(path);
// standard library headers have no extension
if (cppHeaderProbe && ext.empty()) {
if (hasEmacsCppMarker(path.c_str())) {
if (header)
*header = true;
return Standards::Language::CPP;
}
return Standards::Language::None;
}
if (ext == ".C")
return Standards::Language::CPP;
if (c_src_exts.find(ext) != c_src_exts.end())
Expand All @@ -230,7 +324,9 @@ Standards::Language Path::identify(const std::string &path, bool *header)
if (ext == ".h") {
if (header)
*header = true;
return Standards::Language::C; // treat as C for now
if (cppHeaderProbe && hasEmacsCppMarker(path.c_str()))
return Standards::Language::CPP;
return Standards::Language::C;
}
if (cpp_src_exts.find(ext) != cpp_src_exts.end())
return Standards::Language::CPP;
Expand All @@ -245,7 +341,7 @@ Standards::Language Path::identify(const std::string &path, bool *header)
bool Path::isHeader(const std::string &path)
{
bool header;
(void)Path::identify(path, &header);
(void)identify(path, false, &header);
return header;
}

Expand Down
3 changes: 2 additions & 1 deletion lib/path.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,11 @@ class CPPCHECKLIB Path {
/**
* @brief Identify the language based on the file extension
* @param path filename to check. path info is optional
* @param cppHeaderProbe check optional Emacs marker to identify extension-less and *.h files as C++
* @param header if provided indicates if the file is a header
* @return the language type
*/
static Standards::Language identify(const std::string &path, bool *header = nullptr);
static Standards::Language identify(const std::string &path, bool cppHeaderProbe, bool *header = nullptr);

/**
* @brief Get filename without a directory path part.
Expand Down
2 changes: 1 addition & 1 deletion lib/preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ static simplecpp::DUI createDUI(const Settings &mSettings, const std::string &cf
dui.includes = mSettings.userIncludes; // --include
// TODO: use mSettings.standards.stdValue instead
// TODO: error out on unknown language?
const Standards::Language lang = Path::identify(filename);
const Standards::Language lang = Path::identify(filename, mSettings.cppHeaderProbe);
if (lang == Standards::Language::CPP) {
dui.std = mSettings.standards.getCPP();
splitcfg(mSettings.platform.getLimitsDefines(Standards::getCPP(dui.std)), dui.defines, "");
Expand Down
3 changes: 3 additions & 0 deletions lib/settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ class CPPCHECKLIB WARN_UNUSED Settings {
/** cppcheck.cfg: About text */
std::string cppcheckCfgAbout;

/** @brief check Emacs marker to detect extension-less and *.h files as C++ */
bool cppHeaderProbe{};
danmar marked this conversation as resolved.
Show resolved Hide resolved

/** @brief Are we running from DACA script? */
bool daca{};

Expand Down
2 changes: 1 addition & 1 deletion lib/tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8067,7 +8067,7 @@ void Tokenizer::unmatchedToken(const Token *tok) const
void Tokenizer::syntaxErrorC(const Token *tok, const std::string &what) const
{
printDebugOutput(0);
throw InternalError(tok, "Code '"+what+"' is invalid C code. Use --std or --language to configure the language.", InternalError::SYNTAX);
throw InternalError(tok, "Code '"+what+"' is invalid C code.", "Use --std, -x or --language to enforce C++. Or --cpp-header-probe to identify C++ headers via the Emacs marker.", InternalError::SYNTAX);
}

void Tokenizer::unknownMacroError(const Token *tok1) const
Expand Down
2 changes: 1 addition & 1 deletion lib/tokenlist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ void TokenList::determineCppC()
// only try to determine if it wasn't enforced
if (mLang == Standards::Language::None) {
ASSERT_LANG(!getSourceFilePath().empty());
mLang = Path::identify(getSourceFilePath());
mLang = Path::identify(getSourceFilePath(), mSettings ? mSettings->cppHeaderProbe : false);
// TODO: cannot enable assert as this might occur for unknown extensions
//ASSERT_LANG(mLang != Standards::Language::None);
if (mLang == Standards::Language::None) {
Expand Down
4 changes: 3 additions & 1 deletion releasenotes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ Deprecations:
-

Other:
- Add support for 'CLICOLOR_FORCE'/'NO_COLOR' environment variables to force/disable ANSI color output for diagnostics.
- Add support for 'CLICOLOR_FORCE'/'NO_COLOR' environment variables to force/disable ANSI color output for diagnostics.
- added command-line option `--cpp-header-probe` (and `--no-cpp-header-probe`) to probe headers and extension-less files for Emacs marker (see https://trac.cppcheck.net/ticket/10692 for more details)
-
29 changes: 29 additions & 0 deletions test/cli/other_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1487,3 +1487,32 @@ def test_markup_lang(tmpdir):

exitcode, stdout, _ = cppcheck(args)
assert exitcode == 0, stdout


def test_cpp_probe(tmpdir):
test_file = os.path.join(tmpdir, 'test.h')
with open(test_file, 'wt') as f:
f.writelines([
'class A {};'
])

args = ['-q', '--template=simple', '--cpp-header-probe', '--verbose', test_file]
err_lines = [
# TODO: fix that awkward format
"{}:1:1: error: Code 'classA{{' is invalid C code.: Use --std, -x or --language to enforce C++. Or --cpp-header-probe to identify C++ headers via the Emacs marker. [syntaxError]".format(test_file)
]

assert_cppcheck(args, ec_exp=0, err_exp=err_lines, out_exp=[])


def test_cpp_probe_2(tmpdir):
test_file = os.path.join(tmpdir, 'test.h')
with open(test_file, 'wt') as f:
f.writelines([
'// -*- C++ -*-',
'class A {};'
])

args = ['-q', '--template=simple', '--cpp-header-probe', test_file]

assert_cppcheck(args, ec_exp=0, err_exp=[], out_exp=[])
4 changes: 2 additions & 2 deletions test/cli/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def assert_cppcheck(args, ec_exp=None, out_exp=None, err_exp=None, env=None):
assert exitcode == ec_exp, stdout
if out_exp is not None:
out_lines = stdout.splitlines()
assert out_lines == out_exp, stdout
assert out_lines == out_exp, out_lines
if err_exp is not None:
err_lines = stderr.splitlines()
assert err_lines == err_exp, stderr
assert err_lines == err_exp, err_lines
1 change: 1 addition & 0 deletions test/fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ class TestFixture : public ErrorLogger {
// *INDENT-ON*
#define ASSERT_EQUALS_WITHOUT_LINENUMBERS( EXPECTED, ACTUAL ) assertEqualsWithoutLineNumbers(__FILE__, __LINE__, EXPECTED, ACTUAL)
#define ASSERT_EQUALS_DOUBLE( EXPECTED, ACTUAL, TOLERANCE ) assertEqualsDouble(__FILE__, __LINE__, EXPECTED, ACTUAL, TOLERANCE)
#define ASSERT_EQUALS_LOC_MSG( EXPECTED, ACTUAL, MSG, FILE_, LINE_ ) assertEquals(FILE_, LINE_, EXPECTED, ACTUAL, MSG)
#define ASSERT_EQUALS_MSG( EXPECTED, ACTUAL, MSG ) assertEquals(__FILE__, __LINE__, EXPECTED, ACTUAL, MSG)
#define ASSERT_EQUALS_ENUM( EXPECTED, ACTUAL ) if (!assertEqualsEnum(__FILE__, __LINE__, (EXPECTED), (ACTUAL))) return
#define ASSERT_THROW( CMD, EXCEPTION ) do { try { CMD; assertThrowFail(__FILE__, __LINE__); } catch (const EXCEPTION&) {} catch (...) { assertThrowFail(__FILE__, __LINE__); } } while (false)
Expand Down
32 changes: 32 additions & 0 deletions test/testcmdlineparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ class TestCmdlineParser : public TestFixture {
TEST_CASE(checkLevelNormal);
TEST_CASE(checkLevelExhaustive);
TEST_CASE(checkLevelUnknown);
TEST_CASE(cppHeaderProbe);
TEST_CASE(cppHeaderProbe2);
TEST_CASE(noCppHeaderProbe);
TEST_CASE(noCppHeaderProbe2);

TEST_CASE(ignorepaths1);
TEST_CASE(ignorepaths2);
Expand Down Expand Up @@ -2610,6 +2614,34 @@ class TestCmdlineParser : public TestFixture {
ASSERT_EQUALS("cppcheck: error: unknown '--check-level' value 'default'.\n", logger->str());
}

void cppHeaderProbe() {
REDIRECT;
const char * const argv[] = {"cppcheck", "--cpp-header-probe", "file.cpp"};
ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(3, argv));
ASSERT_EQUALS(true, settings->cppHeaderProbe);
}

void cppHeaderProbe2() {
REDIRECT;
const char * const argv[] = {"cppcheck", "--no-cpp-header-probe", "--cpp-header-probe", "file.cpp"};
ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(4, argv));
ASSERT_EQUALS(true, settings->cppHeaderProbe);
}

void noCppHeaderProbe() {
REDIRECT;
const char * const argv[] = {"cppcheck", "--no-cpp-header-probe", "file.cpp"};
ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(3, argv));
ASSERT_EQUALS(false, settings->cppHeaderProbe);
}

void noCppHeaderProbe2() {
REDIRECT;
const char * const argv[] = {"cppcheck", "--cpp-header-probe", "--no-cpp-header-probe", "file.cpp"};
ASSERT_EQUALS_ENUM(CmdLineParser::Result::Success, parser->parseFromArgs(4, argv));
ASSERT_EQUALS(false, settings->cppHeaderProbe);
}

void ignorepaths1() {
REDIRECT;
const char * const argv[] = {"cppcheck", "-isrc", "file.cpp"};
Expand Down
Loading
Loading