diff --git a/include/clangmetatool/source_util.h b/include/clangmetatool/source_util.h index 49d30e3..87588c6 100644 --- a/include/clangmetatool/source_util.h +++ b/include/clangmetatool/source_util.h @@ -26,12 +26,24 @@ struct SourceUtil { }; /** - * Expand the given range by resolving macros and iterating past tokens. + * Expand the given range as much as possible by resolving macros and + * iterating past tokens. */ static clang::CharSourceRange expandRange(const clang::SourceRange &range, const clang::SourceManager &sourceManager); + /** + * Expand the given range as much as possible by resolving macros and + * iterating past tokens. This function will return an invalid range if no + * contiguous expansion can be found for the range, e.g. the begin and end of + * each expansion are in different macros. + */ + static clang::CharSourceRange + expandRangeIfValid(const clang::SourceRange &range, + const clang::SourceManager &sourceManager, + clang::Preprocessor &preprocessor); + /** * Return a range for the given statement, using the given source manager to * locate code. The range will point to the location after expanding macros. diff --git a/src/source_util.cpp b/src/source_util.cpp index 52f0669..80e5e40 100644 --- a/src/source_util.cpp +++ b/src/source_util.cpp @@ -46,15 +46,6 @@ const clang::MacroInfo *getMacroInfo(clang::SourceLocation location, return history->findDirectiveAtLoc(location, sourceManager).getMacroInfo(); } -llvm::ArrayRef -getMacroTokens(clang::SourceLocation location, - const clang::SourceManager &sourceManager, - clang::Preprocessor &preprocessor) { - // Get macro identifier for the given location - auto *macroInfo = getMacroInfo(location, sourceManager, preprocessor); - return macroInfo ? macroInfo->tokens() : llvm::ArrayRef(); -} - bool sourceRangeContainsOnly(clang::SourceLocation beginLocation, clang::SourceLocation endLocation, const std::string &allowed, @@ -74,15 +65,165 @@ bool sourceRangeContainsOnly(clang::SourceLocation beginLocation, return true; } +/** + * Record for a single macro expansion step, including the location of that + * expansion and the macro information at that point, if any. + */ +struct ExpansionFrame { + const clang::MacroInfo *macroInfo; + clang::SourceLocation location; +}; + +/** + * Return a sequence of macro expansion records for the given location at the + * front of a range, from least expanded (the macro definition) to most + * expanded (the macro use). Stop if any partial macro expansions are + * encountered. + */ +std::vector +expandBeginLocation(clang::SourceLocation begin, + const clang::SourceManager &sourceManager, + clang::Preprocessor &preprocessor) { + std::vector stack; + + while (begin.isMacroID()) { + // Get the macro information at this location + + auto macroInfo = getMacroInfo(begin, sourceManager, preprocessor); + + // If a macro in the hierarchy uses the GCC '##' extension (see [1]) + // we can't easily trace up the context stack how the statement is formed + // from component macros. Cop out and return. + // [1]: https://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html + + if (macroInfo->isVariadic() && macroInfo->hasCommaPasting()) { + return stack; + } + + // Add the current location to the stack + + stack.push_back(ExpansionFrame{macroInfo, begin}); + + if (sourceManager.isMacroBodyExpansion(begin)) { + // Handle the case where the location is in a macro body + + // Check that there are only spaces or '(' between the beginning of the + // macro and part corresponding to the beginning of the statement. + + llvm::ArrayRef tokens = macroInfo->tokens(); + if (!tokens.empty()) { + clang::SourceLocation macroStart = tokens.front().getLocation(); + + // FIXME + // There is potentially a bug here, this is unable to deal with macros + // that expand to more than one access expression + clang::SourceLocation statementStart = + sourceManager.getSpellingLoc(begin); + + if (!sourceRangeContainsOnly(macroStart, statementStart, " \t(", + sourceManager)) { + return stack; + } + } + + // Move up one level closer to the expansion point. + + begin = sourceManager.getImmediateExpansionRange(begin).getBegin(); + } else { + // Handle the case where the location is in an argument to a function-like + // macro. + + // Start resolving the macro argument instead of the macro itself. + + begin = sourceManager.getImmediateSpellingLoc(begin); + } + } + + // Insert the fully expanded location into the stack, there is no macro + // information at this point. + + stack.push_back(ExpansionFrame{0, begin}); + return stack; +} + +/** + * Return a sequence of macro expansion records for the given location at the + * end of a range, from least expanded (the macro definition) to most + * expanded (the macro use). Stop if any partial macro expansions are + * encountered. + */ +std::vector +expandEndLocation(clang::SourceLocation end, + const clang::SourceManager &sourceManager, + clang::Preprocessor &preprocessor) { + std::vector stack; + + while (end.isMacroID()) { + // Get the macro information at this location + + auto macroInfo = getMacroInfo(end, sourceManager, preprocessor); + + // If a macro in the hierarchy uses the GCC '##' extension (see [1]) + // we can't easily trace up the context stack how the statement is formed + // from component macros. Cop out and return. + // [1]: https://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html + + if (macroInfo->isVariadic() && macroInfo->hasCommaPasting()) { + return stack; + } + + // Add the current location to the stack + + stack.push_back(ExpansionFrame{macroInfo, end}); + + if (sourceManager.isMacroBodyExpansion(end)) { + // Handle the case where the location is in a macro body + + // Check that there are only spaces or ')' between the end of the + // macro and part corresponding to the end of the statement. + + llvm::ArrayRef tokens = macroInfo->tokens(); + if (!tokens.empty()) { + clang::SourceLocation macroEnd = tokens.back().getEndLoc(); + clang::SourceLocation statementEnd = sourceManager.getSpellingLoc(end); + + statementEnd = clang::Lexer::getLocForEndOfToken( + statementEnd, 0, sourceManager, clang::LangOptions()); + + if (!sourceRangeContainsOnly(statementEnd, macroEnd, " \t)", + sourceManager)) { + return stack; + } + } + + // Move up one level closer to the expansion point. + + end = sourceManager.getImmediateExpansionRange(end).getEnd(); + } else { + // Handle the case where the location is in an argument to a function-like + // macro. + + // Start resolving the macro argument instead of the macro itself. + + end = sourceManager.getImmediateSpellingLoc(end); + } + } + + // Insert the fully expanded location into the stack, there is no macro + // information at this point. + + stack.push_back(ExpansionFrame{0, end}); + return stack; +} + } // namespace clang::CharSourceRange SourceUtil::expandRange(const clang::SourceRange &range, const clang::SourceManager &sourceManager) { - // Get the start location, resolving from macro definition to macro call - // location. The loop is adapted from 'clang::SourceManager::getFileLoc'. clang::SourceLocation begin = range.getBegin(); + while (!begin.isFileID()) { if (sourceManager.isMacroArgExpansion(begin)) { begin = sourceManager.getImmediateSpellingLoc(begin); @@ -109,6 +250,73 @@ SourceUtil::expandRange(const clang::SourceRange &range, return clang::CharSourceRange::getCharRange(begin, end); } +clang::CharSourceRange +SourceUtil::expandRangeIfValid(const clang::SourceRange &range, + const clang::SourceManager &sourceManager, + clang::Preprocessor &preprocessor) { + clang::SourceLocation begin = range.getBegin(); + clang::SourceLocation end = range.getEnd(); + + // Get the full set of expansion records for the front and end of the range. + // If there are none in either case then there is something in the code that + // we cannot handle. + + auto beginStack = expandBeginLocation(begin, sourceManager, preprocessor); + if (beginStack.empty()) { + return {}; + } + + auto endStack = expandEndLocation(end, sourceManager, preprocessor); + if (endStack.empty()) { + return {}; + } + + // Search through each record in the expansion of the front of the range, + // starting from the last, or most expanded, record. + + for (auto beginFrameIt = beginStack.rbegin(); + beginFrameIt != beginStack.rend(); ++beginFrameIt) { + // For each, search for a corresponding expansion record for the end of the + // range, matching by macro information. + + auto endFrameIt = + std::find_if(endStack.rbegin(), endStack.rend(), + [&beginFrameIt, &sourceManager](const auto &frame) { + return frame.macroInfo == beginFrameIt->macroInfo; + }); + + // If no match is found, keep iterating + + if (endFrameIt == endStack.rend()) { + continue; + } + + // Traverse the rest of the stack and ensure that any macro argument + // expansions match up. + + for (auto beginTempIt = beginFrameIt, endTempIt = endFrameIt; + beginTempIt != beginStack.rend() && endTempIt != endStack.rend(); + ++beginTempIt, ++endTempIt) { + if (sourceManager.isMacroArgExpansion(beginTempIt->location) != + sourceManager.isMacroArgExpansion(endTempIt->location)) { + return {}; + } + } + + // Form and return a range with the front and back locations + + begin = sourceManager.getImmediateSpellingLoc(beginFrameIt->location); + end = sourceManager.getImmediateSpellingLoc(endFrameIt->location); + end = clang::Lexer::getLocForEndOfToken(end, 0, sourceManager, + clang::LangOptions()); + return clang::CharSourceRange::getCharRange(begin, end); + } + + // No matching front and end records were found, return an invalid range. + + return {}; +} + clang::CharSourceRange SourceUtil::getRangeForStatement(const clang::Stmt &statement, const clang::SourceManager &sourceManager) { @@ -192,104 +400,21 @@ bool SourceUtil::isPartialMacro(const clang::SourceRange &sourceRange, if (sourceManager.isMacroArgExpansion(begin) != sourceManager.isMacroArgExpansion(end)) { // This catches macros which might receive other macros as arguments - return true; // partial macro - } - - auto usesGccVarargExtensionAtLoc = [&sourceManager, - &preprocessor](const auto &loc) { - if (auto *macroInfo = getMacroInfo(loc, sourceManager, preprocessor)) { - return macroInfo->isVariadic() && macroInfo->hasCommaPasting(); - } - return false; - }; - while (begin.isMacroID()) { - // If a macro in the heierarchy uses the GCC '##' extension (see [1]) - // we can't easily trace up the context stack how the statement is formed - // from component macros. Cop out, return true - // [1]: https://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html - if (usesGccVarargExtensionAtLoc(begin)) { - return true; - } - // Only process macros where the statement is in the body, not ones where - // it is an argument. - - if (sourceManager.isMacroBodyExpansion(begin)) { - // Check that there are only spaces or '(' between the beginning of the - // macro and part corresponding to the beginning of the statement. - - llvm::ArrayRef tokens = - getMacroTokens(begin, sourceManager, preprocessor); - if (!tokens.empty()) { - clang::SourceLocation macroStart = tokens.front().getLocation(); - - // FIXME - // There is potentially a bug here, this is unable to deal with macros - // that expand to more than one access expression - clang::SourceLocation statementStart = - sourceManager.getSpellingLoc(begin); - - if (!sourceRangeContainsOnly(macroStart, statementStart, " \t(", - sourceManager)) { - return true; - } - } - } - - // Move up one level closer to the expansion point. This code is adapted - // from 'clang::SourceManager::getImmediateMacroCallerLoc'. - - if (sourceManager.isMacroArgExpansion(begin)) { - begin = sourceManager.getImmediateSpellingLoc(begin); - } else { - begin = sourceManager.getImmediateExpansionRange(begin).getBegin(); - } + return true; } - // Trace through levels of macros that are expanded by the end of the - // statement. - - const clang::MacroInfo *prevMacro = nullptr; - - while (end.isMacroID()) { - // If a macro in the heierarchy uses the GCC '##' extension (see [1]) - // we can't easily trace up the context stack how the statement is formed - // from component macros. Cop out, return true - // [1]: https://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html - if (usesGccVarargExtensionAtLoc(end)) { - return true; - } - // Only process macros where the statement is in the body, not ones where - // it is an argument. - - if (sourceManager.isMacroBodyExpansion(end)) { - // Check that there are only spaces or '(' between the beginning of the - // macro and part corresponding to the beginning of the statement. - - llvm::ArrayRef tokens = - getMacroTokens(end, sourceManager, preprocessor); - if (!tokens.empty()) { - clang::SourceLocation macroEnd = tokens.back().getEndLoc(); - clang::SourceLocation statementEnd = sourceManager.getSpellingLoc(end); + // Ensure that all macros are fully expanded. That is, the expansion function + // should return a stack with a non-macro at the end. - statementEnd = clang::Lexer::getLocForEndOfToken( - statementEnd, 0, sourceManager, clang::LangOptions()); - - if (!sourceRangeContainsOnly(statementEnd, macroEnd, " \t)", - sourceManager)) { - return true; - } - } - } - - // Move up one level closer to the expansion point. This code is adapted - // from 'clang::SourceManager::getImmediateMacroCallerLoc'. + auto stack = expandBeginLocation(begin, sourceManager, preprocessor); + if (stack.empty() || stack.back().macroInfo) { + return true; + } - if (sourceManager.isMacroArgExpansion(end)) { - end = sourceManager.getImmediateSpellingLoc(end); - } else { - end = sourceManager.getImmediateExpansionRange(end).getEnd(); - } + stack = expandEndLocation(end, sourceManager, preprocessor); + if (stack.empty() || stack.back().macroInfo) { + return true; } return false; diff --git a/t/041-expand-range-if-valid.t.cpp b/t/041-expand-range-if-valid.t.cpp new file mode 100644 index 0000000..2a824b2 --- /dev/null +++ b/t/041-expand-range-if-valid.t.cpp @@ -0,0 +1,161 @@ +#include "clangmetatool-testconfig.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace { + +using namespace clang::ast_matchers; + +class MyTool { +private: + typedef std::map ExpectedDataMap; + + clang::CompilerInstance* ci; + clangmetatool::MatchForwarder mf; + std::vector data; + ExpectedDataMap expectedData; + + class CommentHandler : public clang::CommentHandler { + ExpectedDataMap *expectedData; + + public: + CommentHandler(ExpectedDataMap *expectedData) + : expectedData(expectedData) { + } + + virtual bool HandleComment(clang::Preprocessor& preprocessor, + clang::SourceRange comment) override { + const clang::SourceManager& sourceManager = + preprocessor.getSourceManager(); + unsigned int line = sourceManager.getSpellingLineNumber( + comment.getBegin()); + std::string source = clang::Lexer::getSourceText( + clang::CharSourceRange::getTokenRange(comment), + sourceManager, + preprocessor.getLangOpts()).str(); + source.erase(0, source.find_first_not_of("/ ")); + (*expectedData)[line] = source; + return false; + } + }; + + CommentHandler commentHandler; + + void handleDeclRefExpr(const MatchFinder::MatchResult& r) { + data.push_back( + r.Nodes.getNodeAs("ref")->getSourceRange()); + } + +public: + MyTool(clang::CompilerInstance* ci, MatchFinder *f) + : ci(ci), mf(f), commentHandler(&expectedData) { + using namespace std::placeholders; + StatementMatcher beginMatcher = + declRefExpr(hasDeclaration(namedDecl(hasName("begin")))); + StatementMatcher endMatcher = + declRefExpr(hasDeclaration(namedDecl(hasName("end")))); + StatementMatcher matcher = + binaryOperator(hasLHS(ignoringParenImpCasts(beginMatcher)), + hasRHS(ignoringParenImpCasts(endMatcher))).bind("ref"); + mf.addMatcher(matcher, std::bind(&MyTool::handleDeclRefExpr, this, _1)); + + ci->getPreprocessor().addCommentHandler(&commentHandler); + } + + void postProcessing + (std::map &replacementsMap) { + const clang::SourceManager& sourceManager = ci->getSourceManager(); + clang::Preprocessor& preprocessor = ci->getPreprocessor(); + + for (auto match : data) { + clang::CharSourceRange range = + clangmetatool::SourceUtil::expandRangeIfValid(match, + sourceManager, + preprocessor); + + unsigned int line = sourceManager.getSpellingLineNumber( + sourceManager.getExpansionLoc(match.getBegin())); + + ExpectedDataMap::iterator expectedIt = expectedData.find(line); + if (expectedData.end() == expectedIt) { + EXPECT_NE(expectedIt, expectedData.end()) + << "line: " << line; + } + else { + std::string source = clang::Lexer::getSourceText( + range, + sourceManager, + preprocessor.getLangOpts()).str(); + + EXPECT_EQ(source, expectedIt->second) + << "line: " << line + << ", match: " << match.printToString(sourceManager) + << ", range: " << range.getAsRange().printToString(sourceManager); + } + } + } +}; + +} // namespace anonymous + +TEST(expandRangeIfValid, basic) { + llvm::cl::OptionCategory MyToolCategory("my-tool options"); + int argc = 4; + const char* argv[] = { + "foo", + CMAKE_SOURCE_DIR "/t/data/041-expand-range-if-valid/foo.cpp", + "--", + "-xc++", + "-Wno-unused-value" + }; + + auto result = clang::tooling::CommonOptionsParser::create( + argc, argv, MyToolCategory, llvm::cl::OneOrMore); + ASSERT_TRUE(!!result); + clang::tooling::CommonOptionsParser& optionsParser = result.get(); + + clang::tooling::RefactoringTool tool + (optionsParser.getCompilations(), optionsParser.getSourcePathList()); + clangmetatool::MetaToolFactory> + raf(tool.getReplacements()); + int r = tool.runAndSave(&raf); + ASSERT_EQ(0, r); +} + +// ---------------------------------------------------------------------------- +// Copyright 2021 Bloomberg Finance L.P. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ----------------------------- END-OF-FILE ---------------------------------- diff --git a/t/CMakeLists.txt b/t/CMakeLists.txt index 99a3043..8ae721a 100644 --- a/t/CMakeLists.txt +++ b/t/CMakeLists.txt @@ -57,6 +57,7 @@ foreach( 038-includegraph-nested-name 039-includegraph-nested-name-parameter 040-includegraph-nested-using + 041-expand-range-if-valid ) add_executable(${TEST}.t ${TEST}.t.cpp) diff --git a/t/data/041-expand-range-if-valid/foo.cpp b/t/data/041-expand-range-if-valid/foo.cpp new file mode 100644 index 0000000..974b963 --- /dev/null +++ b/t/data/041-expand-range-if-valid/foo.cpp @@ -0,0 +1,91 @@ +int begin; +int end; + +#define MACRO begin + end +#define BEGIN begin +#define END end +#define WITH_PARENS ( ( begin + end ) ) +#define TRANSITIVE_MACRO BEGIN + END +#define MATCH_IN_BODY(x) begin + end +#define TRANSITIVE_MATCH_IN_BODY(y) MATCH_IN_BODY(y) + +#define FUNC(x) x +#define PARTIAL_FUNC(x) 1 && x && 1 + +#define PARTIAL begin + end && 1 +#define PARTIAL2 1 && begin + end +#define PARTIAL3 1 && WITH_PARENS && 1 +#define PARTIAL4 1 && FUNC(begin + end) && 1 +#define PARTIAL5 1 && FUNC(FUNC(begin + end)) && 1 +#define PARTIAL6 1 && FUNC(FUNC(begin + end) && 1) && 1 +#define PARTIAL7 1 && FUNC(1 && FUNC(begin + end)) && 1 +#define PARTIAL8 1 && MATCH_IN_BODY(4) && 1 +#define PARTIAL9 1&& TRANSITIVE_MACRO && 1 +#define PARTIAL10 1 && FUNC(TRANSITIVE_MACRO) && 1 + +#define ADD_BEGIN(x) begin + x +#define ADD_END(x) x + end +#define TRANSITIVE_INVALID_MACRO ADD_BEGIN(end) +#define END_WITH_EXTRA end && 1 +#define SPLIT_MACROS begin + END_WITH_EXTRA + +int main() { + // These cases can be fully expanded + + begin + end; // begin + end + BEGIN + END; // BEGIN + END + MACRO; // MACRO + WITH_PARENS; // WITH_PARENS + TRANSITIVE_MACRO; // TRANSITIVE_MACRO + MATCH_IN_BODY(1); // MATCH_IN_BODY(1) + TRANSITIVE_MATCH_IN_BODY(2); // TRANSITIVE_MATCH_IN_BODY(2) + + // These cases expand partially + + FUNC(begin + end); // begin + end + FUNC(FUNC(begin + end)); // begin + end + FUNC(begin + end && 1); // begin + end + FUNC(1 && begin + end); // begin + end + FUNC(FUNC(begin + end) && 1); // begin + end + FUNC(1 && FUNC(begin + end)); // begin + end + FUNC(MATCH_IN_BODY(3)); // MATCH_IN_BODY(3) + PARTIAL_FUNC(begin + end); // begin + end + PARTIAL_FUNC(MACRO); // MACRO + PARTIAL_FUNC(TRANSITIVE_MACRO); // TRANSITIVE_MACRO + + // These cases expand inside a macro + + PARTIAL; // begin + end + PARTIAL2; // begin + end + PARTIAL3; // WITH_PARENS + PARTIAL4; // begin + end + PARTIAL5; // begin + end + PARTIAL6; // begin + end + PARTIAL7; // begin + end + PARTIAL8; // MATCH_IN_BODY(4) + PARTIAL9; // TRANSITIVE_MACRO + PARTIAL10; // TRANSITIVE_MACRO + + // These cases are invalid (empty comment = invalid) + + ADD_BEGIN(end); // + ADD_END(begin); // + TRANSITIVE_INVALID_MACRO; // + SPLIT_MACROS; // +} + +// ---------------------------------------------------------------------------- +// Copyright 2021 Bloomberg Finance L.P. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ----------------------------- END-OF-FILE ----------------------------------