diff --git a/libyul/AsmAnalysis.cpp b/libyul/AsmAnalysis.cpp index 84eb39daa834..a234fdb813d1 100644 --- a/libyul/AsmAnalysis.cpp +++ b/libyul/AsmAnalysis.cpp @@ -68,6 +68,8 @@ bool AsmAnalyzer::analyze(Block const& _block) auto watcher = m_errorReporter.errorWatcher(); try { + // FIXME: Pass location of the object name. Now it's a location of first code section in yul + validateObjectStructure(nativeLocationOf(_block)); if (!(ScopeFiller(m_info, m_errorReporter))(_block)) return false; @@ -86,13 +88,13 @@ bool AsmAnalyzer::analyze(Block const& _block) AsmAnalysisInfo AsmAnalyzer::analyzeStrictAssertCorrect(Dialect const& _dialect, Object const& _object) { - return analyzeStrictAssertCorrect(_dialect, _object.code()->root(), _object.qualifiedDataNames()); + return analyzeStrictAssertCorrect(_dialect, _object.code()->root(), _object.summarizeStructure()); } AsmAnalysisInfo AsmAnalyzer::analyzeStrictAssertCorrect( Dialect const& _dialect, Block const& _astRoot, - std::set const& _qualifiedDataNames + Object::Structure const _objectStructure ) { ErrorList errorList; @@ -103,7 +105,7 @@ AsmAnalysisInfo AsmAnalyzer::analyzeStrictAssertCorrect( errors, _dialect, {}, - _qualifiedDataNames + std::move(_objectStructure) ).analyze(_astRoot); yulAssert(success && !errors.hasErrors(), "Invalid assembly/yul code."); return analysisInfo; @@ -408,7 +410,7 @@ size_t AsmAnalyzer::operator()(FunctionCall const& _funCall) if (functionName == "datasize" || functionName == "dataoffset") { auto const& argumentAsLiteral = std::get(arg); - if (!m_dataNames.count(formatLiteral(argumentAsLiteral))) + if (!m_objectStructure.contains(formatLiteral(argumentAsLiteral))) m_errorReporter.typeError( 3517_error, nativeLocationOf(arg), @@ -766,3 +768,16 @@ bool AsmAnalyzer::validateInstructions(FunctionCall const& _functionCall) { return validateInstructions(_functionCall.functionName.name.str(), nativeLocationOf(_functionCall.functionName)); } + +void AsmAnalyzer::validateObjectStructure(langutil::SourceLocation _astRootLocation) +{ + if (m_eofVersion.has_value() && util::contains(m_objectStructure.objectName, '.')) // No dots in object name for EOF + m_errorReporter.syntaxError( + 9822_error, + _astRootLocation, + fmt::format( + "The object name \"{objectName}\" is invalid in EOF context. Object names must not contain 'dot' character.", + fmt::arg("objectName", m_objectStructure.objectName) + ) + ); +} diff --git a/libyul/AsmAnalysis.h b/libyul/AsmAnalysis.h index d8a2b2b1ac5f..7673e375ad20 100644 --- a/libyul/AsmAnalysis.h +++ b/libyul/AsmAnalysis.h @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -61,13 +62,13 @@ class AsmAnalyzer langutil::ErrorReporter& _errorReporter, Dialect const& _dialect, ExternalIdentifierAccess::Resolver _resolver = ExternalIdentifierAccess::Resolver(), - std::set _dataNames = {} + Object::Structure const _objectStructure = {} ): m_resolver(std::move(_resolver)), m_info(_analysisInfo), m_errorReporter(_errorReporter), m_dialect(_dialect), - m_dataNames(std::move(_dataNames)) + m_objectStructure(std::move(_objectStructure)) { if (EVMDialect const* evmDialect = dynamic_cast(&m_dialect)) { @@ -84,7 +85,7 @@ class AsmAnalyzer static AsmAnalysisInfo analyzeStrictAssertCorrect( Dialect const& _dialect, Block const& _astRoot, - std::set const& _qualifiedDataNames + Object::Structure const _objectStructure ); size_t operator()(Literal const& _literal); @@ -120,6 +121,8 @@ class AsmAnalyzer bool validateInstructions(std::string const& _instrIdentifier, langutil::SourceLocation const& _location); bool validateInstructions(FunctionCall const& _functionCall); + void validateObjectStructure(langutil::SourceLocation _astRootLocation); + yul::ExternalIdentifierAccess::Resolver m_resolver; Scope* m_currentScope = nullptr; /// Variables that are active at the current point in assembly (as opposed to @@ -131,7 +134,7 @@ class AsmAnalyzer std::optional m_eofVersion; Dialect const& m_dialect; /// Names of data objects to be referenced by builtin functions with literal arguments. - std::set m_dataNames; + Object::Structure m_objectStructure; ForLoop const* m_currentForLoop = nullptr; /// Worst side effects encountered during analysis (including within defined functions). SideEffects m_sideEffects; diff --git a/libyul/CompilabilityChecker.cpp b/libyul/CompilabilityChecker.cpp index 8ece236bf425..9cc6bd864320 100644 --- a/libyul/CompilabilityChecker.cpp +++ b/libyul/CompilabilityChecker.cpp @@ -44,7 +44,7 @@ CompilabilityChecker::CompilabilityChecker( yul::AsmAnalysisInfo analysisInfo = yul::AsmAnalyzer::analyzeStrictAssertCorrect( noOutputDialect, _object.code()->root(), - _object.qualifiedDataNames() + _object.summarizeStructure() ); BuiltinContext builtinContext; diff --git a/libyul/Object.cpp b/libyul/Object.cpp index c6ba5311ad5c..4b8119ffa5b4 100644 --- a/libyul/Object.cpp +++ b/libyul/Object.cpp @@ -110,29 +110,60 @@ Json Object::toJson() const return ret; } -std::set Object::qualifiedDataNames() const + +std::set Object::Structure::topLevelSubObjectNames() const +{ + std::set topLevelObjectNames; + + for (auto const& path: objectPaths) + if (!util::contains(path, '.') && path != objectName) + topLevelObjectNames.insert(path); + + return topLevelObjectNames; +} + +Object::Structure Object::summarizeStructure() const { - std::set qualifiedNames = + Structure structure; + + structure.objectPaths = name.empty() || util::contains(name, '.') ? std::set{} : std::set{name}; + + structure.objectName = name; + for (std::shared_ptr const& subObjectNode: subObjects) { - yulAssert(qualifiedNames.count(subObjectNode->name) == 0, ""); + yulAssert(!structure.contains(subObjectNode->name)); if (util::contains(subObjectNode->name, '.')) continue; - qualifiedNames.insert(subObjectNode->name); + if (auto const* subObject = dynamic_cast(subObjectNode.get())) - for (auto const& subSubObj: subObject->qualifiedDataNames()) + { + structure.objectPaths.insert(subObjectNode->name); + + auto const subObjectStructure = subObject->summarizeStructure(); + + for (auto const& subSubObj: subObjectStructure.objectPaths) if (subObject->name != subSubObj) { - yulAssert(qualifiedNames.count(subObject->name + "." + subSubObj) == 0, ""); - qualifiedNames.insert(subObject->name + "." + subSubObj); + yulAssert(!structure.contains(subObject->name + "." + subSubObj)); + structure.objectPaths.insert(subObject->name + "." + subSubObj); + } + for (auto const& subSubObjData: subObjectStructure.dataPaths) + if (subObject->name != subSubObjData) + { + yulAssert(!structure.contains(subObject->name + "." + subSubObjData)); + structure.dataPaths.insert(subObject->name + "." + subSubObjData); } + } + else + structure.dataPaths.insert(subObjectNode->name); } - yulAssert(qualifiedNames.count("") == 0, ""); - return qualifiedNames; + yulAssert(!structure.contains("")); + return structure; } std::vector Object::pathToSubObject(std::string_view _qualifiedName) const diff --git a/libyul/Object.h b/libyul/Object.h index 230f97866e09..3a89db4bcc23 100644 --- a/libyul/Object.h +++ b/libyul/Object.h @@ -98,10 +98,32 @@ struct Object: public ObjectNode ) const override; /// @returns a compact JSON representation of the AST. Json toJson() const override; + + /// Summarizes the structure of the subtree rooted at a given object, + /// in particular the paths that can be used from within to refer to nested nodes (objects and data). + struct Structure + { + /// The name of the object + std::string objectName; + /// Available dot-separated paths to nested objects (relative to current object). + std::set objectPaths; + /// Available dot-separated paths to nested data entries (relative to current object). + std::set dataPaths; + + /// Checks if a path is available. + bool contains(std::string const& _path) const { return containsObject(_path) || containsData(_path); } + /// Checks if a path is available and leads to an object. + bool containsObject(std::string const& _path) const { return objectPaths.count(_path) > 0; } + /// Checks if a path is available and leads to a data entry. + bool containsData(std::string const& _path) const { return dataPaths.count(_path) > 0; } + + std::set topLevelSubObjectNames() const; + }; + /// @returns the set of names of data objects accessible from within the code of /// this object, including the name of object itself /// Handles all names containing dots as reserved identifiers, not accessible as data. - std::set qualifiedDataNames() const; + Structure summarizeStructure() const; /// @returns vector of subIDs if possible to reach subobject with @a _qualifiedName, throws otherwise /// For "B.C" should return vector of two values if success (subId of B and subId of C in B). diff --git a/libyul/YulStack.cpp b/libyul/YulStack.cpp index 8b559e23f46d..877e447c78db 100644 --- a/libyul/YulStack.cpp +++ b/libyul/YulStack.cpp @@ -166,7 +166,7 @@ bool YulStack::analyzeParsed(Object& _object) m_errorReporter, languageToDialect(m_language, m_evmVersion, m_eofVersion), {}, - _object.qualifiedDataNames() + _object.summarizeStructure() ); bool success = false; diff --git a/libyul/optimiser/StackCompressor.cpp b/libyul/optimiser/StackCompressor.cpp index 2c2ef4272904..6ad3899e96c5 100644 --- a/libyul/optimiser/StackCompressor.cpp +++ b/libyul/optimiser/StackCompressor.cpp @@ -257,7 +257,11 @@ std::tuple StackCompressor::run( Block astRoot = std::get(ASTCopier{}(_object.code()->root())); if (usesOptimizedCodeGenerator) { - yul::AsmAnalysisInfo analysisInfo = yul::AsmAnalyzer::analyzeStrictAssertCorrect(_dialect, astRoot, _object.qualifiedDataNames()); + yul::AsmAnalysisInfo analysisInfo = yul::AsmAnalyzer::analyzeStrictAssertCorrect( + _dialect, + astRoot, + _object.summarizeStructure() + ); std::unique_ptr cfg = ControlFlowGraphBuilder::build(analysisInfo, _dialect, astRoot); eliminateVariablesOptimizedCodegen( _dialect, diff --git a/libyul/optimiser/StackLimitEvader.cpp b/libyul/optimiser/StackLimitEvader.cpp index d3cb6418fd30..6ef4b267b67e 100644 --- a/libyul/optimiser/StackLimitEvader.cpp +++ b/libyul/optimiser/StackLimitEvader.cpp @@ -132,7 +132,11 @@ Block StackLimitEvader::run( auto astRoot = std::get(ASTCopier{}(_object.code()->root())); if (evmDialect && evmDialect->evmVersion().canOverchargeGasForCall()) { - yul::AsmAnalysisInfo analysisInfo = yul::AsmAnalyzer::analyzeStrictAssertCorrect(*evmDialect, astRoot, _object.qualifiedDataNames()); + yul::AsmAnalysisInfo analysisInfo = yul::AsmAnalyzer::analyzeStrictAssertCorrect( + *evmDialect, + astRoot, + _object.summarizeStructure() + ); std::unique_ptr cfg = ControlFlowGraphBuilder::build(analysisInfo, *evmDialect, astRoot); run(_context, astRoot, StackLayoutGenerator::reportStackTooDeep(*cfg)); } diff --git a/test/libyul/Common.cpp b/test/libyul/Common.cpp index 6562e1743588..41b856edd2f0 100644 --- a/test/libyul/Common.cpp +++ b/test/libyul/Common.cpp @@ -84,7 +84,7 @@ std::pair, std::shared_ptr> yul::t if (!parserResult->hasCode() || errorReporter.hasErrors()) return {}; std::shared_ptr analysisInfo = std::make_shared(); - AsmAnalyzer analyzer(*analysisInfo, errorReporter, _dialect, {}, parserResult->qualifiedDataNames()); + AsmAnalyzer analyzer(*analysisInfo, errorReporter, _dialect, {}, parserResult->summarizeStructure()); // TODO this should be done recursively. if (!analyzer.analyze(parserResult->code()->root()) || errorReporter.hasErrors()) return {}; diff --git a/test/libyul/yulSyntaxTests/eof/object_name_in_eof.yul b/test/libyul/yulSyntaxTests/eof/object_name_in_eof.yul new file mode 100644 index 000000000000..bcbbca624cef --- /dev/null +++ b/test/libyul/yulSyntaxTests/eof/object_name_in_eof.yul @@ -0,0 +1,9 @@ +object "a.b" { + code {} +} + +// ==== +// EVMVersion: >=shanghai +// bytecodeFormat: >=EOFv1 +// ---- +// SyntaxError 9822: (24-26): The object name "a.b" is invalid in EOF context. Object names must not contain 'dot' character.