Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

eof: Fix stack height calculation for non-returning function. #15636

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions libevmasm/Assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -708,10 +708,10 @@ AssemblyItem Assembly::newFunctionCall(uint16_t _functionID) const
solAssert(_functionID < m_codeSections.size(), "Call to undeclared function.");
solAssert(_functionID > 0, "Cannot call section 0");
auto const& section = m_codeSections.at(_functionID);
if (section.outputs != 0x80)
return AssemblyItem::functionCall(_functionID, section.inputs, section.outputs);
else
if (section.nonReturning)
return AssemblyItem::jumpToFunction(_functionID, section.inputs, section.outputs);
else
return AssemblyItem::functionCall(_functionID, section.inputs, section.outputs);
}

AssemblyItem Assembly::newFunctionReturn() const
Expand All @@ -720,14 +720,14 @@ AssemblyItem Assembly::newFunctionReturn() const
return AssemblyItem::functionReturn();
}

uint16_t Assembly::createFunction(uint8_t _args, uint8_t _rets)
uint16_t Assembly::createFunction(uint8_t _args, uint8_t _rets, bool _nonReturning)
{
size_t functionID = m_codeSections.size();
solRequire(functionID < 1024, AssemblyException, "Too many functions for EOF");
solAssert(m_currentCodeSection == 0, "Functions need to be declared from the main block.");
solAssert(_rets <= 0x80, "Too many function returns.");
solAssert(_args <= 127, "Too many function inputs.");
m_codeSections.emplace_back(CodeSection{_args, _rets, {}});
solRequire(_rets <= 127, AssemblyException, "Too many function returns.");
solRequire(_args <= 127, AssemblyException, "Too many function inputs.");
m_codeSections.emplace_back(CodeSection{_args, _rets, _nonReturning, {}});
return static_cast<uint16_t>(functionID);
}

Expand Down Expand Up @@ -1103,7 +1103,8 @@ std::tuple<bytes, std::vector<size_t>, size_t> Assembly::createEOFHeader(std::se
for (auto const& codeSection: m_codeSections)
{
retBytecode.push_back(codeSection.inputs);
retBytecode.push_back(codeSection.outputs);
// According to EOF spec function output num equals 0x80 means non-returning function
retBytecode.push_back(codeSection.nonReturning ? 0x80 : codeSection.outputs);
appendBigEndianUint16(retBytecode, calculateMaxStackHeight(codeSection));
}

Expand Down Expand Up @@ -1522,7 +1523,8 @@ LinkerObject const& Assembly::assembleEOF() const

solRequire(!m_codeSections.empty(), AssemblyException, "Expected at least one code section.");
solRequire(
m_codeSections.front().inputs == 0 && m_codeSections.front().outputs == 0x80, AssemblyException,
m_codeSections.front().inputs == 0 && m_codeSections.front().outputs == 0 && m_codeSections.front().nonReturning,
AssemblyException,
"Expected the first code section to have zero inputs and be non-returning."
);

Expand Down Expand Up @@ -1624,12 +1626,11 @@ LinkerObject const& Assembly::assembleEOF() const
size_t const index = static_cast<uint16_t>(item.data());
solAssert(index < m_codeSections.size());
solAssert(item.functionSignature().argsNum <= 127);
solAssert(item.type() == JumpF || item.functionSignature().retsNum <= 127);
solAssert(item.type() == CallF || item.functionSignature().retsNum <= 128);
solAssert(item.functionSignature().retsNum <= 127);
solAssert(m_codeSections[index].inputs == item.functionSignature().argsNum);
solAssert(m_codeSections[index].outputs == item.functionSignature().retsNum);
// If CallF the function can continue.
solAssert(item.type() == JumpF || item.functionSignature().canContinue());
// If CallF the function cannot be non-returning.
solAssert(item.type() == JumpF || !m_codeSections[index].nonReturning);
appendBigEndianUint16(ret.bytecode, item.data());
break;
}
Expand Down
7 changes: 5 additions & 2 deletions libevmasm/Assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class Assembly
m_name(std::move(_name))
{
// Code section number 0 has to be non-returning.
m_codeSections.emplace_back(CodeSection{0, 0x80, {}});
m_codeSections.emplace_back(CodeSection{0, 0, true, {}});
}

std::optional<uint8_t> eofVersion() const { return m_eofVersion; }
Expand All @@ -72,7 +72,7 @@ class Assembly

AssemblyItem newFunctionCall(uint16_t _functionID) const;
AssemblyItem newFunctionReturn() const;
uint16_t createFunction(uint8_t _args, uint8_t _rets);
uint16_t createFunction(uint8_t _args, uint8_t _rets, bool _nonReturning);
void beginFunction(uint16_t _functionID);
void endFunction();

Expand Down Expand Up @@ -221,7 +221,10 @@ class Assembly
struct CodeSection
{
uint8_t inputs = 0;
// Number of outputs needs to be set properly even for non-returning function.
// It matters in case of stack height calculation of the function call instruction.
uint8_t outputs = 0;
bool nonReturning = false;
AssemblyItems items{};
};

Expand Down
2 changes: 1 addition & 1 deletion libevmasm/AssemblyItem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ size_t AssemblyItem::returnValues() const
return 1;
case JumpF:
case CallF:
return functionSignature().canContinue() ? functionSignature().retsNum : 0;
return functionSignature().retsNum;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This made me realize something: in Assembly::calculateMaxStackHeight() we're applying the stack height change even for terminating instructions, especially JUMPF, but the spec say:

terminating instructions do not need to update stack heights.

This must be making the calculation wrong in some cases and I'm also not confident that our semantic tests would catch that, so it would be good to cover that with some extra tests. For example a non-returning JUMPF with tons of inputs should affect the max height in a way that cause a mismatch with EVM's validation.

Fixing that might be a separate PR though. It's related to this one since it's also stack height calculation but I guess we could consider it a separate bug.

case AssignImmutable:
case UndefinedItem:
break;
Expand Down
8 changes: 3 additions & 5 deletions libevmasm/AssemblyItem.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class AssemblyItem
static AssemblyItem jumpToFunction(uint16_t _functionID, uint8_t _args, uint8_t _rets, langutil::DebugData::ConstPtr _debugData = langutil::DebugData::create())
{
AssemblyItem result(JumpF, Instruction::JUMPF, _functionID, _debugData);
solAssert(_args <= 127 && _rets <= 128);
solAssert(_args <= 127 && _rets <= 127);
result.m_functionSignature = {_args, _rets};
return result;
}
Expand Down Expand Up @@ -292,12 +292,10 @@ class AssemblyItem

struct FunctionSignature
{
/// Number of EOF function arguments. must be less than 127
/// Number of EOF function arguments. must be less than 128
uint8_t argsNum;
/// Number of EOF function return values. Must be less than 128. 128(0x80) means that it's non-returning.
/// Number of EOF function return values. Must be less than 128.
uint8_t retsNum;

bool canContinue() const { return retsNum != 0x80;}
};

FunctionSignature const& functionSignature() const
Expand Down
3 changes: 2 additions & 1 deletion libyul/backends/evm/AbstractAssembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ class AbstractAssembly

/// Registers a new function with given signature and returns its ID.
/// The function is initially empty and its body must be filled with instructions.
virtual FunctionID registerFunction(uint8_t _args, uint8_t _rets) = 0;
/// `_rets` even for non-returning function matters in case of stack height calculation.
virtual FunctionID registerFunction(uint8_t _args, uint8_t _rets, bool _nonReturning) = 0;
/// Selects a function as a target for newly appended instructions.
/// May only be called after the main code section is already filled and
/// must not be called when another function is already selected.
Expand Down
4 changes: 2 additions & 2 deletions libyul/backends/evm/EthAssemblyAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ std::pair<std::shared_ptr<AbstractAssembly>, AbstractAssembly::SubID> EthAssembl
return {std::make_shared<EthAssemblyAdapter>(*assembly), static_cast<size_t>(sub.data())};
}

AbstractAssembly::FunctionID EthAssemblyAdapter::registerFunction(uint8_t _args, uint8_t _rets)
AbstractAssembly::FunctionID EthAssemblyAdapter::registerFunction(uint8_t _args, uint8_t _rets, bool _nonReturning)
{
return m_assembly.createFunction(_args, _rets);
return m_assembly.createFunction(_args, _rets, _nonReturning);
}

void EthAssemblyAdapter::beginFunction(AbstractAssembly::FunctionID _functionID)
Expand Down
2 changes: 1 addition & 1 deletion libyul/backends/evm/EthAssemblyAdapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class EthAssemblyAdapter: public AbstractAssembly
void appendJumpToIf(LabelID _labelId, JumpType _jumpType) override;
void appendAssemblySize() override;
std::pair<std::shared_ptr<AbstractAssembly>, SubID> createSubAssembly(bool _creation, std::string _name = {}) override;
AbstractAssembly::FunctionID registerFunction(uint8_t _args, uint8_t _rets) override;
AbstractAssembly::FunctionID registerFunction(uint8_t _args, uint8_t _rets, bool _nonReturning) override;
void beginFunction(AbstractAssembly::FunctionID _functionID) override;
void endFunction() override;
void appendFunctionCall(FunctionID _functionID) override;
Expand Down
13 changes: 7 additions & 6 deletions libyul/backends/evm/NoOutputAssembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ std::pair<std::shared_ptr<AbstractAssembly>, AbstractAssembly::SubID> NoOutputAs
return {};
}

AbstractAssembly::FunctionID NoOutputAssembly::registerFunction(uint8_t _args, uint8_t _rets)
AbstractAssembly::FunctionID NoOutputAssembly::registerFunction(uint8_t _args, uint8_t _rets, bool)
{
yulAssert(m_context.numFunctions <= std::numeric_limits<AbstractAssembly::FunctionID>::max());
AbstractAssembly::FunctionID id = static_cast<AbstractAssembly::FunctionID>(m_context.numFunctions++);
m_context.functionSignatures[id] = std::make_pair(_args, _rets);
m_context.functionSignatures[id] = {_args, _rets};
return id;
}

Expand All @@ -139,22 +139,23 @@ void NoOutputAssembly::beginFunction(FunctionID _functionID)
void NoOutputAssembly::endFunction()
{
yulAssert(m_currentFunctionID != 0, "End function without begin function.");
auto const rets = m_context.functionSignatures.at(m_currentFunctionID).second;
yulAssert(rets == 0x80 || m_stackHeight == rets, "Stack height mismatch at function end.");
auto const [_, rets] = m_context.functionSignatures.at(m_currentFunctionID);
yulAssert(m_stackHeight == rets, "Stack height mismatch at function end.");
m_currentFunctionID = 0;
}

void NoOutputAssembly::appendFunctionCall(FunctionID _functionID)
{
auto [args, rets] = m_context.functionSignatures.at(_functionID);
m_stackHeight += static_cast<int>(rets) - static_cast<int>(args);
solAssert(m_stackHeight >= 0);
}

void NoOutputAssembly::appendFunctionReturn()
{
yulAssert(m_currentFunctionID != 0, "End function without begin function.");
auto const rets = m_context.functionSignatures.at(m_currentFunctionID).second;
yulAssert(rets == 0x80 || m_stackHeight == rets, "Stack height mismatch at function end.");
auto const [_, rets] = m_context.functionSignatures.at(m_currentFunctionID);
yulAssert(m_stackHeight == rets, "Stack height mismatch at function end.");
}

void NoOutputAssembly::appendDataOffset(std::vector<AbstractAssembly::SubID> const&)
Expand Down
2 changes: 1 addition & 1 deletion libyul/backends/evm/NoOutputAssembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class NoOutputAssembly: public AbstractAssembly

void appendAssemblySize() override;
std::pair<std::shared_ptr<AbstractAssembly>, SubID> createSubAssembly(bool _creation, std::string _name = "") override;
FunctionID registerFunction(uint8_t _args, uint8_t _rets) override;
FunctionID registerFunction(uint8_t _args, uint8_t _rets, bool _nonReturning) override;
void beginFunction(FunctionID) override;
void endFunction() override;
void appendFunctionCall(FunctionID _functionID) override;
Expand Down
10 changes: 4 additions & 6 deletions libyul/backends/evm/OptimizedEVMCodeTransform.cpp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to make sure I understand what we're fixing here. inline_assembly_in_modifiers.sol triggers this assert:

yulAssert(m_assembly.stackHeight() == static_cast<int>(m_stack.size()), "");

The assert failing means that Assembly::stackHeight() (which is calculated by adding deposit(), i.e. returnValue() - arguments(), for each appended assembly item) does not match the stack height tracked internally by the Yul->EVM transform.

And the mismatch comes from the fact that for non-returning calls the transform uses the number of returns declared by the function while the assembly uses 0. The fix here is to change the calculation in assembly to use the number from function definition as well.

This means that in AssemblyItem::FunctionSignature the number of returns will never be 0x80, but the actual number of returns. Since AssemblyItem::returnValues() was the only place that checked for that special value, canContinue() is now unused and can be removed.

On the other hand in Assembly::CodeSection we never need to know the number of returns of non-returning functions so we could have left it as is. But changing it to match FunctionSignature and replacing the special value with a canContinue flag makes things simpler (e.g. we don't need to special-case it when comparing them).

Am I missing anything?

Copy link
Contributor Author

@rodiazet rodiazet Dec 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You get it right. Potentially we could leave CodeSection definition unchanged because the flag canContinue (nonReturning) is needed only to decide on CallF or JumpF when adding function call but I would leave it as it is now. It's clearer IMO.

Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ std::vector<StackTooDeepError> OptimizedEVMCodeTransform::run(
for (Scope::Function const* function: dfg->functions)
{
auto const& info = dfg->functionInfo.at(function);
yulAssert(info.parameters.size() <= 0x7f);
yulAssert(info.returnVariables.size() <= 0x7f);
// According to EOF spec function output num equals 0x80 means non-returning function
yulAssert(info.parameters.size() <= std::numeric_limits<uint8_t>::max());
yulAssert(info.returnVariables.size() <= std::numeric_limits<uint8_t>::max());
auto functionID = _assembly.registerFunction(
static_cast<uint8_t>(info.parameters.size()),
static_cast<uint8_t>(info.canContinue ? info.returnVariables.size() : 0x80)
static_cast<uint8_t>(info.returnVariables.size()),
!info.canContinue
);
_builtinContext.functionIDs[function] = functionID;
}
Expand Down Expand Up @@ -128,8 +128,6 @@ void OptimizedEVMCodeTransform::operator()(CFG::FunctionCall const& _call)
for (size_t i = 0; i < _call.function.get().numArguments + (useReturnLabel ? 1 : 0); ++i)
m_stack.pop_back();
// Push return values to m_stack.
if (!m_simulateFunctionsWithJumps)
yulAssert(_call.function.get().numReturns < 0x80, "Num of function output >= 128");
for (size_t index: ranges::views::iota(0u, _call.function.get().numReturns))
m_stack.emplace_back(TemporarySlot{_call.functionCall, index});
yulAssert(m_assembly.stackHeight() == static_cast<int>(m_stack.size()), "");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ contract C {
return true;
}
}
// ====
// bytecodeFormat: legacy,>=EOFv1
// ----
// f() -> true
// g() -> FAILURE