Skip to content

Commit

Permalink
Merge pull request #15550 from ipsilon/eof-functions
Browse files Browse the repository at this point in the history
eof: Support functions (`CALLF`, `RETF`, `JUMPF`)
  • Loading branch information
cameel authored Nov 29, 2024
2 parents 89e43cb + 363f3dd commit b4ecc58
Show file tree
Hide file tree
Showing 36 changed files with 563 additions and 70 deletions.
81 changes: 77 additions & 4 deletions libevmasm/Assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,15 @@ void Assembly::assemblyStream(
f.feed(i, _debugInfoSelection);
f.flush();

// Implementing this requires introduction of CALLF, RETF and JUMPF
if (m_codeSections.size() > 1)
solUnimplemented("Add support for more code sections");
for (size_t i = 1; i < m_codeSections.size(); ++i)
{
_out << std::endl << _prefix << "code_section_" << i << ": assembly {\n";
Functionalizer codeSectionF(_out, _prefix + " ", _sourceCodes, *this);
for (auto const& item: m_codeSections[i].items)
codeSectionF.feed(item, _debugInfoSelection);
codeSectionF.flush();
_out << _prefix << "}" << std::endl;
}

if (!m_data.empty() || !m_subs.empty())
{
Expand Down Expand Up @@ -696,6 +702,49 @@ AssemblyItem Assembly::namedTag(std::string const& _name, size_t _params, size_t
return AssemblyItem{Tag, m_namedTags.at(_name).id};
}

/// Builds the item that transfers control to the code section @a _functionID.
/// A section declared with 0x80 outputs is entered with a JumpF item, every other section with a CallF item.
/// The section has to be declared already and must not be section 0.
AssemblyItem Assembly::newFunctionCall(uint16_t _functionID) const
{
	solAssert(_functionID < m_codeSections.size(), "Call to undeclared function.");
	solAssert(_functionID > 0, "Cannot call section 0");
	auto const& target = m_codeSections.at(_functionID);
	// 0x80 outputs is the marker for which a jump is emitted instead of a call.
	bool const useJump = target.outputs == 0x80;
	if (useJump)
		return AssemblyItem::jumpToFunction(_functionID, target.inputs, target.outputs);
	return AssemblyItem::functionCall(_functionID, target.inputs, target.outputs);
}

/// Builds a function return item.
/// Only allowed while the current code section is not section 0, i.e. between beginFunction() and endFunction().
AssemblyItem Assembly::newFunctionReturn() const
{
	bool const insideFunction = m_currentCodeSection != 0;
	solAssert(insideFunction, "Appending function return without begin function.");
	return AssemblyItem::functionReturn();
}

/// Declares a new code section with an empty item list and returns its index, which serves as the function ID.
/// @param _args number of function inputs, at most 127.
/// @param _rets number of function outputs, at most 0x80.
/// Has to be called while section 0 is the current code section.
/// At most 1024 code sections can exist; exceeding that is reported via solRequire with AssemblyException.
uint16_t Assembly::createFunction(uint8_t _args, uint8_t _rets)
{
	size_t const newSectionIndex = m_codeSections.size();
	solRequire(newSectionIndex < 1024, AssemblyException, "Too many functions for EOF");
	solAssert(m_currentCodeSection == 0, "Functions need to be declared from the main block.");
	solAssert(_rets <= 0x80, "Too many function returns.");
	solAssert(_args <= 127, "Too many function inputs.");
	m_codeSections.push_back(CodeSection{_args, _rets, {}});
	return static_cast<uint16_t>(newSectionIndex);
}

/// Makes the declared code section @a _functionID the current code section.
/// Requires that no other function is open (current section is 0), that the ID is neither 0 nor undeclared,
/// and that the section does not contain any items yet.
void Assembly::beginFunction(uint16_t _functionID)
{
	solAssert(m_currentCodeSection == 0, "Attempted to begin a function before ending the last one.");
	solAssert(_functionID != 0, "Attempt to begin a function with id 0");
	solAssert(_functionID < m_codeSections.size(), "Attempt to begin an undeclared function.");
	solAssert(m_codeSections.at(_functionID).items.empty(), "Function already defined.");
	m_currentCodeSection = _functionID;
}
/// Closes the function opened by beginFunction() by making section 0 the current code section again.
/// Asserts that a function is actually open.
void Assembly::endFunction()
{
	bool const functionOpen = m_currentCodeSection != 0;
	solAssert(functionOpen, "End function without begin function.");
	m_currentCodeSection = 0;
}

AssemblyItem Assembly::newPushLibraryAddress(std::string const& _identifier)
{
h256 h(util::keccak256(_identifier));
Expand Down Expand Up @@ -1402,6 +1451,7 @@ LinkerObject const& Assembly::assembleEOF() const
for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate)
{
auto const sectionStart = ret.bytecode.size();
solAssert(!codeSection.items.empty(), "Empty code section.");
for (AssemblyItem const& item: codeSection.items)
{
// store position of the invalid jump destination
Expand All @@ -1416,7 +1466,10 @@ LinkerObject const& Assembly::assembleEOF() const
item.instruction() != Instruction::RETURNCONTRACT &&
item.instruction() != Instruction::EOFCREATE &&
item.instruction() != Instruction::RJUMP &&
item.instruction() != Instruction::RJUMPI
item.instruction() != Instruction::RJUMPI &&
item.instruction() != Instruction::CALLF &&
item.instruction() != Instruction::JUMPF &&
item.instruction() != Instruction::RETF
);
solAssert(!(item.instruction() >= Instruction::PUSH0 && item.instruction() <= Instruction::PUSH32));
ret.bytecode += assembleOperation(item);
Expand Down Expand Up @@ -1475,6 +1528,26 @@ LinkerObject const& Assembly::assembleEOF() const
appendBigEndianUint16(ret.bytecode, item.data());
break;
}
case CallF:
case JumpF:
{
ret.bytecode.push_back(static_cast<uint8_t>(item.instruction()));
solAssert(item.data() <= std::numeric_limits<uint16_t>::max(), "Invalid callf/jumpf index value.");
size_t const index = static_cast<uint16_t>(item.data());
solAssert(index < m_codeSections.size());
solAssert(item.functionSignature().argsNum <= 127);
solAssert(item.type() == JumpF || item.functionSignature().retsNum <= 127);
solAssert(item.type() == CallF || item.functionSignature().retsNum <= 128);
solAssert(m_codeSections[index].inputs == item.functionSignature().argsNum);
solAssert(m_codeSections[index].outputs == item.functionSignature().retsNum);
// If CallF the function can continue.
solAssert(item.type() == JumpF || item.functionSignature().canContinue());
appendBigEndianUint16(ret.bytecode, item.data());
break;
}
case RetF:
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::RETF));
break;
default:
solAssert(false, "Unexpected opcode while assembling.");
}
Expand Down
19 changes: 19 additions & 0 deletions libevmasm/Assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,17 @@ class Assembly
}

/// @returns the EOF version stored for this assembly, or std::nullopt if none is set.
std::optional<uint8_t> eofVersion() const { return m_eofVersion; }
/// @returns true iff an EOF version is set.
bool supportsFunctions() const { return m_eofVersion.has_value(); }
/// @returns true iff an EOF version is set.
bool supportsRelativeJumps() const { return m_eofVersion.has_value(); }
/// Creates a Tag item with the next unused tag ID and advances the tag counter.
/// Fails with AssemblyException if the counter has reached 0xffffffff.
AssemblyItem newTag() { assertThrow(m_usedTags < 0xffffffff, AssemblyException, ""); return AssemblyItem(Tag, m_usedTags++); }
/// Creates a PushTag item with the next unused tag ID and advances the tag counter.
/// Fails with AssemblyException if the counter has reached 0xffffffff.
AssemblyItem newPushTag() { assertThrow(m_usedTags < 0xffffffff, AssemblyException, ""); return AssemblyItem(PushTag, m_usedTags++); }

AssemblyItem newFunctionCall(uint16_t _functionID) const;
AssemblyItem newFunctionReturn() const;
uint16_t createFunction(uint8_t _args, uint8_t _rets);
void beginFunction(uint16_t _functionID);
void endFunction();

/// Returns a tag identified by the given name. Creates it if it does not yet exist.
AssemblyItem namedTag(std::string const& _name, size_t _params, size_t _returns, std::optional<uint64_t> _sourceID);
AssemblyItem newData(bytes const& _data) { util::h256 h(util::keccak256(util::asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); }
Expand Down Expand Up @@ -111,6 +119,17 @@ class Assembly
return append(AssemblyItem::returnContract(_containerId));
}

/// Appends the item built by newFunctionCall(_functionID) and returns the result of append().
/// The preconditions on @a _functionID are those checked by newFunctionCall().
AssemblyItem appendFunctionCall(uint16_t _functionID)
{
return append(newFunctionCall(_functionID));
}

/// Appends the item built by newFunctionReturn() and returns the result of append().
/// Only allowed while the current code section is not section 0.
AssemblyItem appendFunctionReturn()
{
// newFunctionReturn() checks the same condition with the same message; it is repeated here before appending.
solAssert(m_currentCodeSection != 0, "Appending function return without begin function.");
return append(newFunctionReturn());
}

/// Appends a PushTag item for a newly allocated tag ID followed by JUMP.
/// @returns the result of appending the PushTag item.
AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; }
/// Appends a PushTag item for a newly allocated tag ID followed by JUMPI.
/// @returns the result of appending the PushTag item.
AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; }
/// Appends `_tag.pushTag()` followed by JUMP.
/// @returns the result of appending the pushed tag item.
AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; }
Expand Down
39 changes: 34 additions & 5 deletions libevmasm/AssemblyItem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ std::pair<std::string, std::string> AssemblyItem::nameAndData(langutil::EVMVersi
case ReturnContract:
case RelativeJump:
case ConditionalRelativeJump:
return {instructionInfo(instruction(), _evmVersion).name, m_data != nullptr ? toStringInHex(*m_data) : ""};
case CallF:
case JumpF:
case RetF:
return {instructionInfo(instruction(), _evmVersion).name, ""};
case Push:
return {"PUSH", toStringInHex(data())};
case PushTag:
Expand Down Expand Up @@ -139,6 +142,7 @@ size_t AssemblyItem::bytesRequired(size_t _addressLength, langutil::EVMVersion _
{
case Operation:
case Tag: // 1 byte for the JUMPDEST
case RetF:
return 1;
case Push:
return
Expand Down Expand Up @@ -181,6 +185,8 @@ size_t AssemblyItem::bytesRequired(size_t _addressLength, langutil::EVMVersion _
case RelativeJump:
case ConditionalRelativeJump:
case AuxDataLoadN:
case JumpF:
case CallF:
return 1 + 2;
case EOFCreate:
return 2;
Expand All @@ -195,10 +201,15 @@ size_t AssemblyItem::bytesRequired(size_t _addressLength, langutil::EVMVersion _

size_t AssemblyItem::arguments() const
{
if (hasInstruction())
if (type() == CallF || type() == JumpF)
return functionSignature().argsNum;
else if (hasInstruction())
{
solAssert(instruction() != Instruction::CALLF && instruction() != Instruction::JUMPF);
// The latest EVMVersion is used here, since the InstructionInfo is assumed to be
// the same across all EVM versions except for the instruction name.
return static_cast<size_t>(instructionInfo(instruction(), EVMVersion()).args);
}
else if (type() == VerbatimBytecode)
return std::get<0>(*m_verbatimBytecode);
else if (type() == AssignImmutable)
Expand All @@ -216,6 +227,7 @@ size_t AssemblyItem::returnValues() const
case ReturnContract:
case RelativeJump:
case ConditionalRelativeJump:
case RetF:
// The latest EVMVersion is used here, since the InstructionInfo is assumed to be
// the same across all EVM versions except for the instruction name.
return static_cast<size_t>(instructionInfo(instruction(), EVMVersion()).ret);
Expand All @@ -235,6 +247,9 @@ size_t AssemblyItem::returnValues() const
return std::get<1>(*m_verbatimBytecode);
case AuxDataLoadN:
return 1;
case JumpF:
case CallF:
return functionSignature().canContinue() ? functionSignature().retsNum : 0;
case AssignImmutable:
case UndefinedItem:
break;
Expand All @@ -253,6 +268,9 @@ bool AssemblyItem::canBeFunctional() const
case ReturnContract:
case RelativeJump:
case ConditionalRelativeJump:
case CallF:
case JumpF:
case RetF:
return !isDupInstruction(instruction()) && !isSwapInstruction(instruction());
case Push:
case PushTag:
Expand Down Expand Up @@ -383,6 +401,15 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
case ConditionalRelativeJump:
text = "rjumpi{" + std::string("tag_") + std::to_string(relativeJumpTagID()) + "}";
break;
case CallF:
text = "callf{" + std::string("code_section_") + std::to_string(static_cast<size_t>(data())) + "}";
break;
case JumpF:
text = "jumpf{" + std::string("code_section_") + std::to_string(static_cast<size_t>(data())) + "}";
break;
case RetF:
text = "retf";
break;
}
if (m_jumpType == JumpType::IntoFunction || m_jumpType == JumpType::OutOfFunction)
{
Expand All @@ -405,6 +432,9 @@ std::ostream& solidity::evmasm::operator<<(std::ostream& _out, AssemblyItem cons
case ReturnContract:
case RelativeJump:
case ConditionalRelativeJump:
case CallF:
case JumpF:
case RetF:
_out << " " << instructionInfo(_item.instruction(), EVMVersion()).name;
if (_item.instruction() == Instruction::JUMP || _item.instruction() == Instruction::JUMPI)
_out << "\t" << _item.getJumpTypeAsString();
Expand Down Expand Up @@ -509,10 +539,9 @@ std::string AssemblyItem::computeSourceMapping(
static_cast<int>(_sourceIndicesMap.at(*location.sourceName)) :
-1;
char jump = '-';
// TODO: Uncomment when EOF functions introduced.
if (item.getJumpType() == evmasm::AssemblyItem::JumpType::IntoFunction /*|| item.type() == CallF || item.type() == JumpF*/)
if (item.getJumpType() == evmasm::AssemblyItem::JumpType::IntoFunction || item.type() == CallF || item.type() == JumpF)
jump = 'i';
else if (item.getJumpType() == evmasm::AssemblyItem::JumpType::OutOfFunction /*|| item.type() == RetF*/)
else if (item.getJumpType() == evmasm::AssemblyItem::JumpType::OutOfFunction || item.type() == RetF)
jump = 'o';
int modifierDepth = static_cast<int>(item.m_modifierDepth);

Expand Down
49 changes: 47 additions & 2 deletions libevmasm/AssemblyItem.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ enum AssemblyItemType
ReturnContract, ///< Returns new container (with auxiliary data filled in) to be deployed
RelativeJump, ///< Jumps to relative position accordingly to its argument
ConditionalRelativeJump, ///< Same as RelativeJump but takes condition from the stack
CallF, ///< Jumps to a returning EOF function, adding a new frame to the return stack.
JumpF, ///< Jumps to a returning or non-returning EOF function without changing the return stack.
RetF, ///< Returns from an EOF function, removing a frame from the return stack.
VerbatimBytecode ///< Contains data that is inserted into the bytecode code section without modification.
};

Expand Down Expand Up @@ -105,6 +108,27 @@ class AssemblyItem
m_debugData{langutil::DebugData::create()}
{}

/// Creates a CallF item (instruction CALLF) targeting the code section @a _functionID.
/// @param _args number of inputs of the called function, at most 127.
/// @param _rets number of outputs of the called function, at most 127. A function with 0x80 outputs
/// cannot be called; jumpToFunction() accepts that value.
/// @param _debugData debug data attached to the item.
static AssemblyItem functionCall(uint16_t _functionID, uint8_t _args, uint8_t _rets, langutil::DebugData::ConstPtr _debugData = langutil::DebugData::create())
{
	// TODO: Restructure the constructor so that a CallF/JumpF item cannot be created without a function signature,
	// e.g. by using a constructor template parameterized on the Instruction. The same applies to jumpToFunction().
	// _debugData is a by-value sink parameter: hand it over instead of copying the pointer,
	// like functionReturn() and eofCreate() do.
	AssemblyItem result(CallF, Instruction::CALLF, _functionID, std::move(_debugData));
	solAssert(_args <= 127 && _rets <= 127);
	result.m_functionSignature = {_args, _rets};
	return result;
}
/// Creates a JumpF item (instruction JUMPF) targeting the code section @a _functionID.
/// @param _args number of inputs of the target function, at most 127.
/// @param _rets number of outputs of the target function, at most 128 (0x80).
/// @param _debugData debug data attached to the item.
static AssemblyItem jumpToFunction(uint16_t _functionID, uint8_t _args, uint8_t _rets, langutil::DebugData::ConstPtr _debugData = langutil::DebugData::create())
{
	// _debugData is a by-value sink parameter: hand it over instead of copying the pointer,
	// like functionReturn() and eofCreate() do.
	AssemblyItem result(JumpF, Instruction::JUMPF, _functionID, std::move(_debugData));
	solAssert(_args <= 127 && _rets <= 128);
	result.m_functionSignature = {_args, _rets};
	return result;
}
/// Creates a RetF item (instruction RETF). Its data is set to 0 and no function signature is attached.
/// @param _debugData debug data attached to the item.
static AssemblyItem functionReturn(langutil::DebugData::ConstPtr _debugData = langutil::DebugData::create())
{
	AssemblyItem result(RetF, Instruction::RETF, 0, std::move(_debugData));
	return result;
}

static AssemblyItem eofCreate(ContainerID _containerID, langutil::DebugData::ConstPtr _debugData = langutil::DebugData::create())
{
return AssemblyItem(EOFCreate, Instruction::EOFCREATE, _containerID, std::move(_debugData));
Expand Down Expand Up @@ -143,7 +167,7 @@ class AssemblyItem
void setPushTagSubIdAndTag(size_t _subId, size_t _tag);

AssemblyItemType type() const { return m_type; }
u256 const& data() const { assertThrow(m_type != Operation, util::Exception, ""); return *m_data; }
u256 const& data() const { solAssert(m_type != Operation && m_data != nullptr); return *m_data; }
void setData(u256 const& _data) { assertThrow(m_type != Operation, util::Exception, ""); m_data = std::make_shared<u256>(_data); }

/// This function is used in `Assembly::assemblyJSON`.
Expand All @@ -164,7 +188,10 @@ class AssemblyItem
m_type == EOFCreate ||
m_type == ReturnContract ||
m_type == RelativeJump ||
m_type == ConditionalRelativeJump;
m_type == ConditionalRelativeJump ||
m_type == CallF ||
m_type == JumpF ||
m_type == RetF;
}
/// @returns the instruction of this item (only valid if hasInstruction returns true)
Instruction instruction() const
Expand Down Expand Up @@ -263,12 +290,30 @@ class AssemblyItem

void setImmutableOccurrences(size_t _n) const { m_immutableOccurrences = _n; }

/// Input/output counts of the EOF function targeted by a CallF or JumpF item.
struct FunctionSignature
{
	/// Number of EOF function arguments. At most 127.
	uint8_t argsNum;
	/// Number of EOF function return values. At most 127 for a returning function;
	/// the special value 128 (0x80) marks a non-returning function.
	uint8_t retsNum;

	/// @returns false only if retsNum holds the non-returning marker 0x80.
	bool canContinue() const
	{
		bool const isNonReturning = retsNum == 0x80;
		return !isNonReturning;
	}
};

/// @returns the function signature attached to this item.
/// Asserts that the item is of type CallF or JumpF and that a signature has been set.
FunctionSignature const& functionSignature() const
{
	bool const isFunctionTransfer = m_type == CallF || m_type == JumpF;
	solAssert(isFunctionTransfer);
	solAssert(m_functionSignature.has_value());
	return m_functionSignature.value();
}

private:
size_t opcodeCount() const noexcept;

AssemblyItemType m_type;
Instruction m_instruction; ///< Only valid if m_type == Operation
std::shared_ptr<u256> m_data; ///< Only valid if m_type != Operation
std::optional<FunctionSignature> m_functionSignature; ///< Only valid if m_type == CallF or JumpF
/// If m_type == VerbatimBytecode, this holds number of arguments, number of
/// return variables and verbatim bytecode.
std::optional<std::tuple<size_t, size_t, bytes>> m_verbatimBytecode;
Expand Down
3 changes: 3 additions & 0 deletions libevmasm/GasMeter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,10 @@ unsigned GasMeter::runGas(Instruction _instruction, langutil::EVMVersion _evmVer
case Tier::RJump: return GasCosts::tier1Gas;
case Tier::RJumpI: return GasCosts::rjumpiGas;
case Tier::VeryLow: return GasCosts::tier2Gas;
case Tier::RetF: return GasCosts::tier2Gas;
case Tier::Low: return GasCosts::tier3Gas;
case Tier::CallF: return GasCosts::tier3Gas;
case Tier::JumpF: return GasCosts::tier3Gas;
case Tier::Mid: return GasCosts::tier4Gas;
case Tier::High: return GasCosts::tier5Gas;
case Tier::BlockHash: return GasCosts::tier6Gas;
Expand Down
6 changes: 6 additions & 0 deletions libevmasm/Instruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ std::map<std::string, Instruction> const solidity::evmasm::c_instructions =
{ "LOG3", Instruction::LOG3 },
{ "LOG4", Instruction::LOG4 },
{ "DATALOADN", Instruction::DATALOADN },
{ "CALLF", Instruction::CALLF },
{ "RETF", Instruction::RETF },
{ "JUMPF", Instruction::JUMPF },
{ "RJUMP", Instruction::RJUMP },
{ "RJUMPI", Instruction::RJUMPI },
{ "EOFCREATE", Instruction::EOFCREATE },
Expand Down Expand Up @@ -330,6 +333,9 @@ static std::map<Instruction, InstructionInfo> const c_instructionInfo =
{Instruction::LOG2, {"LOG2", 0, 4, 0, true, Tier::Special}},
{Instruction::LOG3, {"LOG3", 0, 5, 0, true, Tier::Special}},
{Instruction::LOG4, {"LOG4", 0, 6, 0, true, Tier::Special}},
{Instruction::RETF, {"RETF", 0, 0, 0, true, Tier::RetF}},
{Instruction::CALLF, {"CALLF", 2, 0, 0, true, Tier::CallF}},
{Instruction::JUMPF, {"JUMPF", 2, 0, 0, true, Tier::JumpF}},
{Instruction::EOFCREATE, {"EOFCREATE", 1, 4, 1, true, Tier::Special}},
{Instruction::RETURNCONTRACT, {"RETURNCONTRACT", 1, 2, 0, true, Tier::Special}},
{Instruction::CREATE, {"CREATE", 0, 3, 1, true, Tier::Special}},
Expand Down
Loading

0 comments on commit b4ecc58

Please sign in to comment.