Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix inconsistent negative subassembly indices between different sizeof(size_t) #15955

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Compiler Features:


Bugfixes:
* Commandline Interface: Fix possible inconsistency in subassembly IDs between target architectures in `--asm-json` output.
* SMTChecker: Fix incorrect analysis when only a subset of contracts is selected with `--model-checker-contracts`.


Expand Down
69 changes: 34 additions & 35 deletions libevmasm/Assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <fmt/format.h>

#include <range/v3/algorithm/any_of.hpp>
#include <range/v3/algorithm/find_if.hpp>
#include <range/v3/view/drop_exactly.hpp>
#include <range/v3/view/enumerate.hpp>
#include <range/v3/view/map.hpp>
Expand Down Expand Up @@ -667,17 +668,17 @@ std::pair<std::shared_ptr<Assembly>, std::vector<std::string>> Assembly::fromJSO
return std::make_pair(result, _level == 0 ? parsedSourceList : std::vector<std::string>{});
}

void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
{
_assembliesOnPath.push_back(this);
for (_pathFromRoot.push_back(0); _pathFromRoot.back() < m_subs.size(); ++_pathFromRoot.back())
for (_pathFromRoot.push_back(SubAssemblyID{0}); _pathFromRoot.back().value < m_subs.size(); ++_pathFromRoot.back().value)
{
for (size_t distanceFromRoot = 0; distanceFromRoot < _assembliesOnPath.size(); ++distanceFromRoot)
_assembliesOnPath[distanceFromRoot]->encodeSubPath(
_pathFromRoot | ranges::views::drop_exactly(distanceFromRoot) | ranges::to<std::vector>
);

m_subs[_pathFromRoot.back()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
m_subs[_pathFromRoot.back().asIndex()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
}
}

Expand Down Expand Up @@ -798,20 +799,20 @@ std::map<u256, u256> const& Assembly::optimiseInternal(

// Run optimisation for sub-assemblies.
// TODO: verify and double-check this for EOF.
for (size_t subId = 0; subId < m_subs.size(); ++subId)
for (SubAssemblyID subID {0}; subID.value < m_subs.size(); ++subID.value)
{
OptimiserSettings settings = _settings;
Assembly& sub = *m_subs[subId];
Assembly& sub = *m_subs[subID.asIndex()];
std::set<size_t> referencedTags;
for (auto& codeSection: m_codeSections)
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subId);
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subID);
std::map<u256, u256> const& subTagReplacements = sub.optimiseInternal(
settings,
referencedTags
);
// Apply the replacements (can be empty).
for (auto& codeSection: m_codeSections)
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subId);
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subID);
}

std::map<u256, u256> tagReplacements;
Expand Down Expand Up @@ -1188,7 +1189,7 @@ LinkerObject const& Assembly::assemble() const
[[nodiscard]] bytes Assembly::assembleTag(AssemblyItem const& _item, size_t _pos, bool _addJumpDest) const
{
solRequire(_item.data() != 0, AssemblyException, "Invalid tag position.");
solRequire(_item.splitForeignPushTag().first == std::numeric_limits<size_t>::max(), AssemblyException, "Foreign tag.");
solRequire(_item.splitForeignPushTag().first.empty(), AssemblyException, "Foreign tag.");
solRequire(_pos < 0xffffffffL, AssemblyException, "Tag too large.");
size_t tagId = static_cast<size_t>(_item.data());
solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits<size_t>::max(), AssemblyException, "Duplicate tag position.");
Expand Down Expand Up @@ -1259,10 +1260,10 @@ LinkerObject const& Assembly::assembleLegacy() const
if (item.type() == PushTag)
{
auto [subId, tagId] = item.splitForeignPushTag();
if (subId == std::numeric_limits<size_t>::max())
if (subId.empty())
continue;
assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id");
auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId);
solRequire(subId.value < m_subs.size(), AssemblyException, "Invalid sub id");
auto subTagPosition = m_subs[subId.asIndex()]->m_tagPositionsInBytecode.at(tagId);
assertThrow(subTagPosition != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition));
}
Expand Down Expand Up @@ -1332,17 +1333,18 @@ LinkerObject const& Assembly::assembleLegacy() const
break;
case PushSub:
assembleInstruction([&]() {
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
ret.bytecode.push_back(dataRefPush);
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
subRefs.emplace(
SubAssemblyID{item.data()},
ret.bytecode.size()
);
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
});
break;
case PushSubSize:
{
assembleInstruction([&](){
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
auto s = subAssemblyById(SubAssemblyID{item.data()})->assemble().bytecode.size();
item.setPushedValue(u256(s));
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
Expand Down Expand Up @@ -1478,14 +1480,12 @@ LinkerObject const& Assembly::assembleLegacy() const
}
for (auto const& i: tagRefs)
{
size_t subId;
size_t tagId;
std::tie(subId, tagId) = i.second;
assertThrow(subId == std::numeric_limits<size_t>::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id");
auto [subId, tagId] = i.second;
solRequire(subId.empty() || subId.value < m_subs.size(), AssemblyException, "Invalid sub id");
std::vector<size_t> const& tagPositions =
subId == std::numeric_limits<size_t>::max() ?
subId.empty() ?
m_tagPositionsInBytecode :
m_subs[subId]->m_tagPositionsInBytecode;
m_subs[subId.asIndex()]->m_tagPositionsInBytecode;
assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag.");
size_t pos = tagPositions[tagId];
assertThrow(pos != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
Expand Down Expand Up @@ -1796,47 +1796,46 @@ LinkerObject const& Assembly::assembleEOF() const
return ret;
}

std::vector<size_t> Assembly::decodeSubPath(size_t _subObjectId) const
std::vector<SubAssemblyID> Assembly::decodeSubPath(SubAssemblyID _subObjectId) const
{
if (_subObjectId < m_subs.size())
if (_subObjectId.value < m_subs.size())
return {_subObjectId};

auto subIdPathIt = find_if(
m_subPaths.begin(),
m_subPaths.end(),
auto subIdPathIt = ranges::find_if(
m_subPaths,
[_subObjectId](auto const& subId) { return subId.second == _subObjectId; }
);

assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, "");
return subIdPathIt->first;
}

size_t Assembly::encodeSubPath(std::vector<size_t> const& _subPath)
SubAssemblyID Assembly::encodeSubPath(std::vector<SubAssemblyID> const& _subPath)
{
assertThrow(!_subPath.empty(), AssemblyException, "");
if (_subPath.size() == 1)
{
assertThrow(_subPath[0] < m_subs.size(), AssemblyException, "");
solAssert(_subPath[0].value < m_subs.size());
return _subPath[0];
}

if (m_subPaths.find(_subPath) == m_subPaths.end())
if (!m_subPaths.contains(_subPath))
{
size_t objectId = std::numeric_limits<size_t>::max() - m_subPaths.size();
assertThrow(objectId >= m_subs.size(), AssemblyException, "");
SubAssemblyID const objectId{std::numeric_limits<SubAssemblyID::value_type>::max() - m_subPaths.size()};
solAssert(objectId.value >= m_subs.size());
m_subPaths[_subPath] = objectId;
}

return m_subPaths[_subPath];
}

Assembly const* Assembly::subAssemblyById(size_t _subId) const
Assembly const* Assembly::subAssemblyById(SubAssemblyID const _subId) const
{
std::vector<size_t> subIds = decodeSubPath(_subId);
std::vector<SubAssemblyID> subIDs = decodeSubPath(_subId);
Assembly const* currentAssembly = this;
for (size_t currentSubId: subIds)
for (auto const& subID: subIDs)
{
currentAssembly = currentAssembly->m_subs.at(currentSubId).get();
currentAssembly = currentAssembly->m_subs.at(subID.asIndex()).get();
assertThrow(currentAssembly, AssemblyException, "");
}

Expand Down
23 changes: 12 additions & 11 deletions libevmasm/Assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <libevmasm/AssemblyItem.h>
#include <libevmasm/LinkerObject.h>
#include <libevmasm/Exceptions.h>
#include <libevmasm/SubAssemblyID.h>

#include <liblangutil/DebugInfoSelection.h>
#include <liblangutil/EVMVersion.h>
Expand All @@ -47,9 +48,9 @@ using AssemblyPointer = std::shared_ptr<Assembly>;

class Assembly
{
using TagRefs = std::map<size_t, std::pair<size_t, size_t>>;
using TagRefs = std::map<size_t, std::pair<SubAssemblyID, size_t>>;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use size_t for tag ID but that should really be uint64_t as well. Would be nice to change that too at some point.

The tag+sub pair would also be better off as a proper struct with methods for converting to/from u256. We wouldn't then have to hard-code all those masks and 64s all over the place.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, it would also make it easier to reason about the whole program and data flow.

using DataRefs = std::multimap<util::h256, unsigned>;
using SubAssemblyRefs = std::multimap<size_t, size_t>;
using SubAssemblyRefs = std::multimap<SubAssemblyID, size_t>;
using ProgramSizeRefs = std::vector<unsigned>;
using LinkRef = std::pair<size_t, std::string>;

Expand Down Expand Up @@ -81,8 +82,8 @@ class Assembly
AssemblyItem newData(bytes const& _data) { util::h256 h(util::keccak256(util::asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); }
bytes const& data(util::h256 const& _i) const { return m_data.at(_i); }
AssemblyItem newSub(AssemblyPointer const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
Assembly const& sub(size_t _sub) const { return *m_subs.at(_sub); }
Assembly& sub(size_t _sub) { return *m_subs.at(_sub); }
Assembly const& sub(SubAssemblyID const _sub) const { return *m_subs.at(_sub.asIndex()); }
Assembly& sub(SubAssemblyID const _sub) { return *m_subs.at(_sub.asIndex()); }
size_t numSubs() const { return m_subs.size(); }
AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
AssemblyItem newPushLibraryAddress(std::string const& _identifier);
Expand Down Expand Up @@ -142,9 +143,9 @@ class Assembly
/// Adds a subroutine to the code (in the data section) and pushes its size (via a tag)
/// on the stack. @returns the pushsub assembly item.
AssemblyItem appendSubroutine(AssemblyPointer const& _assembly) { auto sub = newSub(_assembly); append(newPushSubSize(size_t(sub.data()))); return sub; }
void pushSubroutineSize(size_t _subRoutine) { append(newPushSubSize(_subRoutine)); }
void pushSubroutineSize(SubAssemblyID _subRoutine) { append(newPushSubSize(_subRoutine.value)); }
/// Pushes the offset of the subroutine.
void pushSubroutineOffset(size_t _subRoutine) { append(AssemblyItem(PushSub, _subRoutine)); }
void pushSubroutineOffset(SubAssemblyID _subRoutine) { append(AssemblyItem(PushSub, _subRoutine.value)); }

/// Appends @a _data literally to the very end of the bytecode.
void appendToAuxiliaryData(bytes const& _data) { m_auxiliaryData += _data; }
Expand Down Expand Up @@ -216,8 +217,8 @@ class Assembly
/// Mark this assembly as invalid. Calling ``assemble`` on it will throw.
void markAsInvalid() { m_invalid = true; }

std::vector<size_t> decodeSubPath(size_t _subObjectId) const;
size_t encodeSubPath(std::vector<size_t> const& _subPath);
std::vector<SubAssemblyID> decodeSubPath(SubAssemblyID _subObjectId) const;
SubAssemblyID encodeSubPath(std::vector<SubAssemblyID> const& _subPath);

bool isCreation() const { return m_creation; }

Expand Down Expand Up @@ -265,9 +266,9 @@ class Assembly
private:
bool m_invalid = false;

Assembly const* subAssemblyById(size_t _subId) const;
Assembly const* subAssemblyById(SubAssemblyID _subId) const;

void encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
void encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});

std::shared_ptr<std::string const> sharedSourceName(std::string const& _name) const;

Expand Down Expand Up @@ -315,7 +316,7 @@ class Assembly

/// Map from a vector representing a path to a particular sub assembly to sub assembly id.
/// This map is used only for sub-assemblies which are not direct sub-assemblies (where path is having more than one value).
std::map<std::vector<size_t>, size_t> m_subPaths;
std::map<std::vector<SubAssemblyID>, SubAssemblyID> m_subPaths;

/// Contains the tag replacements relevant for super-assemblies.
/// If set, it means the optimizer has run and we will not run it again.
Expand Down
37 changes: 18 additions & 19 deletions libevmasm/AssemblyItem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ std::string toStringInHex(u256 _value)

}

AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
AssemblyItem AssemblyItem::toSubAssemblyTag(SubAssemblyID _subId) const
{
assertThrow(data() < (u256(1) << 64), util::Exception, "Tag already has subassembly set.");
Copy link
Member

@cameel cameel Mar 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this assert correct? I think it will fail when _subID is zero, but zero is a valid ID and an empty one is represented with max() instead. The message indicates that the function considers max() to be set and 0 to be unset, which does not seem right.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, now that the sub ID is well-defined as 64-bits, I think we should have asserts that the upper bits of data are actually zeros.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this assert correct? I think it will fail when _subID is zero, but zero is a valid ID and an empty one is represented with max() instead. The message indicates that the function considers max() to be set and 0 to be unset, which does not seem right.

You are not the only whose head hurts by this stuff :) the condition data() < (u256(1) << 64) checks just if data() fits into 64 bits, as we have

u256(std::numeric_limits<uint64_t>::max()) + 1 == (u256(1) << 64)

So it would pass if data() is zero and it indeed already asserts that the upper bits are zero. Or am I misunderstanding your concern?

assertThrow(m_type == PushTag || m_type == Tag, util::Exception, "");
Expand All @@ -61,20 +61,21 @@ AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
return r;
}

std::pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const
std::pair<SubAssemblyID, size_t> AssemblyItem::splitForeignPushTag() const
{
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
u256 combined = u256(data());
size_t subId = static_cast<size_t>((combined >> 64) - 1);
// the combined u256 is 'dirty', so we can't use the conversion constructor of SubAssemblyID here
SubAssemblyID subID {static_cast<SubAssemblyID::value_type>((combined >> 64) - 1)};
size_t tag = static_cast<size_t>(combined & 0xffffffffffffffffULL);
return std::make_pair(subId, tag);
return std::make_pair(subID, tag);
}

size_t AssemblyItem::relativeJumpTagID() const
{
solAssert(m_type == RelativeJump || m_type == ConditionalRelativeJump);
auto const [subId, tagId] = splitForeignPushTag();
solAssert(subId == std::numeric_limits<size_t>::max(), "Relative jump to sub");
solAssert(subId.empty(), "Relative jump to sub");
return tagId;
}

Expand Down Expand Up @@ -130,13 +131,13 @@ std::pair<std::string, std::string> AssemblyItem::nameAndData(langutil::EVMVersi
util::unreachable();
}

void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag)
void AssemblyItem::setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag)
{
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId == std::numeric_limits<size_t>::max());
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId.empty());
u256 data = _tag;
if (_subId != std::numeric_limits<size_t>::max())
data |= (u256(_subId) + 1) << 64;
if (!_subId.empty())
data |= (u256(_subId.value) + 1) << 64;
setData(data);
}

Expand Down Expand Up @@ -352,13 +353,11 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
break;
case PushTag:
{
size_t sub{0};
size_t tag{0};
std::tie(sub, tag) = splitForeignPushTag();
if (sub == std::numeric_limits<size_t>::max())
auto [sub, tag] = splitForeignPushTag();
if (sub.empty())
text = std::string("tag_") + std::to_string(tag);
else
text = std::string("tag_") + std::to_string(sub) + "_" + std::to_string(tag);
text = std::string("tag_") + std::to_string(sub.value) + "_" + std::to_string(tag);
break;
}
case Tag:
Expand All @@ -372,8 +371,8 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
case PushSubSize:
{
std::vector<std::string> subPathComponents;
for (size_t subPathComponentId: _assembly.decodeSubPath(static_cast<size_t>(data())))
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId));
for (SubAssemblyID subPathComponentId: _assembly.decodeSubPath(SubAssemblyID{data()}))
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId.value));
text =
(type() == PushSub ? "dataOffset"s : "dataSize"s) +
"(" +
Expand Down Expand Up @@ -469,11 +468,11 @@ std::ostream& solidity::evmasm::operator<<(std::ostream& _out, AssemblyItem cons
break;
case PushTag:
{
size_t subId = _item.splitForeignPushTag().first;
if (subId == std::numeric_limits<size_t>::max())
SubAssemblyID subId = _item.splitForeignPushTag().first;
if (subId.empty())
_out << " PushTag " << _item.splitForeignPushTag().second;
else
_out << " PushTag " << subId << ":" << _item.splitForeignPushTag().second;
_out << " PushTag " << subId.value << ":" << _item.splitForeignPushTag().second;
break;
}
case Tag:
Expand Down
7 changes: 4 additions & 3 deletions libevmasm/AssemblyItem.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <libevmasm/Instruction.h>
#include <libevmasm/Exceptions.h>
#include <libevmasm/SubAssemblyID.h>
#include <liblangutil/DebugData.h>
#include <liblangutil/Exceptions.h>
#include <libsolutil/Common.h>
Expand Down Expand Up @@ -169,14 +170,14 @@ class AssemblyItem
AssemblyItem pushTag() const { solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump); return AssemblyItem(PushTag, data()); }
/// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies.
/// @param _subId the identifier of the subassembly the tag is taken from.
AssemblyItem toSubAssemblyTag(size_t _subId) const;
AssemblyItem toSubAssemblyTag(SubAssemblyID _subId) const;
/// @returns splits the data of the push tag into sub assembly id and actual tag id.
/// The sub assembly id of non-foreign push tags is -1.
std::pair<size_t, size_t> splitForeignPushTag() const;
std::pair<SubAssemblyID, size_t> splitForeignPushTag() const;
/// @returns relative jump target tag ID. Asserts that it is not foreign tag.
size_t relativeJumpTagID() const;
/// Sets sub-assembly part and tag for a push tag.
void setPushTagSubIdAndTag(size_t _subId, size_t _tag);
void setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag);

AssemblyItemType type() const { return m_type; }
u256 const& data() const { solAssert(m_type != Operation && m_data != nullptr); return *m_data; }
Expand Down
Loading