diff --git a/contracts/utils/RLP.sol b/contracts/utils/RLP.sol index 5347be2fcd8..f5468ddb78b 100644 --- a/contracts/utils/RLP.sol +++ b/contracts/utils/RLP.sol @@ -17,6 +17,35 @@ import {Memory} from "./Memory.sol"; * * * https://github.com/succinctlabs/optimism-bedrock-contracts/blob/main/rlp/RLPWriter.sol * * https://github.com/succinctlabs/optimism-bedrock-contracts/blob/main/rlp/RLPReader.sol + * + * == Canonical vs Non-Canonical Encodings + * + * According to the Ethereum Yellow Paper, a "canonical" RLP encoding is the unique, minimal + * representation of a value. For scalar values (integers), this means: + * + * * No leading zero bytes (e.g., `0x0123` should be encoded as 2 bytes, not `0x000123` as 3 bytes) + * * Single bytes less than 0x80 must be encoded directly without a prefix wrapper + * * Zero is represented as an empty byte array (prefix `0x80`) + * + * A "non-canonical" encoding represents the same value but doesn't follow these minimality rules. + * For example, encoding the integer 1234 (0x04d2) with a leading zero as `0x830004d2` instead + * of the canonical `0x8204d2`. + * + * [IMPORTANT] + * ==== + * This implementation takes a permissive approach to decoding, accepting some non-canonical + * encodings (e.g., scalar values with leading zero bytes) that would be rejected by + * strict implementations like go-ethereum. This design choice prioritizes compatibility + * with diverse RLP encoders in the ecosystem over strict adherence to the Yellow Paper + * specification's canonicalization requirements. + * + * Users should be aware that: + * + * * Multiple different RLP encodings may decode to the same value (non-injective) + * * Encoding followed by decoding is guaranteed to work correctly + * * External RLP data from untrusted sources may have non-canonical encodings + * * Improperly wrapped single bytes (< 0x80) are still rejected as invalid + * ==== */ library RLP { using Accumulators for *; @@ -121,7 +150,12 @@ library RLP { } } - /// @dev Encode an address as RLP. + /** + * @dev Encode an address as RLP. + * + * The address is encoded with its leading zeros (if it has any). If someone wants to encode the address as a scalar, + * they can cast it to an uint256 and then call the corresponding {encode} function. + */ function encode(address input) internal pure returns (bytes memory result) { assembly ("memory-safe") { result := mload(0x40) @@ -136,7 +170,7 @@ library RLP { if (input < SHORT_OFFSET) { assembly ("memory-safe") { result := mload(0x40) - mstore(result, 1) // length of the encoded data: 1 byte + mstore(result, 0x01) // length of the encoded data: 1 byte mstore8(add(result, 0x20), or(input, mul(0x80, iszero(input)))) // input (zero is encoded as 0x80) mstore(0x40, add(result, 0x21)) // reserve memory } @@ -167,13 +201,17 @@ library RLP { return encode(bytes(input)); } - /// @dev Encode an array of bytes as RLP. + /** + * @dev Encode an array of bytes as RLP. + * This function expects an array of already encoded bytes, not raw bytes. + * Users should call {encode} on each element of the array before calling it. + */ function encode(bytes[] memory input) internal pure returns (bytes memory) { return _encode(input.concat(), LONG_OFFSET); } /// @dev Encode an encoder (list of bytes) as RLP - function encode(Encoder memory self) internal pure returns (bytes memory result) { + function encode(Encoder memory self) internal pure returns (bytes memory) { return _encode(self.acc.flatten(), LONG_OFFSET); } @@ -208,19 +246,55 @@ library RLP { * DECODING - READ FROM AN RLP ENCODED MEMORY SLICE * ****************************************************************************************************************/ - /// @dev Decode an RLP encoded bool. See {encode-bool} + /** + * @dev Decode an RLP encoded bool. See {encode-bool} + * + * NOTE: This function treats any non-zero value as `true`, which is more permissive + * than some implementations (e.g., go-ethereum only accepts `0x00` for false and `0x01` + * for true). For example, `0x02`, `0x03`, etc. will all decode as `true`. + */ function readBool(Memory.Slice item) internal pure returns (bool) { return readUint256(item) != 0; } - /// @dev Decode an RLP encoded address. See {encode-address} + /** + * @dev Decode an RLP encoded address. See {encode-address} + * + * [NOTE] + * ==== + * This function accepts both single-byte encodings (for values 0-127, including + * precompile addresses like 0x01) and the standard 21-byte encoding with the `0x94` prefix. + * For example, `0x01` decodes to `0x0000000000000000000000000000000000000001`. + * + * Additionally, like {readUint256}, this function accepts non-canonical encodings with + * leading zeros. For instance, both `0x01` and `0x940000000000000000000000000000000000000001` + * decode to the same address. + * ==== + */ function readAddress(Memory.Slice item) internal pure returns (address) { uint256 length = item.length(); require(length == 1 || length == 21, RLPInvalidEncoding()); return address(uint160(readUint256(item))); } - /// @dev Decode an RLP encoded uint256. See {encode-uint256} + /** + * @dev Decode an RLP encoded uint256. See {encode-uint256} + * + * [NOTE] + * ==== + * This function accepts non-canonical encodings with leading zero bytes for multi-byte values, + * which differs from the Ethereum Yellow Paper specification and some reference + * implementations like go-ethereum. For example, both `0x88ab54a98ceb1f0ad2` and + * `0x8900ab54a98ceb1f0ad2` will decode to the same uint256 value (12345678901234567890). + * + * However, single bytes less than 0x80 must NOT be wrapped with a prefix. For example, + * `0x8100` is invalid (should be `0x00`), but `0x820000` is valid (two zero bytes). + * + * This permissive behavior is intentional for compatibility with various RLP encoders + * in the ecosystem, but users should be aware that multiple RLP encodings may map + * to the same decoded value (non-injective decoding). + * ==== + */ function readUint256(Memory.Slice item) internal pure returns (uint256) { uint256 length = item.length(); require(length <= 33, RLPInvalidEncoding()); @@ -231,7 +305,14 @@ library RLP { return itemLength == 0 ? 0 : uint256(item.load(itemOffset)) >> (256 - 8 * itemLength); } - /// @dev Decode an RLP encoded bytes32. See {encode-bytes32} + /** + * @dev Decode an RLP encoded bytes32. See {encode-bytes32} + * + * NOTE: Since this function delegates to {readUint256}, it inherits the non-canonical + * encoding acceptance behavior for multi-byte values. Multiple RLP encodings with different + * leading zero bytes may decode to the same bytes32 value, but single bytes < 0x80 must + * not be wrapped with a prefix (e.g., `0x820000` is valid, but `0x8100` is not). + */ function readBytes32(Memory.Slice item) internal pure returns (bytes32) { return bytes32(readUint256(item)); } @@ -241,7 +322,7 @@ library RLP { (uint256 offset, uint256 length, ItemType itemType) = _decodeLength(item); require(itemType == ItemType.Data, RLPInvalidEncoding()); - // Length is checked by {toBytes} + // Length is checked by {slice} return item.slice(offset, length).toBytes(); } @@ -326,9 +407,7 @@ library RLP { * @dev Decodes an RLP `item`'s `length and type from its prefix. * Returns the offset, length, and type of the RLP item based on the encoding rules. */ - function _decodeLength( - Memory.Slice item - ) private pure returns (uint256 _offset, uint256 _length, ItemType _itemtype) { + function _decodeLength(Memory.Slice item) private pure returns (uint256, uint256, ItemType) { uint256 itemLength = item.length(); require(itemLength != 0, RLPInvalidEncoding()); diff --git a/test/utils/RLP.test.js b/test/utils/RLP.test.js index 4759afee7d1..6bf372f0b0d 100644 --- a/test/utils/RLP.test.js +++ b/test/utils/RLP.test.js @@ -32,6 +32,11 @@ describe('RLP', function () { await expect(this.mock.$decodeBool('0x80')).to.eventually.equal(false); // 0 await expect(this.mock.$decodeBool('0x01')).to.eventually.equal(true); // 1 + + // Non-canonical encodings treated as true + await expect(this.mock.$decodeBool('0x02')).to.eventually.equal(true); + await expect(this.mock.$decodeBool('0x03')).to.eventually.equal(true); + await expect(this.mock.$decodeBool('0x7f')).to.eventually.equal(true); }); it('encode/decode addresses', async function () { @@ -44,6 +49,24 @@ describe('RLP', function () { await expect(this.mock.$encode_address(addr)).to.eventually.equal(expected); await expect(this.mock.$decodeAddress(expected)).to.eventually.equal(addr); } + + await expect(this.mock.$decodeAddress('0x940000000000000000000000000000000000001234')).to.eventually.equal( + '0x0000000000000000000000000000000000001234', + ); // Canonical encoding (address as 20 bytes with prefix) + + // Single-byte encoding for precompile addresses + await expect(this.mock.$decodeAddress('0x01')).to.eventually.equal('0x0000000000000000000000000000000000000001'); + await expect(this.mock.$decodeAddress('0x940000000000000000000000000000000000000001')).to.eventually.equal( + '0x0000000000000000000000000000000000000001', + ); // 21-byte encoding + await expect(this.mock.$decodeAddress('0x05')).to.eventually.equal('0x0000000000000000000000000000000000000005'); + await expect(this.mock.$decodeAddress('0x940000000000000000000000000000000000000005')).to.eventually.equal( + '0x0000000000000000000000000000000000000005', + ); // 21-byte encoding + await expect(this.mock.$decodeAddress('0x7f')).to.eventually.equal('0x000000000000000000000000000000000000007f'); + await expect(this.mock.$decodeAddress('0x94000000000000000000000000000000000000007f')).to.eventually.equal( + '0x000000000000000000000000000000000000007f', + ); // 21-byte encoding }); it('encode/decode uint256', async function () { @@ -52,6 +75,18 @@ describe('RLP', function () { await expect(this.mock.$encode_uint256(input)).to.eventually.equal(expected); await expect(this.mock.$decodeUint256(expected)).to.eventually.equal(input); + + await expect(this.mock.$decodeUint256('0x88ab54a98ceb1f0ad2')).to.eventually.equal(12345678901234567890n); // Canonical encoding for 12345678901234567890 + await expect(this.mock.$decodeUint256('0x8900ab54a98ceb1f0ad2')).to.eventually.equal(12345678901234567890n); // Non-canonical encoding with leading zero for the same value + + await expect(this.mock.$decodeUint256('0x80')).to.eventually.equal(0n); // Canonical encoding for 0 + await expect(this.mock.$decodeUint256('0x820000')).to.eventually.equal(0n); // Non-canonical encoding with leading zero + await expect(this.mock.$decodeUint256('0x83000000')).to.eventually.equal(0n); // Non-canonical encoding with two leading zeros + await expect(this.mock.$decodeUint256('0x8400000000')).to.eventually.equal(0n); // Non-canonical encoding with three leading zeros + + await expect(this.mock.$decodeUint256('0x8204d2')).to.eventually.equal(1234n); // Canonical + await expect(this.mock.$decodeUint256('0x830004d2')).to.eventually.equal(1234n); // With leading zero + await expect(this.mock.$decodeUint256('0x84000004d2')).to.eventually.equal(1234n); // With two leading zeros } }); @@ -67,6 +102,25 @@ describe('RLP', function () { await expect(this.mock.$encode_bytes32(input)).to.eventually.equal(expected); await expect(this.mock.$decodeBytes32(expected)).to.eventually.equal(input); } + + await expect(this.mock.$decodeBytes32('0x8204d2')).to.eventually.equal( + '0x00000000000000000000000000000000000000000000000000000000000004d2', + ); // Canonical encoding for 1234 + await expect(this.mock.$decodeBytes32('0x830004d2')).to.eventually.equal( + '0x00000000000000000000000000000000000000000000000000000000000004d2', + ); // Non-canonical encoding with leading zero + await expect(this.mock.$decodeBytes32('0x84000004d2')).to.eventually.equal( + '0x00000000000000000000000000000000000000000000000000000000000004d2', + ); // Non-canonical encoding with two leading zeros + + // Canonical encoding for zero and non-canonical encodings with leading zeros + await expect(this.mock.$decodeBytes32('0x80')).to.eventually.equal( + '0x0000000000000000000000000000000000000000000000000000000000000000', + ); + // 1 leading zero is not allowed for single bytes less than 0x80, they must be encoded as themselves + await expect(this.mock.$decodeBytes32('0x820000')).to.eventually.equal( + '0x0000000000000000000000000000000000000000000000000000000000000000', + ); // Non-canonical encoding with two leading zeros }); it('encode/decode empty byte', async function () { @@ -123,6 +177,7 @@ describe('RLP', function () { it('encode/decode strings', async function () { for (const input of [ '', // empty string + '000000000cat', // string with leading zeros 'dog', 'Lorem ipsum dolor sit amet, consectetur adipisicing elit', ]) {