Skip to content

Commit e9915c7

Browse files
Add parser for LEB128 integers. (#31)
* Add parser for LEB128 integers. * Add tests for LEB128 overflow. Fixed overflow check and added a max byte count check.
1 parent 24ff0d6 commit e9915c7

File tree

4 files changed

+176
-0
lines changed

4 files changed

+176
-0
lines changed

Sources/BinaryParsing/Parsers/Integer.swift

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,86 @@ extension FixedWidthInteger where Self: BitwiseCopyable {
696696
}
697697
self = try Self(_throwing: T(truncatingIfNeeded: result))
698698
}
699+
700+
/// Creates an integer by parsing a little-endian base 128 (LEB128) encoded
/// value of this type's size from the start of the given parser span.
///
/// Each encoded byte carries seven payload bits (least-significant group
/// first); the high bit of a byte is set when another byte follows. For
/// signed types, bit 6 of the final byte is treated as the sign and is
/// extended through the remaining high bits of the result.
///
/// - Parameter input: The `ParserSpan` to parse from. If parsing succeeds,
///   the start position of `input` is moved forward by the number of bytes
///   consumed. This will usually be `ceil(N / 7)` where N is the minimum
///   number of bits required to encode this integer. In rare cases an encoder
///   may produce valid but unnecessary padding bytes, in which case the
///   number of bytes consumed can be up to `ceil(bitWidth / 7)` where
///   bitWidth is the full width of this type.
/// - Throws: A `ParsingError` if `input` overflows the range of this integer
///   type, if more than `ceil(bitWidth / 7)` bytes are encoded, or if a byte
///   cannot be read from `input`.
@inlinable
@_lifetime(&input)
public init(parsingLEB128 input: inout ParserSpan) throws(ParsingError) {
  var result: Self = 0
  // Bit offset at which the next 7-bit group is placed. Always a multiple
  // of seven: it starts at zero and only ever grows by seven per byte.
  var shift = 0
  var byte: UInt8 = 0
  while true {
    byte = try UInt8(parsing: &input)
    let lowBits = byte & 0x7F
    let availableBits = Self.bitWidth - shift
    let isFinalByte = (byte & 0x80) == 0
    if availableBits <= 0 {
      // `shift / 7` bytes have already been decoded. Because `shift` is a
      // multiple of seven, `shift >= bitWidth` means that count has already
      // reached `ceil(bitWidth / 7)`, so this byte always exceeds the
      // maximum encoded length for this type.
      throw ParsingError(
        status: .invalidValue,
        location: input.startPosition)
    } else if availableBits < 7 {
      // Only the low `availableBits` bits of this group fit in the result.
      let allowedMask: UInt8 = (1 &<< availableBits) &- 1
      let extraBits: UInt8 = lowBits & ~allowedMask
      if Self.isSigned {
        // The bits that do not fit must be pure sign padding: either all
        // ones or all zeros.
        let signPadding: UInt8 = (~allowedMask) & 0x7F
        guard extraBits == signPadding || extraBits == 0 else {
          throw ParsingError(
            status: .invalidValue, location: input.startPosition)
        }
      } else {
        // Any set bit beyond the type's width is an overflow.
        guard extraBits == 0 else {
          throw ParsingError(
            status: .invalidValue,
            location: input.startPosition)
        }
      }
      let part = Self(lowBits & allowedMask) << shift
      result |= part
      if Self.isSigned && isFinalByte {
        let finalByteNegative = (byte & 0x40) != 0
        let resultNegative = result & (1 << (Self.bitWidth - 1)) != 0
        if finalByteNegative != resultNegative {
          // The value's sign has flipped - it has wrapped around.
          throw ParsingError(
            status: .invalidValue,
            location: input.startPosition)
        }
      }
    } else {
      // The whole 7-bit group fits.
      result |= Self(lowBits) &<< shift
    }
    shift += 7
    if isFinalByte { break }
  }
  if Self.isSigned {
    // Sign-extend if the encoding ended before filling the full width and
    // the final byte's sign bit (bit 6) is set.
    if shift < Self.bitWidth && (byte & 0x40) != 0 {
      result |= (~0) << shift
    }
  }
  self = result
}
699779
}
700780

701781
extension RawRepresentable where RawValue: MultiByteInteger {

Tests/BinaryParsingTests/IntegerParsingTests.swift

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,12 @@ struct IntegerParsingTests {
171171
}
172172
}
173173
}
174+
175+
do {
176+
let lebEncoded = [UInt8](encodingLEB128: number)
177+
let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) }
178+
#expect(parsed == number)
179+
}
174180
}
175181

176182
try runTest(for: .zero)
@@ -258,6 +264,12 @@ struct IntegerParsingTests {
258264
}
259265
}
260266
}
267+
268+
do {
269+
let lebEncoded = [UInt8](encodingLEB128: number)
270+
let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) }
271+
#expect(parsed == number)
272+
}
261273
}
262274

263275
try runTest(for: .zero)
@@ -352,6 +364,12 @@ struct IntegerParsingTests {
352364
}
353365
}
354366
}
367+
368+
do {
369+
let lebEncoded = [UInt8](encodingLEB128: number)
370+
let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) }
371+
#expect(parsed == number)
372+
}
355373
}
356374

357375
try runTest(for: .zero)
@@ -621,4 +639,20 @@ struct IntegerParsingTests {
621639
try fuzzIntegerCasting(
622640
UInt.self, loadingFrom: UInt64.self, using: &rng)
623641
}
642+
643+
// Some LEB128 encoders emit redundant padding bytes. Such encodings are
// considered valid as long as the number of bytes does not exceed
// `ceil(bitWidth / 7)`. Each case pairs the encoded bytes with the `Int`
// value they are expected to decode to.
@Test(arguments: [
  ([0xFF, 0x00], 0x7F),
  ([0xFF, 0x80, 0x00], 0x7F),
  ([0x80, 0x81, 0x00], 0x80),
  ([0x80, 0x81, 0x80, 0x00], 0x80),
  ([0xFE, 0xFF, 0x7F], -0x02),
])
func validPaddingLEB128(input: [Int], expected: Int) throws {
  // The arguments are written as `Int` literals; narrow them to raw bytes.
  let lebEncoded = input.map(UInt8.init)
  let result = try lebEncoded.withParserSpan { try Int(parsingLEB128: &$0) }
  #expect(result == expected)
}
624658
}

Tests/BinaryParsingTests/TestingSupport.swift

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,35 @@ extension Array where Element == UInt8 {
108108
Swift.withUnsafeBytes(of: value.littleEndian, Array.init)
109109
+ Array(repeating: paddingByte, count: paddingCount)
110110
}
111+
112+
/// Creates the LEB128 encoding of `value`.
///
/// Signed types produce signed LEB128 and unsigned types produce unsigned
/// LEB128; in both cases the shortest encoding is emitted.
init<T: FixedWidthInteger>(encodingLEB128 value: T) {
  var remaining = value
  var encoded: [UInt8] = []
  var isLastGroup = false
  repeat {
    // Low seven bits of whatever is left to encode.
    let payload = UInt8(truncatingIfNeeded: remaining) & 0x7F
    if T.isSigned {
      // Dropping only six bits keeps this group's sign bit in view: the
      // group is final once everything from bit 6 upward is pure sign.
      let signAndAbove = remaining >> 6
      isLastGroup = signAndAbove == 0 || signAndAbove == -1
    } else {
      isLastGroup = (remaining >> 7) == 0
    }
    remaining >>= 7
    // The continuation bit is set on every group except the last one.
    encoded.append(isLastGroup ? payload : payload | 0x80)
  } while !isLastGroup
  self = encoded
}
111140
}
112141

113142
/// A seeded random number generator type.

Tests/BinaryParsingTests/ThrowingOperationsTests.swift

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,37 @@ struct ThrowingOperationsTests {
172172
}
173173
}
174174
}
175+
176+
// Four encoded bytes exceed the three-byte maximum (`ceil(16 / 7)`) for a
// 16-bit integer, so parsing as `Int16` is expected to throw.
@Test(arguments: [[0xFE, 0xFF, 0xFF, 0x7F]])
func tooManyPaddingBytesLEB128(_ input: [Int]) throws {
  let encodedBytes = input.map { UInt8($0) }
  #expect(throws: ParsingError.self) {
    try encodedBytes.withParserSpan {
      try Int16(parsingLEB128: &$0)
    }
  }
}
183+
184+
// Values just outside the range of a narrower type are LEB128-encoded from
// a wider type; parsing them as the narrower type is expected to throw.
@Test func overflowLEB128() async throws {
  /// Encodes `value` as LEB128 and expects parsing the bytes as `T` to throw.
  func overflowTest<
    T: FixedWidthInteger & BitwiseCopyable, U: MultiByteInteger
  >(
    _ type: T.Type,
    value: U,
  ) throws {
    let encodedBytes = [UInt8](encodingLEB128: value)
    #expect(throws: ParsingError.self) {
      try encodedBytes.withParserSpan { try T(parsingLEB128: &$0) }
    }
  }

  for distance in 1...100 {
    // 8-bit targets, encoded from 16-bit values.
    try overflowTest(Int8.self, value: Int16(Int8.min) - Int16(distance))
    try overflowTest(Int8.self, value: Int16(Int8.max) + Int16(distance))
    try overflowTest(UInt8.self, value: UInt16(UInt8.max) + UInt16(distance))

    // 16-bit targets, encoded from 32-bit values.
    try overflowTest(Int16.self, value: Int32(Int16.min) - Int32(distance))
    try overflowTest(Int16.self, value: Int32(Int16.max) + Int32(distance))
    try overflowTest(UInt16.self, value: UInt32(UInt16.max) + UInt32(distance))

    // 32-bit targets, encoded from 64-bit values.
    try overflowTest(Int32.self, value: Int64(Int32.min) - Int64(distance))
    try overflowTest(Int32.self, value: Int64(Int32.max) + Int64(distance))
    try overflowTest(UInt32.self, value: UInt64(UInt32.max) + UInt64(distance))
  }
}
175208
}

0 commit comments

Comments
 (0)