diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 0000000..54a4e61
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,37 @@
+version: '{build}'
+
+image: Visual Studio 2015
+
+cache:
+  - NimBinaries
+
+matrix:
+  # We always want 32 and 64-bit compilation
+  fast_finish: false
+
+platform:
+  - x86
+  - x64
+
+# when multiple CI builds are queued, the tested commit needs to be in the last X commits cloned with "--depth X"
+clone_depth: 10
+
+install:
+  # use the newest versions documented here: https://www.appveyor.com/docs/windows-images-software/#mingw-msys-cygwin
+  - IF "%PLATFORM%" == "x86" SET PATH=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin;%PATH%
+  - IF "%PLATFORM%" == "x64" SET PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH%
+
+  # build nim from our own branch - this to avoid the day-to-day churn and
+  # regressions of the fast-paced Nim development while maintaining the
+  # flexibility to apply patches
+  - curl -O -L -s -S https://raw.githubusercontent.com/status-im/nimbus-build-system/master/scripts/build_nim.sh
+  - env MAKE="mingw32-make -j2" ARCH_OVERRIDE=%PLATFORM% bash build_nim.sh Nim csources dist/nimble NimBinaries
+  - SET PATH=%CD%\Nim\bin;%PATH%
+
+build_script:
+  - nimble install -y
+
+test_script:
+  - nimble test
+
+deploy: off
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..9ee1b9e
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,27 @@
+language: c
+
+# https://docs.travis-ci.com/user/caching/
+cache:
+  directories:
+    - NimBinaries
+
+git:
+  # when multiple CI builds are queued, the tested commit needs to be in the last X commits cloned with "--depth X"
+  depth: 10
+
+os:
+  - linux
+  - osx
+
+install:
+  # build nim from our own branch - this to avoid the day-to-day churn and
+  # regressions of the fast-paced Nim development while maintaining the
+  # flexibility to apply patches
+  - curl -O -L -s -S https://raw.githubusercontent.com/status-im/nimbus-build-system/master/scripts/build_nim.sh
+  - env MAKE="make -j2" bash build_nim.sh Nim csources dist/nimble NimBinaries
+  - export PATH=$PWD/Nim/bin:$PATH
+
+script:
+  - nimble install -y
+  - nimble test
+
diff --git a/protobuf_serialization.nim b/protobuf_serialization.nim
new file mode 100644
index 0000000..ba3110a
--- /dev/null
+++ b/protobuf_serialization.nim
@@ -0,0 +1,456 @@
+import macros, strformat, typetraits, options
+import faststreams
+
+# Field pragmas named after protobuf scalar encodings. They are only
+# declared here; nothing in this module inspects them yet.
+template sint32*() {.pragma.}
+template sint64*() {.pragma.}
+template sfixed32*() {.pragma.}
+template sfixed64*() {.pragma.}
+template fixed32*() {.pragma.}
+template fixed64*() {.pragma.}
+template float*() {.pragma.}
+template double*() {.pragma.}
+
+const
+  MaxMessageSize* = 1'u shl 22
+
+type
+  ProtoBuffer* = object
+    ## Encoder state: the output stream plus the field number used next
+    ## by the auto-numbering `encodeField` overloads.
+    fieldNum: int
+    outstream: OutputStreamVar
+
+  ProtoWireType* = enum
+    ## Protobuf's field types enum
+    Varint, Fixed64, LengthDelimited, StartGroup, EndGroup, Fixed32
+
+  EncodingKind* = enum
+    ekNormal, ekZigzag
+
+  ProtoField*[T] = object
+    ## Protobuf's message field representation object
+    index*: int
+    value*: T
+
+  SomeSVarint* = int | int64 | int32 | int16 | int8 | enum
+  SomeByte* = byte | bool | char | uint8
+  SomeUVarint* = uint | uint64 | uint32 | uint16 | SomeByte
+  SomeVarint* = SomeSVarint | SomeUVarint
+  SomeLengthDelimited* = string | seq[SomeByte] | cstring
+  SomeFixed64* = float64
+  SomeFixed32* = float32
+  SomeFixed* = SomeFixed32 | SomeFixed64
+
+  AnyProtoType* = SomeVarint | SomeLengthDelimited | SomeFixed | object
+
+  UnexpectedTypeError* = object of ValueError
+
+proc newProtoBuffer*(): ProtoBuffer =
+  ## Creates a buffer whose automatic field numbering starts at 1.
+  ProtoBuffer(outstream: OutputStream.init(), fieldNum: 1)
+
+proc output*(proto: ProtoBuffer): seq[byte] {.inline.} =
+  ## Returns the encoded bytes held by the buffer's output stream.
+  proto.outstream.getOutput
+
+template wireType(firstByte: byte): ProtoWireType =
+  ## Wire type stored in the low three bits of a field header byte.
+  (firstByte and 0b111).ProtoWireType
+
+template fieldNumber(firstByte: byte): int =
+  ## Field number stored in bits 3..6 of a one-byte field header, so only
+  ## the numbers 0..15 can be read back.
+  ((firstByte shr 3) and 0b1111).int
+
+template protoHeader*(fieldNum: int, wire: ProtoWireType): byte =
+  ## Get protobuf's field header byte for ``fieldNum`` and ``wire``.
+  ## The header is a single byte, so a ``fieldNum`` above 15 does not
+  ## round-trip through `fieldNumber`.
+  ((cast[uint](fieldNum) shl 3) or cast[uint](wire)).byte
+
+template increaseBytesRead(amount = 1) =
+  ## Convenience template for increasing all of the counts.
+  ## Expects `bytesRead`, `outOffset`, `outBytesProcessed` and
+  ## `numBytesToRead` to be visible where it is used, and raises
+  ## `ValueError` once more bytes were consumed than `numBytesToRead` allows.
+  mixin isSome
+  bytesRead += amount
+  outOffset += amount
+  outBytesProcessed += amount
+  if numBytesToRead.isSome():
+    if (bytesRead > numBytesToRead.get()).unlikely:
+      raise newException(ValueError, &"Number of bytes read ({bytesRead}) exceeded bytes requested ({numBytesToRead.get()})")
+
+# Forward declarations for the overloads that go through a type's `toBytes`.
+proc encodeField*[T: not AnyProtoType](protobuf: var ProtoBuffer, value: T) {.inline.}
+proc encodeField*[T: not AnyProtoType](protobuf: var ProtoBuffer, fieldNum: int, value: T) {.inline.}
+proc encodeField[T: not AnyProtoType](stream: OutputStreamVar, fieldNum: int, value: T) {.inline.}
+
+proc put(stream: OutputStreamVar, value: SomeVarint) {.inline.} =
+  ## Writes `value` as a base-128 varint. Signed integers and enums are
+  ## zigzag encoded first; `bool`, `char` and unsigned integers are
+  ## written unchanged.
+  when value is enum:
+    let normalized = ord(value)
+  elif value is bool or value is char:
+    let normalized = cast[byte](value)
+  else:
+    let normalized = value
+
+  # Do all arithmetic on uint64: the zigzag shift and the 7-bit grouping
+  # then cannot overflow or sign-extend, whatever the input width.
+  when type(normalized) is SomeSVarint:
+    # Encode using zigzag
+    let signed = int64(normalized)
+    var encoded = cast[uint64](signed) shl 1
+    if signed < 0:
+      encoded = not encoded
+  else:
+    var encoded = uint64(normalized)
+
+  while encoded > 0b0111_1111'u64:
+    stream.append byte((encoded and 0b0111_1111'u64) or 0b1000_0000'u64)
+    encoded = encoded shr 7
+  stream.append byte(encoded)
+
+proc encodeField(stream: OutputStreamVar, fieldNum: int, value: SomeVarint) {.inline.} =
+  ## Writes a varint field: the header byte followed by the value.
+  stream.append protoHeader(fieldNum, Varint)
+  stream.put(value)
+
+proc put(stream: OutputStreamVar, value: SomeFixed) {.inline.} =
+  ## Writes the raw bits of a `float32`/`float64`, least significant byte
+  ## first.
+  when typeof(value) is SomeFixed64:
+    var value = cast[int64](value)
+  else:
+    var value = cast[int32](value)
+
+  for _ in 0 ..< sizeof(value):
+    stream.append byte(value and 0b1111_1111)
+    value = value shr 8
+
+proc encodeField(stream: OutputStreamVar, fieldNum: int, value: SomeFixed64) {.inline.} =
+  ## Writes a 64-bit fixed field: the header byte followed by the value.
+  stream.append protoHeader(fieldNum, Fixed64)
+  stream.put(value)
+
+proc encodeField(stream: OutputStreamVar, fieldNum: int, value: SomeFixed32) {.inline.} =
+  ## Writes a 32-bit fixed field: the header byte followed by the value.
+  stream.append protoHeader(fieldNum, Fixed32)
+  stream.put(value)
+
+proc put(stream: OutputStreamVar, value: SomeLengthDelimited) {.inline.} =
+  ## Writes the length of `value` as a varint, then each of its bytes.
+  stream.put(len(value).uint)
+  for b in value:
+    stream.append byte(b)
+
+proc encodeField(stream: OutputStreamVar, fieldNum: int, value: SomeLengthDelimited) {.inline.} =
+  ## Writes a length-delimited field: the header byte, then the value.
+  stream.append protoHeader(fieldNum, LengthDelimited)
+  stream.put(value)
+
+proc put(stream: OutputStreamVar, value: object) {.inline.}
+
+proc encodeField(stream: OutputStreamVar, fieldNum: int, value: object) {.inline.} =
+  ## Writes `value` as a length-delimited embedded message. Nothing is
+  ## written, not even the header, when the object encodes to zero bytes.
+  # This is currently needed in order to get the size
+  # of the output before adding it to the stream.
+  # Maybe there is a better way to do this
+  let objStream = OutputStream.init()
+  objStream.put(value)
+
+  let objOutput = objStream.getOutput()
+  if objOutput.len > 0:
+    stream.append protoHeader(fieldNum, LengthDelimited)
+    stream.put(objOutput)
+
+proc put(stream: OutputStreamVar, value: object) {.inline.} =
+  ## Encodes every field of `value` under its 1-based declaration
+  ## position, skipping fields that still hold their default value.
+  var fieldNum = 1
+  for _, val in value.fieldPairs:
+    # Only store non-default values
+    if default(type(val)) != val:
+      stream.encodeField(fieldNum, val)
+    inc fieldNum
+
+proc encode*(protobuf: var ProtoBuffer, value: object) {.inline.} =
+  ## Encodes the fields of `value` as a top-level message.
+  protobuf.outstream.put(value)
+
+proc encodeField*(protobuf: var ProtoBuffer, fieldNum: int, value: AnyProtoType) {.inline.} =
+  ## Encodes `value` as field number `fieldNum`.
+  protobuf.outstream.encodeField(fieldNum, value)
+
+proc encodeField*(protobuf: var ProtoBuffer, value: AnyProtoType) {.inline.} =
+  ## Encodes `value` under the buffer's next automatic field number and
+  ## advances that number.
+  protobuf.encodeField(protobuf.fieldNum, value)
+  inc protobuf.fieldNum
+
+proc encodeField[T: not AnyProtoType](stream: OutputStreamVar, fieldNum: int, value: T) {.inline.} =
+  ## Encodes a user-defined type by encoding the result of its `toBytes`.
+  stream.encodeField(fieldNum, value.toBytes)
+
+proc encodeField*[T: not AnyProtoType](protobuf: var ProtoBuffer, fieldNum: int, value: T) {.inline.} =
+  ## Encodes the `toBytes` result of `value` as field number `fieldNum`.
+  protobuf.outstream.encodeField(fieldNum, value.toBytes)
+
+proc encodeField*[T: not AnyProtoType](protobuf: var ProtoBuffer, value: T) {.inline.} =
+  ## Encodes the `toBytes` result of `value` under the next automatic number.
+  protobuf.encodeField(protobuf.fieldNum, value.toBytes)
+  inc protobuf.fieldNum
+
+proc get*[T: SomeFixed](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+  outOffset: var int,
+  outBytesProcessed: var int,
+  numBytesToRead = none(int)
+): T {.inline.} =
+  ## Reads `sizeof(T)` bytes starting at `bytes[outOffset]`, least
+  ## significant first, reinterprets them as `T` and advances both counters.
+  var bytesRead = 0
+  when T is SomeFixed64:
+    var value: int64
+  else:
+    var value: int32
+  var shiftAmount = 0
+
+  for _ in 0 ..< sizeof(T):
+    value += type(value)(bytes[outOffset]) shl shiftAmount
+    shiftAmount += 8
+    increaseBytesRead()
+
+  result = cast[T](value)
+
+proc get[T: SomeVarint](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+  outOffset: var int,
+  outBytesProcessed: var int,
+  numBytesToRead = none(int)
+): T {.inline.} =
+  ## Reads a base-128 varint starting at `bytes[outOffset]`, advancing
+  ## `outOffset` and `outBytesProcessed` past it. Signed integers and enums
+  ## are zigzag decoded. Raises `ValueError` if the varint is longer than
+  ## 64 bits.
+  var bytesRead = 0
+  # Accumulate in uint64 so that small or signed target types cannot
+  # overflow or sign-extend while the 7-bit groups are assembled.
+  var raw = 0'u64
+
+  var shiftAmount = 0
+  while true:
+    if shiftAmount > 63:
+      raise newException(ValueError, &"Varint at offset {outOffset} is longer than 64 bits")
+    raw = raw or (uint64(bytes[outOffset] and 0b0111_1111) shl shiftAmount)
+    shiftAmount += 7
+    if (bytes[outOffset] shr 7) == 0:
+      break
+    increaseBytesRead()
+
+  increaseBytesRead()
+
+  when ty is SomeSVarint:
+    # Undo zigzag: the low bit is the sign, the other bits the magnitude.
+    let magnitude = raw shr 1
+    let decoded =
+      if (raw and 1'u64) != 0'u64: cast[int64](not magnitude)
+      else: cast[int64](magnitude)
+    when T is enum:
+      result = T(int(decoded))
+    else:
+      result = T(decoded)
+  elif T is bool:
+    result = raw != 0'u64
+  elif T is char:
+    result = char(int(raw and 0xFF'u64))
+  else:
+    result = T(raw)
+
+proc checkType[T: SomeVarint](tyByte: byte, ty: typedesc[T], offset: int) {.inline.} =
+  ## Raises `UnexpectedTypeError` unless the header announces a varint.
+  let wireTy = wireType(tyByte)
+  if wireTy != Varint:
+    raise newException(UnexpectedTypeError, fmt"Not a varint at offset {offset}! Received a {wireTy}")
+
+proc checkType[T: SomeFixed](tyByte: byte, ty: typedesc[T], offset: int) {.inline.} =
+  ## Raises `UnexpectedTypeError` unless the header announces a fixed value.
+  let wireTy = wireType(tyByte)
+  if wireTy notin {Fixed32, Fixed64}:
+    raise newException(UnexpectedTypeError, fmt"Not a fixed32 or fixed64 at offset {offset}! Received a {wireTy}")
+
+proc checkType[T: SomeLengthDelimited](tyByte: byte, ty: typedesc[T], offset: int) {.inline.} =
+  ## Raises `UnexpectedTypeError` unless the header is length-delimited.
+  let wireTy = wireType(tyByte)
+  if wireTy != LengthDelimited:
+    raise newException(UnexpectedTypeError, fmt"Not a length delimited value at offset {offset}! Received a {wireTy}")
+
+proc checkType[T: object](tyByte: byte, ty: typedesc[T], offset: int) {.inline.} =
+  ## Raises `UnexpectedTypeError` unless the header is length-delimited.
+  let wireTy = wireType(tyByte)
+  if wireTy != LengthDelimited:
+    raise newException(UnexpectedTypeError, fmt"Not an object value at offset {offset}! Received a {wireTy}")
+
+proc get*[T: SomeLengthDelimited](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+  outOffset: var int,
+  outBytesProcessed: var int,
+  numBytesToRead = none(int)
+): T {.inline.} =
+  ## Reads a varint length prefix followed by that many payload bytes and
+  ## returns the payload as `T`, advancing both counters past it.
+  ## Raises `ValueError` if the prefix asks for more bytes than remain.
+  var bytesRead = 0
+  let decodedSize = bytes.get(uint, outOffset, outBytesProcessed, numBytesToRead)
+  let length = decodedSize.int
+
+  # The length prefix comes from the input, so check that the payload is
+  # really there before indexing into `bytes`.
+  if length < 0 or length > bytes.len - outOffset:
+    raise newException(ValueError, &"Length-delimited value at offset {outOffset} needs {decodedSize} bytes but only {bytes.len - outOffset} remain")
+
+  when T is string:
+    result = newString(length)
+    for i in outOffset ..< (outOffset + length):
+      result[i - outOffset] = bytes[i].chr
+  elif T is cstring:
+    # NOTE(review): this reinterprets a temporary seq slice as a cstring,
+    # so the result looks unsafe once the slice is gone - confirm.
+    result = cast[cstring](bytes[outOffset ..< (outOffset + length)])
+  else:
+    result.setLen(length)
+    for i in outOffset ..< (outOffset + length):
+      result[i - outOffset] = type(result[0])(bytes[i])
+
+  increaseBytesRead(length)
+
+proc decodeField*[T: SomeFixed | SomeVarint | SomeLengthDelimited](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+  outOffset: var int,
+  outBytesProcessed: var int,
+  numBytesToRead = none(int)
+): ProtoField[T] {.inline.} =
+  ## Decodes one field: checks the wire type in the header byte at
+  ## `bytes[outOffset]`, reads the field number from it and then the value.
+  ## Raises `UnexpectedTypeError` on a wire type mismatch.
+  var bytesRead = 0
+
+  checkType(bytes[outOffset], ty, outOffset)
+
+  result.index = fieldNumber(bytes[outOffset])
+  increaseBytesRead()
+
+  result.value = bytes.get(ty, outOffset, outBytesProcessed, numBytesToRead)
+
+proc decodeField*[T: object](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+  outOffset: var int,
+  outBytesProcessed: var int,
+  numBytesToRead = none(int)
+): ProtoField[T] {.inline.}
+
+proc decodeField*[T: not AnyProtoType](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+  outOffset: var int,
+  outBytesProcessed: var int,
+  numBytesToRead = none(int)
+): ProtoField[T] {.inline.} =
+  ## Decodes a user-defined type: reads a length-delimited byte sequence
+  ## and converts it with the type's `to(T)` proc.
+
+  var bytesRead = 0
+
+  checkType(bytes[outOffset], seq[byte], outOffset)
+
+  result.index = fieldNumber(bytes[outOffset])
+  increaseBytesRead()
+
+  var value = bytes.get(seq[byte], outOffset, outBytesProcessed, numBytesToRead)
+  result.value = value.to(T)
+
+macro setField(obj: typed, fieldNum: int, offset: int, bytesProcessed: int, bytesToRead: Option[int], value: untyped): untyped =
+  ## Builds a `case fieldNum` statement with one branch per field of `obj`
+  ## (numbered from 1 in declaration order); each branch decodes that field
+  ## from `value`. The last field is the `else` branch, so unknown field
+  ## numbers are decoded as if they were the last field.
+  let typeFields = obj.getTypeInst.getType
+
+  let objFields = typeFields[2]
+  expectKind objFields, nnkRecList
+
+  result = newStmtList()
+
+  let caseStmt = newNimNode(nnkCaseStmt)
+  caseStmt.add(fieldNum)
+
+  for i in 0 ..< len(objFields) - 1:
+    let field = objFields[i]
+    let ofBranch = newNimNode(nnkOfBranch)
+    ofBranch.add(newLit(i+1))
+    ofBranch.add(
+      quote do:
+        `obj`.`field` = decodeField(`value`, type(`obj`.`field`), `offset`, `bytesProcessed`, `bytesToRead`).value
+    )
+    caseStmt.add(ofBranch)
+
+  let field = objFields[len(objFields) - 1]
+  let elseBranch = newNimNode(nnkElse)
+  elseBranch.add(
+    nnkStmtList.newTree(
+      quote do:
+        `obj`.`field` = decodeField(`value`, type(`obj`.`field`), `offset`, `bytesProcessed`, `bytesToRead`).value
+    )
+  )
+  caseStmt.add(elseBranch)
+  result.add(caseStmt)
+
+proc decodeField*[T: object](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+  outOffset: var int,
+  outBytesProcessed: var int,
+  numBytesToRead = none(int)
+): ProtoField[T] {.inline.} =
+  ## Decodes an embedded message: reads the header and the length prefix,
+  ## then decodes nested fields until that many bytes have been consumed.
+  ## Raises `UnexpectedTypeError` if the field is not length-delimited.
+  var bytesRead = 0
+
+  checkType(bytes[outOffset], ty, outOffset)
+
+  result.index = fieldNumber(bytes[outOffset])
+
+  # read LD header
+  # then read only amount of bytes needed
+  increaseBytesRead()
+  let decodedSize = bytes.get(uint, outOffset, outBytesProcessed, numBytesToRead)
+  let bytesToRead = some(decodedSize.int)
+
+  let oldOffset = outOffset
+  while outOffset < oldOffset + bytesToRead.get():
+    let fieldNum = fieldNumber(bytes[outOffset])
+    setField(result.value, fieldNum, outOffset, outBytesProcessed, bytesToRead, bytes)
+
+proc decode*[T: object](
+  bytes: var seq[byte],
+  ty: typedesc[T],
+): T {.inline.} =
+  ## Decodes a top-level message of type `T` from `bytes`, one field at a
+  ## time. A single trailing byte is not examined.
+  var bytesRead = 0
+  var offset = 0
+
+  while offset < bytes.len - 1:
+    let fieldNum = fieldNumber(bytes[offset])
+    setField(result, fieldNum, offset, bytesRead, none(int), bytes)
\ No newline at end of file
diff --git a/protobuf_serialization.nimble b/protobuf_serialization.nimble
new file mode 100644
index 0000000..8512041
--- /dev/null
+++ b/protobuf_serialization.nimble
@@ -0,0 +1,14 @@
+# Package
+
+version = "0.1.0"
+author = "Joey Yakimowich-Payne"
+description = "Protobuf implementation compatible with the nim-serialization framework."
+license = "MIT"
+srcDir = "src"
+skipDirs = @["tests"]
+
+
+
+# Dependencies
+
+requires "nim >= 1.0.6", "faststreams"
diff --git a/tests/config.nims b/tests/config.nims
new file mode 100644
index 0000000..e355636
--- /dev/null
+++ b/tests/config.nims
@@ -0,0 +1 @@
+switch("path", "$projectDir/../")
\ No newline at end of file
diff --git a/tests/test_serialization.nim b/tests/test_serialization.nim
new file mode 100644
index 0000000..e3aa176
--- /dev/null
+++ b/tests/test_serialization.nim
@@ -0,0 +1,280 @@
+import unittest
+import sequtils
+
+import protobuf_serialization
+
+type
+  MyEnum = enum
+    ME1, ME2, ME3
+
+  Test1 = object
+    a: uint
+    b: string
+    c: char
+
+  Test3 = object
+    g {.sfixed32.}: int
+    h: int
+    i: Test1
+    j: string
+    k: bool
+    l: MyInt
+
+  MyInt = distinct int
+
+proc to*(bytes: var seq[byte], ty: typedesc[MyInt]): MyInt =
+
+  var value: int
+
+  var shiftAmount = 0
+
+  for i in 0 ..< len(bytes):
+    value += int(bytes[i]) shl shiftAmount
+    shiftAmount += 8
+
+  result = MyInt(value)
+
+proc toBytes*(value: MyInt): seq[byte] =
+  var value = value.int
+
+  while value > 0:
+    result.add byte(value and 0b1111_1111)
+    value = value shr 8
+
+proc `==`(a, b: MyInt): bool {.borrow.}
+
+suite "Test Varint Encoding":
+  test "Can encode/decode enum field":
+    var proto = newProtoBuffer()
+    var bytesProcessed: int
+
+    proto.encodeField(ME3)
+    proto.encodeField(ME2)
+
+    var output = proto.output
+    assert output == @[8.byte, 4, 16, 2]
+
+    var offset = 0
+
+    let decodedME3 = decodeField(output, MyEnum, offset, bytesProcessed)
+    assert decodedME3.value == ME3
+    assert decodedME3.index == 1
+
+    let decodedME2 = decodeField(output, MyEnum, offset, bytesProcessed)
+    assert decodedME2.value == ME2
+    assert decodedME2.index == 2
+
+  test "Can encode/decode negative number field":
+    var proto = newProtoBuffer()
+    let num = -153452
+    var bytesProcessed: int
+
+    proto.encodeField(num)
+
+    var output = proto.output
+    assert output == @[8.byte, 215, 221, 18]
+
+    var offset = 0
+    let decoded = decodeField(output, int, offset, bytesProcessed)
+    assert decoded.value == num
+    assert decoded.index == 1
+
+  test "Can encode/decode distinct number field":
+    var proto = newProtoBuffer()
+    let num = 114151.MyInt
+    var bytesProcessed: int
+
+    proto.encodeField(num)
+
+    var output = proto.output
+    assert output == @[10.byte, 3, 231, 189, 1]
+
+    var offset = 0
+    let decoded = decodeField(output, MyInt, offset, bytesProcessed)
+    assert decoded.value.int == num.int
+    assert decoded.index == 1
+
+  test "Can encode/decode float32 number field":
+    var proto = newProtoBuffer()
+    let num = float32(1234.164423)
+    var bytesProcessed: int
+
+    proto.encodeField(num)
+
+    var output = proto.output
+    assert output == @[13.byte, 67, 69, 154, 68]
+
+    var offset = 0
+    let decoded = decodeField(output, float32, offset, bytesProcessed)
+    assert decoded.value == num
+    assert decoded.index == 1
+
+  test "Can encode/decode float64 number field":
+    var proto = newProtoBuffer()
+    let num = 12343121537452.1644232341'f64
+    var bytesProcessed: int
+
+    proto.encodeField(num)
+
+    var output = proto.output
+    assert output == @[9.byte, 84, 88, 211, 191, 182, 115, 166, 66]
+
+    var offset = 0
+    let decoded = decodeField(output, float64, offset, bytesProcessed)
+    assert decoded.value == num
+    assert decoded.index == 1
+
+  test "Can encode/decode bool field":
+    var proto = newProtoBuffer()
+    let boolean = true
+    var bytesProcessed: int
+
+    proto.encodeField(boolean)
+
+    var output = proto.output
+    assert output == @[8.byte, 1]
+
+    var offset = 0
+    let decoded = decodeField(output, bool, offset, bytesProcessed)
+    assert bytesProcessed == 2
+    assert decoded.value == boolean
+    assert decoded.index == 1
+
+  test "Can encode/decode char field":
+    var proto = newProtoBuffer()
+    let charVal = 'G'
+    var bytesProcessed: int
+
+    proto.encodeField(charVal)
+
+    var output = proto.output
+    assert output == @[8.byte, ord(charVal).byte]
+
+    var offset = 0
+    let decoded = decodeField(output, char, offset, bytesProcessed)
+    assert bytesProcessed == 2
+    assert decoded.value == charVal
+    assert decoded.index == 1
+
+  test "Can encode/decode unsigned number field":
+    var proto = newProtoBuffer()
+    let num = 123151.uint
+    var bytesProcessed: int
+
+    proto.encodeField(num)
+
+    var output = proto.output
+    assert output == @[8.byte, 143, 194, 7]
+    var offset = 0
+
+    let decoded = decodeField(output, uint, offset, bytesProcessed)
+    assert decoded.value == num
+    assert decoded.index == 1
+
+  test "Can encode/decode string field":
+    var proto = newProtoBuffer()
+    let str = "hey this is a string"
+    var bytesProcessed: int
+
+    proto.encodeField(str)
+
+    var output = proto.output
+    assert output == @[10.byte, 20, 104, 101, 121, 32, 116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 115, 116, 114, 105, 110, 103]
+
+    var offset = 0
+    let decoded = decodeField(output, string, offset, bytesProcessed)
+    assert decoded.value == str
+    assert decoded.index == 1
+
+  test "Can encode/decode char seq field":
+    var proto = newProtoBuffer()
+    let charSeq = "hey this is a string".toSeq
+    var bytesProcessed: int
+
+    proto.encodeField(charSeq)
+
+    var output = proto.output
+    assert output == @[10.byte, 20, 104, 101, 121, 32, 116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 115, 116, 114, 105, 110, 103]
+
+    var offset = 0
+    let decoded = decodeField(output, seq[char], offset, bytesProcessed)
+    assert decoded.value == charSeq
+    assert decoded.index == 1
+
+  test "Can encode/decode uint8 seq field":
+    var proto = newProtoBuffer()
+    let uint8Seq = cast[seq[uint8]]("hey this is a string".toSeq)
+    var bytesProcessed: int
+
+    proto.encodeField(uint8Seq)
+
+    var output = proto.output
+    assert output == @[10.byte, 20, 104, 101, 121, 32, 116, 104, 105, 115, 32, 105, 115, 32, 97, 32, 115, 116, 114, 105, 110, 103]
+
+    var offset = 0
+    let decoded = decodeField(output, seq[uint8], offset, bytesProcessed)
+    assert decoded.value == uint8Seq
+    assert decoded.index == 1
+
+  test "Can encode/decode object field":
+    var proto = newProtoBuffer()
+
+    let obj = Test3(g: 300, h: 200, i: Test1(a: 100, b: "this is a test", c: 'H'), j: "testing", k: true, l: 124521.MyInt)
+
+    proto.encodeField(obj)
+    var offset, bytesProcessed: int
+
+    var output = proto.output
+    let decoded = decodeField(output, Test3, offset, bytesProcessed)
+    assert decoded.value == obj
+    assert decoded.index == 1
+
+  test "Can encode/decode object":
+    var proto = newProtoBuffer()
+
+    let obj = Test3(g: 300, h: 200, i: Test1(a: 100, b: "this is a test", c: 'H'), j: "testing", k: true, l: 124521.MyInt)
+
+    proto.encode(obj)
+    var output = proto.output
+    let decoded = output.decode(Test3)
+    assert decoded == obj
+
+  test "Can encode/decode out of order object":
+    var proto = newProtoBuffer()
+
+    let obj = Test3(g: 400, h: 100, i: Test1(a: 100, b: "this is a test", c: 'H'), j: "testing", k: true, l: 14514.MyInt)
+    proto.encodeField(6, 14514.MyInt)
+    proto.encodeField(2, 100)
+    proto.encodeField(4, "testing")
+    proto.encodeField(1, 400)
+    proto.encodeField(3, Test1(a: 100, b: "this is a test", c: 'H'))
+    proto.encodeField(5, true)
+
+    var output = proto.output
+    let decoded = output.decode(Test3)
+
+    assert decoded == obj
+
+  test "Empty object field does not get encoded":
+    var proto = newProtoBuffer()
+
+    let obj = Test1()
+    proto.encodeField(1, obj)
+
+    var output = proto.output
+    assert output.len == 0
+
+    let decoded = output.decode(Test1)
+    assert decoded == obj
+
+  test "Empty object does not get encoded":
+    var proto = newProtoBuffer()
+
+    let obj = Test1()
+    proto.encode(obj)
+
+    var output = proto.output
+    assert output.len == 0
+
+    let decoded = output.decode(Test1)
+    assert decoded == obj
\ No newline at end of file