diff --git a/README.md b/README.md new file mode 100644 index 0000000..2a37190 --- /dev/null +++ b/README.md @@ -0,0 +1,234 @@ +Patty - A pattern matching library +================================== + +Patty is a library to perform pattern matching in Nim. The patterns have to be variant objects, which in Nim are encoded with a field (usually called `kind`) which varies in an enum, and a different object layout based on the value of this tag. An example would be + +```nim +type + ShapeKind = enum + Circle, Rectangle + Shape = object + case kind: ShapeKind + of Circle: + r: float + of Rectangle: + w, h: float +``` + +If you have such an algebraic data type, you can do the following with Patty: + +```nim +import patty + +proc makeRect(w, h: float): Shape = Shape(kind: Rectangle, w: w, h: h) + +match makeRect(3, 4): + Circle(r: radius): + echo "it is a circle of radius ", radius + Rectangle(w: width, h: height): + echo "it is a rectangle of height ", height +``` + +This will be translated by the `match` macro into the following form + +```nim +let :tmp = makeRect(3, 4) +case :tmp.kind +of Circle: + let radius = :tmp.r + echo "it is a circle of radius ", radius +of Rectangle: + let + width = :tmp.w + height = :tmp.h + echo "it is a rectangle of height ", height +``` + +One can also use `_` for a variable, in which case it will not be bound. That is, the following + +```nim +import patty + +proc makeRect(w, h: float): Shape = Shape(kind: Rectangle, w: w, h: h) + +match makeRect(3, 4): + Circle(r: radius): + echo "it is a circle of radius ", radius + Rectangle(w: _, h: height): + echo "it is a rectangle of height ", height +``` + +becomes + +```nim +let :tmp = makeRect(3, 4) +case :tmp.kind +of Circle: + let radius = :tmp.r + echo "it is a circle of radius ", radius +of Rectangle: + let height = :tmp.h + echo "it is a rectangle of height ", height +``` + +Notice that Patty requires the field you dispatch on to be called `kind`. 
Also, checks are exhaustive: if you miss a case, the compiler will complain. + +Patty also provides another macro to create algebraic data types. It looks like + +```nim +adt Shape: + Circle(r: float) + Rectangle(w: float, h: float) + UnitCircle +``` + +and expands to + +```nim +type + ShapeEnum = enum + Circle, Rectangle, UnitCircle + Shape = object + case kind: ShapeEnum + of Circle: + r: float + of Rectangle: + w: float + h: float + of UnitCircle: + nil +``` + +A couple of limitations of the `adt` macro: + +* field names must be unique across branches (that is, different variants cannot have two fields with the same name). This is actually a limitation of Nim. +* the shortcut that groups field names by type does not seem to work, that is, in the above example one could not write `Rectangle(w, h: float)`. + +In the future, Patty will also add a proper definition of equality and generated constructors. + +Things that do not work (yet) +----------------------------- + +One would expect many forms of pattern matching but, at least for now, the support in Patty is very limited. Things that would be nice to support but do not work yet include: + +* catch-all patterns + +```nim +match c: + Circle(r: r): + echo "it is a circle" + _: + echo "it is not a circle" +``` + +* matching a constant + +```nim +match c: + "hello": + echo "the string was hello" +``` + +* matching an existing variable + +```nim +let x = 5 +match c: + x: + echo "c == 5" +``` + +* irrefutable patterns (no dispatch on `kind`) + +```nim +type Person = object + name: string + age: int +let p = Person(name: "John Doe", age: 37) +match p: + Person(name: n, age: a): + echo n, "is ", a, " years old" +``` + +* nested pattern matching + +```nim +match c: + Circle(Point(x: x, y: y), r: r): + echo "the abscissa of the center is ", x +``` + +* matching without binding + +```nim +match c: + Circle: + echo "it is a circle!"
proc enumsIn(n: NimNode): seq[NimNode] {.compileTime.} =
  ## Collects the constructor names declared in the body of an `adt` block.
  ##
  ## Every child of `n` must be either a constructor-like node such as
  ## `Circle(r: float)`, whose leading identifier is taken, or a bare
  ## identifier such as `UnitCircle`, which is taken as is. Any other
  ## node kind is reported through `error`.
  result = @[]
  for variant in children(n):
    case variant.kind
    of nnkObjConstr:
      let name = variant[0]
      name.expectKind(nnkIdent)
      result.add(name)
    of nnkIdent:
      result.add(variant)
    else:
      error("Invalid ADT case: " & $(toStrLit(variant)))

proc newEnum(name: NimNode, idents: seq[NimNode]): NimNode {.compileTime.} =
  ## Builds the type definition node `name = enum <idents...>`.
  # The enum node gets an empty first child; the members follow it.
  var members = newNimNode(nnkEnumTy)
  members.add(newEmptyNode())
  for member in idents:
    members.add(member)
  result = newNimNode(nnkTypeDef)
  result.add(name, newEmptyNode(), members)

proc makeBranch(n: NimNode): NimNode {.compileTime.} =
  ## Turns one line of an `adt` body into the matching `of` branch of the
  ## generated variant object.
  ##
  ## `Circle(r: float)` yields a branch labelled `Circle` holding one
  ## field definition per `name: type` pair; a bare identifier yields a
  ## branch whose field list contains only `nil`. Any other node kind is
  ## reported through `error`.
  result = newNimNode(nnkOfBranch)
  case n.kind
  of nnkObjConstr:
    var fields = newNimNode(nnkRecList)
    # tail(n) is used to walk the `name: type` pairs that follow the
    # constructor name stored in n[0].
    for field in tail(n):
      field.expectKind(nnkExprColonExpr)
      field.expectMinLen(2)
      fields.add(newIdentDefs(field[0], field[1]))
    result.add(n[0], fields)
  of nnkIdent:
    var noFields = newNimNode(nnkRecList)
    noFields.add(newNilLit())
    result.add(n, noFields)
  else:
    error("Invalid ADT case: " & $(toStrLit(n)))

macro adt*(e: expr, body: stmt): stmt {.immediate.} =
  ## Defines an algebraic data type named `e` from the constructors
  ## listed in `body`, for example
  ##
  ##   adt Shape:
  ##     Circle(r: float)
  ##     Rectangle(w: float, h: float)
  ##
  ## The expansion is a type section with two definitions: an enum named
  ## `<e>Enum` listing the constructor names, and an object `e` with a
  ## `kind` discriminator of that enum type and one branch per
  ## constructor. When compiled with `pattydebug` defined, the generated
  ## code is echoed at compile time.
  e.expectKind(nnkIdent)
  body.expectKind(nnkStmtList)

  # First the enum of the outer identifiers (Circle, Rectangle, ...),
  # which become the possible values of `kind`.
  let kindEnum = ident($(e) & "Enum")
  let kindEnumDef = newEnum(kindEnum, enumsIn(body))

  # Then the variant part of the object: the discriminator followed by
  # one branch per line of the body.
  var variants = newNimNode(nnkRecCase)
  variants.add(newIdentDefs(ident("kind"), kindEnum))
  for variant in children(body):
    variants.add(makeBranch(variant))

  # Finally wrap the variant part into the object type definition.
  var fields = newNimNode(nnkRecList)
  fields.add(variants)
  var objectTy = newNimNode(nnkObjectTy)
  objectTy.add(newEmptyNode(), newEmptyNode(), fields)
  var objectDef = newNimNode(nnkTypeDef)
  objectDef.add(e, newEmptyNode(), objectTy)

  result = newNimNode(nnkTypeSection)
  result.add(kindEnumDef, objectDef)
  when defined(pattydebug):
    echo toStrLit(result)
import unittest, patty


# Tests for the `adt` macro: each test declares its own `Shape` type and
# checks that values of the generated type can be built and read.
suite "adt construction":
  test "basic creation":
    adt Shape:
      Circle(r: float, x: float, y: float)
      Rectangle(w: float, h: float)
      Square(side: int)

    let c = Shape(kind: Circle, r: 4, x: 2, y: 0)
    check c.r == 4.0

  test "allowing empty objects":
    adt Shape:
      Circle(r: float, x: float, y: float)
      Rectangle(w: float, h: float)
      Square(side: int)
      UnitCircle

    let r = Shape(kind: Rectangle, w: 2, h: 5)
    check r.h == 5.0

    # Also build the field-less variant itself, so that the feature this
    # test is named after is exercised and not merely declared.
    let u = Shape(kind: UnitCircle)
    check u.kind == UnitCircle


# Tests for the `match` macro, run against a hand-written variant object.
suite "pattern matching":
  type
    ShapeKind = enum
      Circle, Rectangle
    Shape = object
      case kind: ShapeKind
      of Circle:
        x, y, r: float
      of Rectangle:
        w, h: float

  test "basic matching":
    let c = Shape(kind: Circle, r: 4, x: 2, y: 0)
    var res: float = 0
    match c:
      Circle(x: x, y: y, r: r):
        res = r
      Rectangle(w: w, h: h):
        res = 1
    check res == 4.0

  test "binding to different variable names":
    let c = Shape(kind: Circle, r: 4, x: 2, y: 0)
    var res: float = 0
    match c:
      Circle(x: x, y: y, r: someNumber):
        res = someNumber
      Rectangle(w: w, h: h):
        res = 1
    check res == 4.0

  test "binding a complex expression":
    proc makeRect(w, h: float): Shape =
      Shape(kind: Rectangle, w: w, h: h)

    var res: float = 0
    match makeRect(3, 4):
      Circle(x: x, y: y, r: r):
        res = r
      Rectangle(w: w, h: h):
        res = w + h
    check res == 7.0

  test "ignoring _ bindings":
    let c = Shape(kind: Circle, r: 4, x: 2, y: 0)
    var res: float = 0
    match c:
      Circle(x: _, y: _, r: r):
        res = r
      Rectangle(w: w, h: h):
        res = w + h
    check res == 4.0