part1 and README

2015-01-27 15:51:30 -08:00 · 2015-01-27 15:51:30 -08:00 · b2ccbfb85c
commit b2ccbfb85c
parent e7999a7842
2 changed files with 115 additions and 0 deletions
--- a/9
+++ b/9
@ -0,0 +1,9 @@
+Pykaleidoscope
+==============
+
+Testing
+-------
+
+Some of the files have unit test classes in them. To run all unit tests::
+
+    $ python3.4 -m unittest discover -p "*.py"
--- a/part1.py
+++ b/part1.py
@ -0,0 +1,106 @@
+# Chapter 1 - Lexer
+
+from collections import namedtuple
+from enum import Enum
+
+
+# Each token is a tuple of kind and value. kind is one of the enumeration values
+# in TokenKind. value is the textual value of the token in the input.
+class TokenKind(Enum):
+    # Emitted once, as the final token, with an empty value.
+    EOF = -1
+    # The keyword 'def'.
+    DEF = -2
+    # The keyword 'extern'.
+    EXTERN = -3
+    # A letter followed by letters/digits that is not one of the keywords.
+    IDENTIFIER = -4
+    # A run of digits and '.' characters.
+    NUMBER = -5
+    # A character that fits none of the categories above (e.g. '+' or '(').
+    OPERATOR = -6
+
+
+# Record for one lexed token: `kind` is the token's category and `value` is
+# the text it was built from.
+Token = namedtuple('Token', ['kind', 'value'])
+
+
+class Lexer(object):
+    """Lexer for Kaleidoscope.
+
+    Initialize the lexer with a string buffer. tokens() returns a generator that
+    can be queried for tokens. The generator will emit an EOF token before
+    stopping.
+
+    An empty buffer is accepted and produces only the EOF token.
+
+    Attributes:
+        buf: the input string being lexed.
+        pos: index into buf of the current character; greater than or equal
+            to len(buf) once the input is exhausted.
+        lastchar: the character at buf[pos], or '' once the input is
+            exhausted.
+    """
+    def __init__(self, buf):
+        self.buf = buf
+        self.pos = 0
+        # '' is the end-of-input sentinel; using it for an empty buffer lets
+        # tokens() go straight to EOF instead of raising.
+        self.lastchar = buf[0] if buf else ''
+
+    def tokens(self):
+        """Generate the tokens found in the buffer, ending with EOF.
+
+        Yields:
+            Token(kind, value) tuples in input order. Whitespace and '#'
+            comments (up to the end of the line) produce no tokens. The last
+            token is always Token(TokenKind.EOF, '').
+        """
+        while self.lastchar:
+            # Skip whitespace
+            while self.lastchar.isspace():
+                self._advance()
+            if not self.lastchar:
+                # Trailing whitespace ran into end of input. ''.isspace() is
+                # False, so without this check the empty sentinel would fall
+                # through to the operator branch and be emitted as a token.
+                break
+            if self.lastchar.isalpha():
+                # Identifier or keyword: a letter followed by alphanumerics.
+                # Slice the buffer rather than appending char by char.
+                start = self.pos
+                while self.lastchar.isalnum():
+                    self._advance()
+                id_str = self.buf[start:self.pos]
+                if id_str == 'def':
+                    kind = TokenKind.DEF
+                elif id_str == 'extern':
+                    kind = TokenKind.EXTERN
+                else:
+                    kind = TokenKind.IDENTIFIER
+                yield Token(kind=kind, value=id_str)
+            elif self.lastchar.isdigit() or self.lastchar == '.':
+                # Number: a run of digits and '.' characters. The run is not
+                # validated, so text such as '1.2.3' is a single NUMBER token.
+                start = self.pos
+                while self.lastchar.isdigit() or self.lastchar == '.':
+                    self._advance()
+                num_str = self.buf[start:self.pos]
+                yield Token(kind=TokenKind.NUMBER, value=num_str)
+            elif self.lastchar == '#':
+                # Comment: discard everything up to the end of the line (or
+                # the end of input). The line break itself is consumed by the
+                # whitespace skip on the next iteration.
+                self._advance()
+                while self.lastchar and self.lastchar not in '\r\n':
+                    self._advance()
+            else:
+                # Some other char
+                yield Token(kind=TokenKind.OPERATOR, value=self.lastchar)
+                self._advance()
+        yield Token(kind=TokenKind.EOF, value='')
+
+    def _advance(self):
+        """Step to the next character; lastchar becomes '' at end of input."""
+        self.pos += 1
+        try:
+            self.lastchar = self.buf[self.pos]
+        except IndexError:
+            self.lastchar = ''
+
+
+import unittest
+
+class TestLexer(unittest.TestCase):
+    """Unit tests for Lexer."""
+
+    def test_lexer_simpletokens(self):
+        """Lex a small expression and check the whole token stream."""
+        lexer = Lexer('a+b(koko*.12+115)')
+        toks = list(lexer.tokens())
+        self.assertEqual(toks[0].kind, TokenKind.IDENTIFIER)
+        # Compare (kind, value) pairs so a failure shows exactly which token
+        # differs.
+        expected = [
+            (TokenKind.IDENTIFIER, 'a'),
+            (TokenKind.OPERATOR, '+'),
+            (TokenKind.IDENTIFIER, 'b'),
+            (TokenKind.OPERATOR, '('),
+            (TokenKind.IDENTIFIER, 'koko'),
+            (TokenKind.OPERATOR, '*'),
+            (TokenKind.NUMBER, '.12'),
+            (TokenKind.OPERATOR, '+'),
+            (TokenKind.NUMBER, '115'),
+            (TokenKind.OPERATOR, ')'),
+            (TokenKind.EOF, ''),
+        ]
+        self.assertEqual([(t.kind, t.value) for t in toks], expected)
+
+
+if __name__ == '__main__':
+    # Demo: lex a small Kaleidoscope program and print every token, one per
+    # line.
+    source = '''
+# Compute the x'th fibonacci number.
+def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2)
+
+# This expression will compute the 40th number.
+fib(40)
+'''
+    for token in Lexer(source).tokens():
+        print(token)