"""Pykaleidoscope, chapter 1: the lexer.

Testing
-------

Some of the files have unit test classes in them. To run all unit tests::

    $ python3.4 -m unittest discover -p "*.py"
"""

from collections import namedtuple
from enum import Enum
import unittest


class TokenKind(Enum):
    """Kinds of token produced by Lexer."""
    EOF = -1
    DEF = -2
    EXTERN = -3
    IDENTIFIER = -4
    NUMBER = -5
    OPERATOR = -6


# Each token is a tuple of kind and value. kind is one of the enumeration
# values in TokenKind. value is the textual value of the token in the input.
Token = namedtuple('Token', 'kind value')


class Lexer(object):
    """Lexer for Kaleidoscope.

    Initialize the lexer with a string buffer. tokens() returns a generator
    that can be queried for tokens. The generator will emit an EOF token
    before stopping.

    The scan position lives on the instance (pos / lastchar), so a Lexer is
    single-use: once tokens() has been exhausted, a second call only yields
    the EOF token.
    """

    # Identifiers that are reserved words; anything else is IDENTIFIER.
    _KEYWORDS = {
        'def': TokenKind.DEF,
        'extern': TokenKind.EXTERN,
    }

    def __init__(self, buf):
        """Start lexing at the beginning of the string buf.

        An empty buf is accepted; tokens() then yields only the EOF token.
        """
        self.buf = buf
        self.pos = 0
        # Slicing (rather than buf[0]) gives '' for an empty buffer, which is
        # the same end-of-input marker that _advance() uses.
        self.lastchar = buf[:1]

    def tokens(self):
        """Generate the Token stream for the buffer, ending with EOF.

        Whitespace and '#' comments (which run to the end of the line) are
        skipped. Any character that does not start an identifier, number or
        comment is emitted as a one-character OPERATOR token.
        """
        while self.lastchar:
            # Skip whitespace
            while self.lastchar.isspace():
                self._advance()
            if not self.lastchar:
                # Only trailing whitespace was left. Without this check the
                # empty end-of-input marker would fall through to the
                # operator branch and be emitted as a bogus OPERATOR token.
                break

            if self.lastchar.isalpha():
                # Identifier or keyword: [alpha][alnum]*
                start = self.pos
                while self.lastchar.isalnum():
                    self._advance()
                word = self.buf[start:self.pos]
                kind = self._KEYWORDS.get(word, TokenKind.IDENTIFIER)
                yield Token(kind=kind, value=word)
            elif self.lastchar.isdigit() or self.lastchar == '.':
                # Number: any run of digits and dots. The text is not
                # validated here, so '1.2.3' comes out as a single NUMBER.
                start = self.pos
                while self.lastchar.isdigit() or self.lastchar == '.':
                    self._advance()
                yield Token(kind=TokenKind.NUMBER,
                            value=self.buf[start:self.pos])
            elif self.lastchar == '#':
                # Comment: discard everything up to the end of the line.
                self._advance()
                while self.lastchar and self.lastchar not in '\r\n':
                    self._advance()
            else:
                # Some other char
                yield Token(kind=TokenKind.OPERATOR, value=self.lastchar)
                self._advance()
        yield Token(kind=TokenKind.EOF, value='')

    def _advance(self):
        """Move to the next character; lastchar becomes '' at end of input."""
        self.pos += 1
        try:
            self.lastchar = self.buf[self.pos]
        except IndexError:
            self.lastchar = ''


class TestLexer(unittest.TestCase):
    def test_lexer_simpletokens(self):
        lexer = Lexer('a+b(koko*.12+115)')
        toks = list(lexer.tokens())
        self.assertEqual(toks[0].kind, TokenKind.IDENTIFIER)

    def test_trailing_whitespace_emits_no_operator(self):
        toks = list(Lexer('a \n').tokens())
        self.assertEqual([t.kind for t in toks],
                         [TokenKind.IDENTIFIER, TokenKind.EOF])

    def test_empty_buffer_yields_only_eof(self):
        toks = list(Lexer('').tokens())
        self.assertEqual(toks, [Token(kind=TokenKind.EOF, value='')])


if __name__ == '__main__':
    buf = '''
# Compute the x'th fibonacci number.
def fib(x)
    if x < 3 then
        1
    else
        fib(x-1)+fib(x-2)

# This expression will compute the 40th number.
fib(40)
'''
    lexer = Lexer(buf)
    for t in lexer.tokens():
        print(t)