part1 and README

This commit is contained in:
Eli Bendersky 2015-01-27 15:51:30 -08:00
commit b2ccbfb85c
2 changed files with 115 additions and 0 deletions

9
README Normal file
View file

@@ -0,0 +1,9 @@
Pykaleidoscope
==============
Testing
-------
Some of the files have unit test classes in them. To run all unit tests::

    $ python3.4 -m unittest discover -p "*.py"

106
part1.py Normal file
View file

@@ -0,0 +1,106 @@
# Chapter 1 - Lexer
from collections import namedtuple
from enum import Enum
# Each token is a tuple of kind and value. kind is one of the enumeration values
# in TokenKind. value is the textual value of the token in the input.
class TokenKind(Enum):
    """Enumeration of the token categories emitted by the lexer.

    Every member carries a distinct negative integer value.
    """

    EOF = -1          # end of input
    DEF = -2          # the 'def' keyword
    EXTERN = -3       # the 'extern' keyword
    IDENTIFIER = -4   # any other alphanumeric name
    NUMBER = -5       # a run of digits and '.' characters
    OPERATOR = -6     # any other single character
# Record type for a single lexed token: fields are `kind` and `value`.
Token = namedtuple(typename='Token', field_names='kind value')
class Lexer(object):
    """Lexer for Kaleidoscope.

    Initialize the lexer with a string buffer. tokens() returns a generator
    that can be queried for tokens. The generator will emit an EOF token
    before stopping.

    Attributes:
        buf: the string being tokenized.
        pos: index in buf of the current character.
        lastchar: the current (not yet consumed) character, or '' once the
            end of buf has been reached.
    """

    def __init__(self, buf):
        """Set up the lexer to read from the start of buf.

        Args:
            buf: the source text to tokenize. An empty string is accepted;
                tokens() then emits only the EOF token.
        """
        self.buf = buf
        self.pos = 0
        # An empty buffer starts out already at end-of-input.
        self.lastchar = self.buf[0] if self.buf else ''

    def tokens(self):
        """Generate the Token objects found in the buffer.

        Yields:
            DEF / EXTERN tokens for the words 'def' and 'extern', IDENTIFIER
            for any other run that starts with a letter and continues with
            letters or digits, NUMBER for a run of digits and '.', and
            OPERATOR for any other single non-whitespace character.
            Whitespace and '#' comments (up to the end of the line) produce
            no tokens. A final EOF token with an empty value is always
            emitted last.
        """
        keywords = {'def': TokenKind.DEF, 'extern': TokenKind.EXTERN}

        while self.lastchar:
            # Skip whitespace
            while self.lastchar.isspace():
                self._advance()

            if not self.lastchar:
                # Only whitespace remained. Without this check the empty
                # end-of-input marker would fall through to the operator
                # branch and be emitted as a bogus OPERATOR token.
                break

            if self.lastchar.isalpha():
                # Identifier or keyword
                start = self.pos
                while self.lastchar.isalnum():
                    self._advance()
                id_str = self.buf[start:self.pos]
                kind = keywords.get(id_str, TokenKind.IDENTIFIER)
                yield Token(kind=kind, value=id_str)
            elif self.lastchar.isdigit() or self.lastchar == '.':
                # Number: any run of digits and dots; the text is not
                # validated here (e.g. '1.2.3' is returned as one token).
                start = self.pos
                while self.lastchar.isdigit() or self.lastchar == '.':
                    self._advance()
                yield Token(kind=TokenKind.NUMBER,
                            value=self.buf[start:self.pos])
            elif self.lastchar == '#':
                # Comment: discard everything up to the end of the line.
                self._advance()
                while self.lastchar and self.lastchar not in '\r\n':
                    self._advance()
            else:
                # Some other char
                yield Token(kind=TokenKind.OPERATOR, value=self.lastchar)
                self._advance()

        yield Token(kind=TokenKind.EOF, value='')

    def _advance(self):
        """Move to the next character of buf.

        Increments pos and loads the character at the new position into
        lastchar; lastchar becomes '' when pos is past the end of buf.
        """
        self.pos += 1
        if self.pos < len(self.buf):
            self.lastchar = self.buf[self.pos]
        else:
            self.lastchar = ''
import unittest
class TestLexer(unittest.TestCase):
    """Unit tests for the Lexer class."""

    def test_lexer_simpletokens(self):
        # Lex a small expression to completion and check that the very
        # first token is recognized as an identifier.
        lexer = Lexer('a+b(koko*.12+115)')
        all_tokens = list(lexer.tokens())
        first_token = all_tokens[0]
        self.assertEqual(first_token.kind, TokenKind.IDENTIFIER)
if __name__ == '__main__':
    # Demo driver: lex a small sample program and print every token,
    # one per line.
    program = '''
# Compute the x'th fibonacci number.
def fib(x)
if x < 3 then
1
else
fib(x-1)+fib(x-2)
# This expression will compute the 40th number.
fib(40)
'''
    for token in Lexer(program).tokens():
        print(token)