part1 and README
This commit is contained in:
parent
e7999a7842
commit
b2ccbfb85c
2 changed files with 115 additions and 0 deletions
9
README
Normal file
9
README
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
Pykaleidoscope
|
||||
==============
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
||||
Some of the files have unit test classes in them. To run all unit tests::
|
||||
|
||||
$ python3.4 -m unittest discover -p "*.py"
|
||||
106
part1.py
Normal file
106
part1.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# Chapter 1 - Lexer
|
||||
|
||||
from collections import namedtuple
|
||||
from enum import Enum
|
||||
|
||||
|
||||
# Each token is a tuple of kind and value. kind is one of the enumeration values
|
||||
# in TokenKind. value is the textual value of the token in the input.
|
||||
class TokenKind(Enum):
    """Kinds of token produced by the Kaleidoscope lexer.

    The numeric values are arbitrary distinct tags; code should compare
    against the members themselves rather than the raw integers.
    """

    EOF = -1         # emitted once, after the last real token
    DEF = -2         # the keyword 'def'
    EXTERN = -3      # the keyword 'extern'
    IDENTIFIER = -4  # any other alphanumeric word starting with a letter
    NUMBER = -5      # a run of digits and '.' characters
    OPERATOR = -6    # any other single character
|
||||
|
||||
|
||||
# Immutable (kind, value) pair; fields are accessible by name or by index.
Token = namedtuple('Token', ['kind', 'value'])
|
||||
|
||||
|
||||
class Lexer(object):
    """Lexer for Kaleidoscope.

    Initialize the lexer with a string buffer. tokens() returns a generator
    that can be queried for tokens. The generator will emit an EOF token
    before stopping. An empty buffer is accepted and lexes to a lone EOF
    token.
    """

    def __init__(self, buf):
        """Set up the lexer to scan *buf* from its first character."""
        self.buf = buf
        self.pos = 0
        # '' is the end-of-input sentinel. Slicing (rather than indexing)
        # yields it directly for an empty buffer instead of raising.
        self.lastchar = self.buf[:1]

    def tokens(self):
        """Generate Token tuples for the buffer, finishing with one EOF.

        Whitespace and '#' comments (running to the end of the line) are
        skipped and produce no tokens.
        """
        while self.lastchar:
            # Skip whitespace
            while self.lastchar.isspace():
                self._advance()
            if not self.lastchar:
                # The whitespace ran into the end of the buffer. Stop here so
                # the empty sentinel is not reported as an OPERATOR token.
                break
            # Identifier or keyword
            if self.lastchar.isalpha():
                start = self.pos
                while self.lastchar.isalnum():
                    self._advance()
                id_str = self.buf[start:self.pos]
                if id_str == 'def':
                    kind = TokenKind.DEF
                elif id_str == 'extern':
                    kind = TokenKind.EXTERN
                else:
                    kind = TokenKind.IDENTIFIER
                yield Token(kind=kind, value=id_str)
            # Number
            elif self.lastchar.isdigit() or self.lastchar == '.':
                start = self.pos
                while self.lastchar.isdigit() or self.lastchar == '.':
                    self._advance()
                yield Token(kind=TokenKind.NUMBER,
                            value=self.buf[start:self.pos])
            # Comment
            elif self.lastchar == '#':
                self._advance()
                while self.lastchar and self.lastchar not in '\r\n':
                    self._advance()
            else:
                # Some other char
                yield Token(kind=TokenKind.OPERATOR, value=self.lastchar)
                self._advance()
        yield Token(kind=TokenKind.EOF, value='')

    def _advance(self):
        """Step to the next character; lastchar becomes '' past the end."""
        self.pos += 1
        try:
            self.lastchar = self.buf[self.pos]
        except IndexError:
            self.lastchar = ''
|
||||
|
||||
|
||||
import unittest
|
||||
|
||||
class TestLexer(unittest.TestCase):
    """Unit tests for Lexer."""

    def test_lexer_simpletokens(self):
        """Identifiers, numbers and one-character operators lex in order."""
        lexer = Lexer('a+b(koko*.12+115)')
        toks = list(lexer.tokens())
        self.assertEqual(toks[0].kind, TokenKind.IDENTIFIER)
        # Check the whole stream, not just its head, so a regression in any
        # branch of the lexer is caught.
        expected = [
            (TokenKind.IDENTIFIER, 'a'),
            (TokenKind.OPERATOR, '+'),
            (TokenKind.IDENTIFIER, 'b'),
            (TokenKind.OPERATOR, '('),
            (TokenKind.IDENTIFIER, 'koko'),
            (TokenKind.OPERATOR, '*'),
            (TokenKind.NUMBER, '.12'),
            (TokenKind.OPERATOR, '+'),
            (TokenKind.NUMBER, '115'),
            (TokenKind.OPERATOR, ')'),
            (TokenKind.EOF, ''),
        ]
        self.assertEqual([(t.kind, t.value) for t in toks], expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Demo: lex a small Kaleidoscope program and print every token,
    # including the final EOF token.
    buf = '''
# Compute the x'th fibonacci number.
def fib(x)
if x < 3 then
1
else
fib(x-1)+fib(x-2)

# This expression will compute the 40th number.
fib(40)
'''
    lexer = Lexer(buf)
    for tok in lexer.tokens():
        print(tok)
|
||||
Loading…
Add table
Add a link
Reference in a new issue