Syntax addPrototype: #Lexer derivedFrom: {ReadStream}. "The lexer takes an input character Stream and divides it up into tokens, using a buffer as necessary to hold the tokenized contents. Also, debugging information is stored for now in terms of the line number that the current stream position has reached." Syntax Lexer addSlot: #stream valued: Stream clone. Syntax Lexer addSlot: #undoBuffer valued: ExtensibleArray newEmpty. Syntax Lexer addSlot: #lineNumber valued: 1. Syntax Lexer traits addSlot: #specialChars valued: '()[]{}@.|!'. l@(Syntax Lexer traits) on: stream "Target the lexer to the particular stream and initialize it." [ l stream: stream. l undoBuffer: l undoBuffer newEmpty. l lineNumber: 1. l ]. l@(Syntax Lexer traits) isAtEnd "The lexer has reached its end when the stream is exhausted and the buffer is empty." [| c | (l undoBuffer isEmpty and: [l stream isAtEnd]) ifTrue: [^ True]. [ c: l nextCharacter. (l undoBuffer isEmpty and: [l stream isAtEnd]) or: [c isWhitespace not] ] whileFalse. c isWhitespace ifFalse: [ l undoCharacter: c. ^ False ]. True ]. l@(Syntax Lexer traits) moreCharacters [ l undoBuffer isNotEmpty or: [l stream isAtEnd not] ]. l@(Syntax Lexer traits) peekCharacter "Grab the next character, but leave it in the buffer, so the position is not advanced." [ l undoBuffer isEmpty ifTrue: [l undoCharacter: l nextCharacter] ifFalse: [l undoBuffer last] ]. l@(Syntax Lexer traits) nextCharacter "To get the next character, either pull one from the buffer or read from the stream of characters. Raise an error if this is used at the end, and advance the line number if a new-line is reached." [| c | c: (l undoBuffer isEmpty ifTrue: [l stream isAtEnd ifTrue: [l error: 'Unexpected end of stream']. l stream next] ifFalse: [l undoBuffer removeLast]). c = $\n ifTrue: [l lineNumber: l lineNumber + 1]. c ]. l@(Syntax Lexer traits) undoCharacter: c "Put the character back into the buffer, and decrement the line number if it's a new-line." [ c = $\n ifTrue: [l lineNumber: l lineNumber - 1]. l undoBuffer addLast: c ]. Syntax Lexer addPrototype: #Error derivedFrom: {DescriptiveError}. "An error that occurred in parsing, always requiring a description." Syntax Lexer Error addSlot: #lineNumber valued: 0. "The line number on which the error was detected." l@(Syntax Lexer traits) error: description [| newE | newE: (l Error newDescription: 'Line ' ; l lineNumber print ; ': ' ; description). newE lineNumber: l lineNumber. newE signal ]. l@(Syntax Lexer traits) readInteger: radix "The general method for building integers from the raw characters, with a radix (number of digits) parameter. Grab all following digits for the radix, multiplying the accumulator by the radix and adding the numeric equivalent of the character." [| number | number: 0. [l moreCharacters and: [(l peekCharacter isDigit: radix) or: [l peekCharacter = $_]]] whileTrue: [| c | c: l nextCharacter. (c isDigit: radix) ifTrue: [number: number * radix + (c toDigit: radix)] ]. number ]. l@(Syntax Lexer traits) readMantissa "Build a floating-point number's fractional part." [| number place | number: 0. place: 1. [l moreCharacters and: [l peekCharacter isDigit or: [l peekCharacter = $_]]] whileTrue: [| c | c: l nextCharacter. c isDigit ifTrue: [number: number * 10 + c toDigit. place: place * 10] ]. (number as: Float) / (place as: Float) ]. l@(Syntax Lexer traits) readExponent "Build a floating-point number's exponent as an integer." [| sign c | sign: 1. c: l nextCharacter. (c = $+ or: [c = $-]) ifTrue: [c = $- ifTrue: [sign: -1]] ifFalse: [l undoCharacter: c]. sign * (l readInteger: 10) ]. l@(Syntax Lexer traits) readNumber "The overall routine for building numbers." [| token number sign c | "Assign the default sign, then override it based on the presence of an explicit sign character." sign: 1. c: l nextCharacter. (c = $+ or: [c = $-]) ifTrue: [c = $- ifTrue: [sign: -1]] ifFalse: [l undoCharacter: c]. "Now read in all the continuous string of digits possible as an integer." number: (l readInteger: 10). "Reaching the end of the lexing stream just finalizes the process." l moreCharacters ifFalse: [token: Syntax LiteralToken clone. token value: sign * number. ^ token]. "Conditionalize on the next character: it may set up a radix or a decimal." c: l nextCharacter. (c = $r or: [c = $R]) ifTrue: [number: (l readInteger: number)] ifFalse: [(c = $. and: [l moreCharacters] and: [l peekCharacter isDigit]) ifTrue: [number: (number as: Float) + l readMantissa. l moreCharacters ifFalse: [token: Syntax LiteralToken clone. token value: sign * number. ^ token]. c: l nextCharacter]. (c = $e or: [c = $E]) ifTrue: [number: (number as: Float) * (10.0 raisedTo: l readExponent)] ifFalse: [l undoCharacter: c]]. token: Syntax LiteralToken clone. token value: sign * number. token ]. l@(Syntax Lexer traits) readEscapedCharacter "Language support for character escapes. This should be called at the point after the initial escape is seen, whether as a character or part of a string." [| c | c: l nextCharacter. c caseOf: { $n -> [$\n]. $t -> [$\t]. $r -> [$\r]. $b -> [$\b]. $s -> [$\s]. $a -> [$\a]. $v -> [$\v]. $f -> [$\f]. $e -> [$\e]. $0 -> [$\0]. $x -> [| code | code: (l nextCharacter toDigit: 16). code: code * 16 + (l nextCharacter toDigit: 16). code as: ASCIICharacter ] } otherwise: [c] ]. l@(Syntax Lexer traits) readString "Build a string until the next single-quote character is encountered. Escaping is accounted for." [| writeStream token c | writeStream: '' writer. [c: l nextCharacter. c = $'] whileFalse: [writeStream nextPut: (c = $\\ ifTrue: [l readEscapedCharacter] ifFalse: [c])]. token: Syntax LiteralToken clone. token value: writeStream contents. token ]. l@(Syntax Lexer traits) readComment "Build a comment string until the next double-quote character is encountered. Escaping is accounted for." [| writeStream token c | writeStream: '' writer. [c: l nextCharacter. c = $"] whileFalse: [writeStream nextPut: (c = $\\ ifTrue: [l readEscapedCharacter] ifFalse: [c])]. token: Syntax CommentToken clone. token comment: writeStream contents. token ]. l@(Syntax Lexer traits) readSelector: type "Read a selector symbol into a token." [| writeStream token c | writeStream: '' writer. [l moreCharacters not or: [c: l peekCharacter. c isWhitespace] or: [l specialChars includes: c]] whileFalse: [writeStream nextPut: l nextCharacter]. token: type clone. token selector: (writeStream contents as: Symbol). token ]. l@(Syntax Lexer traits) readLiteral "This handles the literal brace array syntaxes." [| writeStream token c | writeStream: '' writer. l moreCharacters ifTrue: [ c: l nextCharacter. c caseOf: { $( -> [^ Syntax BeginLiteralParenthesisToken]. ${ -> [^ Syntax BeginLiteralArrayToken]. $[ -> [^ Syntax BeginLiteralBlockToken]. $' -> [[c: l nextCharacter. c = $'] whileFalse: [writeStream nextPut: (c = $\\ ifTrue: [l readEscapedCharacter] ifFalse: [c])]. token: Syntax LiteralToken clone. token value: (writeStream contents as: Symbol). ^ token]. }. l undoCharacter: c ]. [l moreCharacters not or: [c: l peekCharacter. c isWhitespace] or: [l specialChars includes: c]] whileFalse: [writeStream nextPut: l nextCharacter]. token: Syntax LiteralToken clone. token value: (writeStream contents as: Symbol). token ]. l@(Syntax Lexer traits) readCharacter "Read in a single character into a token or an escaped one." [| token c | c: l nextCharacter. c = $\\ ifTrue: [c: l readEscapedCharacter]. token: Syntax LiteralToken clone. token value: c. token ]. l@(Syntax Lexer traits) readToken "The overall handler for tokenization, this conditionalizes on the various initializing characters to build the various token objects." "TODO: place these dispatch tables in persistent places, much like a Lisp read-table." [| c | l isAtEnd ifTrue: [^ Syntax EndStreamToken]. c: l nextCharacter. c caseOf: { $' -> [l readString]. $" -> [l readComment]. $$ -> [l readCharacter]. $# -> [l readLiteral]. $( -> [Syntax BeginParenthesisToken]. $) -> [Syntax EndParenthesisToken]. ${ -> [Syntax BeginArrayToken]. $} -> [Syntax EndArrayToken]. $[ -> [Syntax BeginBlockToken]. $] -> [Syntax EndBlockToken]. $@ -> [Syntax AtToken]. $. -> [Syntax EndStatementToken]. $| -> [Syntax BeginVariablesToken]. $! -> [Syntax TypeToken]. $` -> [l readSelector: Syntax MacroSelectorToken] } otherwise: [((c = $+ or: [c = $-]) and: [l peekCharacter isDigit]) ifTrue: [l undoCharacter: c. l readNumber] ifFalse: [l undoCharacter: c. c isDigit ifTrue: [l readNumber] ifFalse: [l readSelector: Syntax SelectorToken]]] ]. l@(Syntax Lexer traits) next [ l readToken ].