requires: {#Stream. #String. #ExtensibleArray}.
provides: {#RegExp}.

prototypes ensureNamespace: #RegularExpressions.
prototypes addImmutableSlot: #RegExp valued: prototypes RegularExpressions.
"RegExp is an alias for the RegularExpressions namespace."

"The default scanner is generated by regular expressions from strings. This
is the abstract syntax of that system."

RegExp addPrototype: #Node derivedFrom: {Cloneable}.
"A Node represents a regular expression. The scanner is represented by a
regular expression. These are the initial objects created in producing the
scanner. From these nodes, we create a directed graph and then we compile the
graph."
RegExp Node addSlot: #actions.
"The Sequence of actions to be performed on a successful match; this
generally seems to be a regex String."
RegExp Node addSlot: #position.
"The position of the RE in the scanner. For multiple matches, the position of
the first match."

node@(RegExp Node traits) copy
"Answer a copy of the node whose actions are themselves copied, so the copy
does not share the actions collection with the original."
[| newN |
  newN: resend.
  newN actions: node actions copy.
  newN
].

node@(RegExp Node traits) , node2
"Answer a SequenceNode matching the receiver followed by node2."
[
  RegExp SequenceNode newFor: ({node. node2} as: ExtensibleArray)
].

node@(RegExp Node traits) \/ node2
"Answer an OrNode matching either the receiver or node2."
[
  RegExp OrNode newFor: ({node. node2} as: ExtensibleArray)
].

node@(RegExp Node traits) repeat
"Answer a RepetitionNode wrapping the receiver, using that prototype's
default minimum and maximum."
[
  RegExp RepetitionNode newFor: node
].

node@(RegExp Node traits) repeatFor: min
"Answer a RepetitionNode wrapping the receiver that requires at least min
occurrences; the maximum keeps the prototype's default."
[| newN |
  "The keyword send is parenthesized so that it is not parsed together with
   the assignment as a single newN:newFor: message."
  newN: (RegExp RepetitionNode newFor: node).
  newN minimum: min.
  newN
].

node@(RegExp Node traits) repeatFor: min to: max
"Answer a RepetitionNode wrapping the receiver that accepts between min and
max occurrences."
[| newN |
  newN: (RegExp RepetitionNode newFor: node).
  newN minimum: min.
  newN maximum: max.
  newN
].

node@(RegExp Node traits) possibleMatchesDo: block on: stream
"Evaluate block once per possible match, with the match text written on
stream. Concrete node types must supply this."
[
  overrideThis
].

node@(RegExp Node traits) possibleMatchesSize
"Answer the number of possible matches. Concrete node types must supply
this."
[
  overrideThis
].

node@(RegExp Node traits) possibleMatchesDo: block
"Apply block to the contents of a fresh String writer for each possible
match of the receiver. Answers the receiver."
[| stream |
  stream: String newEmpty writer.
  node possibleMatchesDo: [block applyWith: stream contents] on: stream.
  node
].

node@(RegExp Node traits) possibleMatches
"Answer an ExtensibleArray collecting every value handed out by
possibleMatchesDo:."
[| matches |
  matches: ExtensibleArray newEmpty.
  node possibleMatchesDo: [| :each | matches add: each].
  matches
].
RegExp addPrototype: #CharacterNode derivedFrom: {RegExp Node}.
"Represents a regular expression that matches one of a set of characters."
RegExp CharacterNode addSlot: #characters.
"The SortedSet whose characters are the valid matches for this element. This
is generally a String, though. TODO: Make a SortedSet and use it here."

node@(RegExp CharacterNode traits) newFor: str
"Answer a new CharacterNode whose characters are the sorted, duplicate-free
characters of str.
TODO: Make ignorecase: an optional keyword."
[| newN |
  newN: node clone.
  newN characters: (node sortedCharactersFor: str).
  newN
].

RegExp addSlot: #ignoreCase valued: False.
"TODO: remove this global-level control ASAP!"

node@(RegExp CharacterNode traits) sortedCharactersFor: str@(String traits)
"Answer a String holding the characters of str sorted by integer code with
duplicates removed. When RegExp ignoreCase is set, the uppercase and lowercase
form of every character is included.
TODO: refactor this into some new more appropriate method."
[| chars stream |
  RegExp ignoreCase
    ifTrue:
      [stream: String newEmpty writer.
       str do: [| :each |
         stream nextPut: each asUppercase.
         stream nextPut: each asLowercase]]
    ifFalse:
      [stream: (String newSizeOf: str) writer.
       stream ; str].
  chars: (stream contents sortBy: [| :a :b | (a as: Integer) < (b as: Integer)]).
  "Collect the result on a fresh writer; reusing the one above would leave
   the unsorted input in front of the de-duplicated output."
  stream: String newEmpty writer.
  "chars is sorted, so duplicates are adjacent: emit a character only when it
   differs from the previous one, which is carried as the accumulator."
  chars inject: Nil into: [| :previous :each |
    previous = each ifFalse: [stream nextPut: each].
    each].
  stream contents
].

node@(RegExp CharacterNode traits) matchingCharacters
"Answer the characters to enumerate as matches. When RegExp ignoreCase is
set, lowercase characters are left out; otherwise all characters are used."
[
  RegExp ignoreCase
    ifTrue: [node characters reject: [| :each | each isLowercase]]
    ifFalse: [node characters]
].

node@(RegExp CharacterNode traits) possibleMatchesDo: block on: stream
"For each matching character: write it on stream, run block, then step the
stream back one position so the next character replaces it."
[
  node matchingCharacters do: [| :each |
    stream nextPut: each.
    block do.
    stream skip: -1].
  node
].

node@(RegExp CharacterNode traits) possibleMatchesSize
"Answer how many characters matchingCharacters yields."
[
  node matchingCharacters size
].

node@(RegExp CharacterNode traits) copy
"Answer a copy that has its own copy of the characters."
[| newN |
  newN: resend.
  newN characters: node characters copy.
  newN
].

node@(RegExp CharacterNode traits) \/ node2@(RegExp CharacterNode traits)
"Specializes the normal Or-node creation to merge the two nodes into one
whenever they are consistent: both have characters and their actions are
equal. The merge modifies and answers the receiver."
[
  (node characters isNotNil
     and: [node2 characters isNotNil
       and: [node actions = node2 actions]])
    ifFalse: [^ resend].
  node characters:
    ((((node characters ; node2 characters) as: Set)
        sortBy: [| :a :b | (a as: Integer) < (b as: Integer)])
       as: String).
  node
].

node@(RegExp CharacterNode traits) print: char on: stream
"ASCII codes between 32 and 126 are literally-printable. Everything else is
forced to use a hexadecimal rendition in regular expressions."
[
  ((char as: Integer) between: 32 and: 126)
    ifTrue: [^ (stream nextPut: char)].
  stream ; '\x'.
  (char as: Integer) printOn: stream base: 16.
  node
].

node@(RegExp CharacterNode traits) printOn: stream
"Print a single character bare, otherwise a bracketed character class. A
class of more than 128 characters is printed in negated form, listing the
codes from 0 below 256 that are not in it."
[| allCharacters |
  node characters size = 1
    ifTrue: [^ (node print: node characters first on: stream)].
  stream nextPut: $[.
  allCharacters: node characters.
  allCharacters size > 128
    ifTrue:
      [stream nextPut: $^.
       allCharacters:
         (((0 below: 256) collect: [| :each | each as: ASCIICharacter])
            reject: [| :each | node characters includes: each])].
  allCharacters do: [| :each | node print: each on: stream].
  stream nextPut: $].
  node
].

RegExp addPrototype: #OrNode derivedFrom: {RegExp Node}.
"Represents an alternative."
RegExp OrNode addSlot: #alternatives.
"The possible choices."

node@(RegExp OrNode traits) copy
"Answer a copy that has its own copy of the alternatives collection."
[| newN |
  newN: resend.
  newN alternatives: node alternatives copy.
  newN
].

node@(RegExp OrNode traits) \/ node2
"Specializes the normal Or-node creation to incorporate the node into the
alternatives when it's consistent, i.e. when both nodes have equal actions.
The receiver is modified and answered in that case."
[
  node actions = node2 actions ifFalse: [^ resend].
  node alternatives add: node2.
  node
].

node@(RegExp OrNode traits) possibleMatchesDo: block on: stream
"Enumerate the matches of each alternative in turn, putting the stream back
at its starting position after each one."
[
  node alternatives do: [| :each pos |
    pos: stream position.
    each possibleMatchesDo: block on: stream.
    stream position: pos].
  node
].

node@(RegExp OrNode traits) possibleMatchesSize
"Answer the sum of the alternatives' possible match counts."
[
  node alternatives inject: 0 into: [| :sum :each | sum + each possibleMatchesSize]
].

node@(RegExp OrNode traits) printOn: stream
"Print the alternatives inside parentheses, separated by a bar."
[
  stream nextPut: $(.
  node alternatives
    do: [| :each | each printOn: stream]
    separatedBy: [stream ; '| '].
  stream nextPut: $).
  node
].

RegExp addPrototype: #RepetitionNode derivedFrom: {RegExp Node}.
"Represents a repeating character in an expression."
RegExp RepetitionNode addSlot: #element.
"The repeated element."
RegExp RepetitionNode addSlot: #minimum valued: 0.
"The minimum number of acceptable occurrences of the element."
RegExp RepetitionNode addSlot: #maximum valued: PositiveInfinity.
"The maximum number of occurrences which this expression will match."

node@(RegExp RepetitionNode traits) newFor: char
"Answer a new RepetitionNode repeating char, with the prototype's current
minimum and maximum."
[| newN |
  newN: node clone.
  newN element: char.
  newN
].

node@(RegExp RepetitionNode traits) possibleMatchesDo: block on: stream
"Enumerate the possible matches, beginning with zero repetitions written."
[
  node possibleMatchesDo: block on: stream startingAt: 0
].

node@(RegExp RepetitionNode traits) possibleMatchesDo: block on: stream startingAt: start
"start is the number of repetitions already written on stream. Run block if
that count lies within the minimum and maximum, then extend by one more
repetition of the element.
NOTE(review): nothing here bounds the recursion when maximum is
PositiveInfinity - confirm callers only enumerate bounded repetitions."
[
  (start between: node minimum and: node maximum) ifTrue: [block do].
  start = node maximum - 1
    ifTrue:
      ["The next repetition reaches the maximum, so block runs directly."
       node element possibleMatchesDo: block on: stream]
    ifFalse:
      [node element
         possibleMatchesDo:
           [node possibleMatchesDo: block on: stream startingAt: start + 1]
         on: stream].
  node
].

node@(RegExp RepetitionNode traits) possibleMatchesSize
"Answer the element's match count multiplied by the number of permitted
repetition counts."
[
  node element possibleMatchesSize * (node maximum - node minimum + 1)
].

node@(RegExp RepetitionNode traits) printOn: stream
"Print the element followed by its repetition suffix: * for zero or more,
+ for one or more, {min,} for other unbounded cases, and {min,max} when the
maximum is finite."
[
  node element printOn: stream.
  node maximum = PositiveInfinity
    ifTrue:
      [node minimum = 0 ifTrue: [^ stream nextPut: $*].
       node minimum = 1 ifTrue: [^ stream nextPut: $+].
       ^ (stream nextPut: ${.
          stream ; (node minimum as: String).
          stream ; ',}')].
  stream nextPut: ${.
  stream ; (node minimum as: String).
  "Separate the bounds; without the comma 2 to 3 would print as {23}."
  stream nextPut: $,.
  stream ; (node maximum as: String).
  stream nextPut: $}.
  node
].

RegExp addPrototype: #SequenceNode derivedFrom: {RegExp Node}.
"Represents the catenation of several other regular expressions together."
RegExp SequenceNode addSlot: #elements valued: ExtensibleArray newEmpty.
"The child nodes in left-to-right order."
node@(RegExp SequenceNode traits) newFor: seq
"Answer a new SequenceNode whose elements are taken from seq."
[
  seq as: node
].

s@(Sequence traits) as: node@(RegExp SequenceNode traits)
"Answer a new SequenceNode whose elements are s converted to the same kind of
collection as the node's current elements."
[| newN |
  newN: node clone.
  newN elements: (s as: node elements).
  newN
].

node@(RegExp SequenceNode traits) copy
"Answer a copy that has its own copy of the elements collection."
[| newN |
  newN: resend.
  newN elements: node elements copy.
  newN
].

node@(RegExp SequenceNode traits) , node2
"This specializes the general method to include the new node in the sequence
if it is compatible, i.e. when both nodes have equal actions. The receiver is
modified and answered in that case."
[
  node actions = node2 actions ifFalse: [^ resend].
  node elements add: node2.
  node
].

node@(RegExp SequenceNode traits) possibleMatchesDo: block on: stream
"Enumerate the possible matches, beginning with the first element."
[
  node possibleMatchesDo: block on: stream startingAt: 0
].

node@(RegExp SequenceNode traits) possibleMatchesDo: block on: stream startingAt: start
"Enumerate the matches of the elements from index start (0-based) onwards.
Each match of the current element continues into the following element; block
runs once the last element has been written. An empty sequence yields a single
empty match, in line with possibleMatchesSize answering 1 for it."
[| element |
  node elements isEmpty ifTrue: [block do. ^ node].
  element: (node elements at: start).
  "Indices are 0-based, so the last element sits at size - 1."
  start = (node elements size - 1)
    ifTrue: [element possibleMatchesDo: block on: stream]
    ifFalse:
      ["Recurse through the current element, not the whole node: sending this
        to node would start over from index 0 and never finish."
       element
         possibleMatchesDo:
           [node possibleMatchesDo: block on: stream startingAt: start + 1]
         on: stream].
  node
].

node@(RegExp SequenceNode traits) possibleMatchesSize
"Answer the product of the elements' possible match counts."
[
  node elements inject: 1 into: [| :sum :each | sum * each possibleMatchesSize]
].

node@(RegExp SequenceNode traits) printOn: stream
"Print the elements in order, separated by a space character."
[
  node elements
    do: [| :each | each printOn: stream]
    separatedBy: [stream nextPut: $\s].
  node
].

RegExp addPrototype: #Lexer derivedFrom: {Cloneable}.
"NOTE(review): the Lexer's behaviour is not defined in this part of the file;
only its two slots are declared here."
RegExp Lexer addSlot: #stream.
RegExp Lexer addSlot: #undoBuffer.