(*** Lexical Analysis -- Scanning.  From Chapter 9.  ***)

infix mem;
infix 5 --;
infix 3 >>;
infix 0 ||;

(*Result signature of the LEXICAL functor:
    instream  the functional input stream that is scanned
    token     Id  -- alphanumeric identifier
              Key -- keyword, either alphabetic or symbolic
              Num -- decimal numeral
    scan      reads the stream to its end and returns the tokens in
              source order*)
signature LEXICAL =
  sig
    type instream
    datatype token = Id of string | Key of string | Num of int
    val scan : instream -> token list
  end

(*Lexical analyser, parameterised over the keyword tables and the stream
  implementation.

  Every character is lower-cased as it is read, so identifiers and keywords
  are matched case-insensitively.  Presumably Keyword.alphas and
  Keyword.symbols therefore hold lower-case strings -- verify against the
  KEYWORD structure that is supplied.

  A semicolon starts a comment that runs to the end of the line.
  Characters that are neither letters, digits, underscore, semicolon nor
  one of the special symbols (this includes octal 0-40, nul-space, and
  177, del) are ignored.*)
functor LEXICAL (structure Basic    : BASIC
                 and       Keyword  : KEYWORD
                 and       StreamIO : TEXT_STREAM_IO) : LEXICAL =
  struct
  local
    open Basic StreamIO Char StringCvt
  in

  type instream = StreamIO.instream

  datatype token = Key of string | Id of string | Num of int;

  (*Character reader in the style expected by the StringCvt scanners:
    like input1, but the character delivered is converted to lower case.*)
  fun input_lc s =
    case input1 s of
        SOME (c, s') => SOME (Char.toLower c, s')
      | NONE         => NONE

  (*The characters that may appear in a symbolic keyword.*)
  val specials = explode"!@#$%^&*()+-={}[]:\"|'\\,./?`~<>";

  fun isDash c       = c = #"-"
  fun isUnderscore c = c = #"_"
  fun isSemicolon c  = c = #";"

  (*Characters allowed after the first character of an identifier or
    alphabetic keyword: letters, digits, dash and underscore.*)
  fun myAlpha c = isAlphaNum c orelse isDash c orelse isUnderscore c

  (*Classify an alphanumeric lexeme as keyword or identifier.*)
  fun tokenof a = if a mem Keyword.alphas then Key a else Id a;

  (*Scanning of a symbolic keyword.
    sy is the text collected so far, s the stream positioned just after it.
    Collection stops as soon as sy is a known symbol (so the shortest
    matching symbol wins), at end of input, or at the first character that
    is not a special symbol.  Returns the symbol text and the remaining
    stream.
    Only characters from specials extend a symbol; this keeps the comment
    marker (semicolon) and the identifier start (underscore) from being
    absorbed into a preceding symbol.*)
  fun symbolic (sy, s) =
    if sy mem Keyword.symbols then (sy, s)
    else
      case input_lc s of
          SOME (c, s') =>
            if c mem specials then symbolic (sy ^ String.str c, s')
            else (sy, s)
        | NONE => (sy, s)

  (*Scanning a stream of characters into a list of tokens.
    toks accumulates the tokens in reverse order; s is the unread input.
    Raises Fail "Expected a number" if Int.scan rejects input that starts
    with a digit.*)
  fun scanning (toks, s) =
    case input_lc s of
        NONE => rev toks                          (*end of chars*)
      | SOME (c, s') =>
          if isSemicolon c then
            (*comment: discard the rest of the line.
              NOTE(review): the result of inputLine is destructured as a
              pair here; the current Basis Library documents an option
              result -- confirm which Basis version this file targets.*)
            let val (_, s'') = inputLine s'
            in  scanning (toks, s'')  end
          else if isAlpha c orelse isUnderscore c then
            (*identifier or keyword: rescan from s so that the first
              character is included in the lexeme*)
            let val (id, rest) = splitl myAlpha input_lc s
            in  scanning (tokenof id :: toks, rest)  end
          else if isDigit c then
            (*decimal numeral, again rescanned from s*)
            (case Int.scan StringCvt.DEC input_lc s of
                 SOME (n, rest) => scanning (Num n :: toks, rest)
               | NONE           => raise Fail "Expected a number")
          else if c mem specials then
            (*special symbol: seed with the character already read, so at
              least one character is always consumed*)
            let val (sy, rest) = symbolic (String.str c, s')
            in  scanning (Key sy :: toks, rest)  end
          else
            (*spaces, line breaks, strange characters are ignored*)
            scanning (toks, skipWS input_lc s')

  (*Entry point: tokenise the whole stream.*)
  fun scan s = scanning ([], s);

  end
  end;