(asdf-load :cl-ppcre :split-sequence) (defparameter *pl1-keywords* "a abnormal act(ivate)? addbuff alias aligned alloc(ate)? anycond(ition)? area ascii assignable|asgn asm|assembler attach attention|attn auto(matic)? b b1 b2 b3 b4 backwards based begin bigendian bin(ary)? bit bkwd blksize buf(fered)? buff(ers)? buffoff bufnd bufni bufsp builtin by byaddr byvalue bx c call cdecl cell char(acter)? charg(raphic)? check close cobol col(umn)? complex|cplx conn(ected)? cond(ition)? consecutive const(ant)? ctlasa ctl360 controlled|ctl conv(ersion)? copy d db data date dcl|declare deact(ivate)? dec(imal)? dft|default delay delete define def(ined)? descriptor descriptors detach dim(ension)? direct display do downthru e edit else endendfile endpage entry env(ironment)? error event excl(usive)? exit exports ext(ernal)? f fb fs fbs fetch fetchable file finish fixed fixedoverflow|fofl float flush free forever fortran format from fromalien g generic genkey get go goto graphic gx handle hexadec i ieee if ignore imported in include indexarea indexed init(ial)? inline input inter interactive int(ernal)? into invalidop irred(ucible)? iterate key keyed keyfrom keylength keyloc keyto label leave limited like line linesize linkage list littleendian local locate loop m main name ncp nocharg(raphic)? nocheck noconv(ersion)? nodescriptor noexecops nof(ixedoverflow|ofl) nolock nonassignable|nonasgn nonconn(ected)? none nonvar(ying)? non_quick no_quick_blocks noinit noinline noinvalidop noo(verflow|fl) noprint normal nosize nosub(scriptrange|rg) nostr(ingrange|g) nostr(ingsize|z) note nou(nderflow|fl) nowrite noz(erodivide|div) offset on open optional options optlink order ordinal other(wise)? output overflow|ofl p package packed_decimal packed page pagesize par(m|ameter) password pending pic(ture)? pointer|ptr pos(ition)? prec(ision)? print priority proc(edure)? put r range read real record recsize recursive red(ucible)? reentrant refer regional release rename reorder repeat replace reply reread reserved reserves resignal retcode return returns reuse revert rewrite scalarvarying select separate_static set sequential|seql signal signed sis size skip snap static stdcall storage stop stream string stringrange|strg stringsize|strz stringvalue structure sub subscriptrange|subrg support system task then thread title to total tp transient transmit trkofl tstack type u unal(igned)? unbuf(fered)? unconnected undefinedfile|undf underflow|ufl union unlock uns(igned)? until update upthru v validate value variable var(ying)? var(ying)?z vb vbs vs vsam wait when w(ide)?char winmain while write wx x xn xu zerodivide|zdiv ") (cl-ppcre::parse-string "zerodivide|zdiv") --> (:ALTERNATION "zerodivide" "zdiv") (cl-ppcre::parse-string "var(ying)?z") --> (:SEQUENCE "var" (:GREEDY-REPETITION 0 1 (:REGISTER "ying")) #\z) (cl-ppcre::parse-string "par(m|ameter)") --> (:SEQUENCE "par" (:REGISTER (:ALTERNATION #\m "ameter"))) (defgrammar lex-regexp () (regexp ::= term terms) (terms ::= (alt (seq term terms) empty)) (empty ::=) (term ::= ) ) (defun parse-lex-regexp (regexp) (let ((result '())) (labels ((remove-register (expr) (cond ((atom expr) expr) ((eq :register (first expr)) (remove-register (second expr))) (t (mapcar (function remove-register) expr)))) (generate-keywords (expr) (cond ((characterp expr) (list (string expr))) ((stringp expr) (list expr)) ((atom expr) (error "~S unexpected ~S" 'GENERATE-KEYWORDS expr)) (t (ecase (first expr) ((:alternation) (mapcan (function generate-keywords) (rest expr))) ((:sequence) (mapcar (lambda (seq) (apply (function concatenate) 'string seq)) (apply (function COM.INFORMATIMAGO.COMMON-LISP.LIST:COMBINE) (mapcar (function generate-keywords) (rest expr))))) ((:greedy-repetition) (destructuring-bind (g min max subexpr) expr (if (and (integerp min) (integerp max) (<= min max)) (apply (function COM.INFORMATIMAGO.COMMON-LISP.LIST:COMBINE) (generate-keywords subexpr)) (error "~S unexpected bounds in ~S" 'GENERATE-KEYWORDS expr))))))))) (generate-keywords (remove-register (cl-ppcre::parse-string regexp)))))) ;; (mapcar 'parse-lex-regexp ;; '("write" "zerodivide|zdiv" "var(ying)?z" "var((yi)(ng))?z" "par(m|ameter)" ;; "((a|b)(c|d))" "beg((a|b)(c|d))?end")) (defparameter *expanded-keywords* (mapcar (lambda (regexp) (parse-lex-regexp (cl:string-trim " " regexp))) (split-sequence:split-sequence #\newline *pl1-keywords* :remove-empty-subseqs t))) ;; (reduce (function append) *expanded-keywords*) ;; (("a") ("abnormal") ("activate") ("addbuff") ("alias") ("aligned") ("allocate") ;; ("anycondition") ("area") ("ascii") ("assignable" "asgn") ("asm" "assembler") ;; ("attach") ("attention" "attn") ("automatic") ("b") ("b1") ("b2") ("b3") ;; ("b4") ("backwards") ("based") ("begin") ("bigendian") ("binary") ("bit") ;; ("bkwd") ("blksize") ("buffered") ("buffers") ("buffoff") ("bufnd") ("bufni") ;; ("bufsp") ("builtin") ("by") ("byaddr") ("byvalue") ("bx") ("c") ("call") ;; ("cdecl") ("cell") ("character") ("chargraphic") ("check") ("close") ("cobol") ;; ("column") ("complex" "cplx") ("connected") ("condition") ("consecutive") ;; ("constant") ("ctlasa") ("ctl360") ("controlled" "ctl") ("conversion") ;; ("copy") ("d") ("db") ("data") ("date") ("dcl" "declare") ("deactivate") ;; ("decimal") ("dft" "default") ("delay") ("delete") ("define") ("defined") ;; ("descriptor") ("descriptors") ("detach") ("dimension") ("direct") ("display") ;; ("do") ("downthru") ("e") ("edit") ("else") ("endendfile") ("endpage") ;; ("entry") ("environment") ("error") ("event") ("exclusive") ("exit") ;; ("exports") ("external") ("f") ("fb") ("fs") ("fbs") ("fetch") ("fetchable") ;; ("file") ("finish") ("fixed") ("fixedoverflow" "fofl") ("float") ("flush") ;; ("free") ("forever") ("fortran") ("format") ("from") ("fromalien") ("g") ;; ("generic") ("genkey") ("get") ("go") ("goto") ("graphic") ("gx") ("handle") ;; ("hexadec") ("i") ("ieee") ("if") ("ignore") ("imported") ("in") ("include") ;; ("indexarea") ("indexed") ("initial") ("inline") ("input") ("inter") ;; ("interactive") ("internal") ("into") ("invalidop") ("irreducible") ;; ("iterate") ("key") ("keyed") ("keyfrom") ("keylength") ("keyloc") ("keyto") ;; ("label") ("leave") ("limited") ("like") ("line") ("linesize") ("linkage") ;; ("list") ("littleendian") ("local") ("locate") ("loop") ("m") ("main") ;; ("name") ("ncp") ("nochargraphic") ("nocheck") ("noconversion") ;; ("nodescriptor") ("noexecops") ("nofixedoverflow" "nofofl") ("nolock") ;; ("nonassignable" "nonasgn") ("nonconnected") ("none") ("nonvarying") ;; ("non_quick") ("no_quick_blocks") ("noinit") ("noinline") ("noinvalidop") ;; ("nooverflow" "noofl") ("noprint") ("normal") ("nosize") ;; ("nosubscriptrange" "nosubrg") ("nostringrange" "nostrg") ;; ("nostringsize" "nostrz") ("note") ("nounderflow" "noufl") ("nowrite") ;; ("nozerodivide" "nozdiv") ("offset") ("on") ("open") ("optional") ("options") ;; ("optlink") ("order") ("ordinal") ("otherwise") ("output") ("overflow" "ofl") ;; ("p") ("package") ("packed_decimal") ("packed") ("page") ("pagesize") ;; ("parm" "parameter") ("password") ("pending") ("picture") ("pointer" "ptr") ;; ("position") ("precision") ("print") ("priority") ("procedure") ("put") ("r") ;; ("range") ("read") ("real") ("record") ("recsize") ("recursive") ("reducible") ;; ("reentrant") ("refer") ("regional") ("release") ("rename") ("reorder") ;; ("repeat") ("replace") ("reply") ("reread") ("reserved") ("reserves") ;; ("resignal") ("retcode") ("return") ("returns") ("reuse") ("revert") ;; ("rewrite") ("scalarvarying") ("select") ("separate_static") ("set") ;; ("sequential" "seql") ("signal") ("signed") ("sis") ("size") ("skip") ("snap") ;; ("static") ("stdcall") ("storage") ("stop") ("stream") ("string") ;; ("stringrange" "strg") ("stringsize" "strz") ("stringvalue") ("structure") ;; ("sub") ("subscriptrange" "subrg") ("support") ("system") ("task") ("then") ;; ("thread") ("title") ("to") ("total") ("tp") ("transient") ("transmit") ;; ("trkofl") ("tstack") ("type") ("u") ("unaligned") ("unbuffered") ;; ("unconnected") ("undefinedfile" "undf") ("underflow" "ufl") ("union") ;; ("unlock") ("unsigned") ("until") ("update") ("upthru") ("v") ("validate") ;; ("value") ("variable") ("varying") ("varyingz") ("vb") ("vbs") ("vs") ("vsam") ;; ("wait") ("when") ("widechar") ("winmain") ("while") ("write") ("wx") ("x") ;; ("xn") ("xu") ("zerodivide" "zdiv"))