~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Open Mash Cross Reference
mash/tcl/applications/uc/xml-parse.tcl

Component: ~ [ mash ] ~ [ apps ] ~ [ gsm ] ~ [ lib ] ~ [ otcl ] ~ [ srm ] ~ [ tcl8.3 ] ~ [ tclcl ] ~ [ tk8.3 ] ~ [ tutorials ] ~

  1 # xml-parse.tcl --
  2 #
  3 #       XML Parser
  4 
  5 
  6 #--------------------------------------------------
  7 #
  8 #Class XMLParser - need this comment for import
  9 #
 10 #--------------------------------------------------
 11 
 12 
 13 # $Id: xml-parse.tcl,v 1.4 2004/05/27 01:43:39 aswan Exp $
 14 #
 15 # Copyright (c) 1997 Australian National University (ANU).
 16 #
 17 # ANU makes this software and all associated data and documentation
 18 # ('Software') available free of charge for non-commercial purposes only. You
 19 # may make copies of the Software but you must include all of this notice on
 20 # any copy.
 21 #
 22 # The Software was developed for research purposes and ANU does not warrant
 23 # that it is error free or fit for any purpose.  ANU disclaims any
 24 # liability for all claims, expenses, losses, damages and costs any user may
 25 # incur as a result of using, copying or modifying the Software.
 26 
 27 ##########################################
 28 #
 29 # XML Parser
 30 #
 31 # This package provides procedures for parsing
 32 # XML DTDs and document instances.
 33 #
 34 # It uses the XML namespace.
 35 #
 36 ##########################################
 37 
 38 package provide XML 1.0
 39 
 40 namespace eval XML {
 41     namespace export parse parse:idx pretty_print pretty_print:idx
 42     namespace export parse:DTD
 43 
 44 # XML:parse:idx --
 45 #
 46 # Arguments:
 47 #       xml             Document instance
 48 #       args    option/value pairs
 49 #
 50 # Recognised Options:
 51 #       -textcommand    Default parse:text
 52 #       -elementcommand Default parse:elem
 53 #       -picommand      Default parse:pi
 54 #       -commentcommand Default None
 55 #       -dtd            DTD to use for document
 56 #       -loaddtdcommand Script to load and parse DTD
 57 #       -errorcommand   Script to evaluate if error found
 58 #       -warningcommand Script to evaluate for a reportable warning
 59 #       -unusedvariable global variable for storing unused text
 60 #       -statevariable  global parser variable
 61 #       -start          Default true
 62 #       -end            Default true
 63 #
 64 # Return value:
 65 #       None.
 66 
 67 proc parse:idx {xml args} {
 68     array set opts {
 69         -start 1 -end 1
 70         -removewhitespace 0
 71         -textcommand            parse:text
 72         -elementcommand         parse:elem
 73         -picommand              parse:pi
 74         -loaddtdcommand         noop
 75         -errorcommand           noop
 76         -warningcommand         noop
 77     }
 78     array set opts [list \
 79         -statevariable          [namespace current]::parse_state        \
 80     ]
 81     array set opts $args        ;# very rudimentary option processing
 82     regsub {^(1|yes|true|on)$} $opts(-start) 1 opts(-start)
 83     regsub {^(0|no|false|off)$} $opts(-start) 0 opts(-start)
 84     regsub {^(1|yes|true|on)$} $opts(-end) 1 opts(-end)
 85     regsub {^(0|no|false|off)$} $opts(-end) 0 opts(-end)
 86 
 87     # First, transform the XML into a (flat) Tcl list, ala Uhler
 88 
 89     # Protect Tcl special characters
 90     regsub -all {([{}\\])} $xml {\\\1} xml
 91 
 92     # Do the translation
 93     set w " \t\r\n"     ;# white space
 94     set n "\r\n"        ;# newline
 95     set no_ws [cl ^$w]
 96     set nmtoken [cl -a-zA-Z0-9._]+      ;# NMTOKENs
 97     set name [cl a-zA-Z_][cl -a-zA-Z0-9._]*
 98 
 99     # steve: check whether XML swallows newlines before and/or after a tag
100     # this version swallows
101     # Answer: don't swallow here, but collapse ws for element-only content
102     set exp <(/?)([cl ^$w>]+)[cl $w]*([cl ^>]*)(/?)>
103     set sub "\}\n{\\2} {\\1} {\\4} {\\3} \{"
104     if {[info exists opts(-unusedvariable)]} {
105         upvar #0 $opts(-unusedvariable) unused
106         if {[info exists unused]} {
107             # Sanity check
108             if {$opts(-start)} {
109                 error {specified start of XML, but found unused XML text}
110             }
111             regsub -all $exp "$unused$xml" $sub xml
112             unset unused
113         } else {
114             regsub -all $exp $xml $sub xml
115         }
116         set xml "{} {} {} {} \{$xml\}"
117         if {[regexp {^([^<]*)(<[^>]*$)} [lindex $xml end] x text unused]} {
118             set xml [lreplace $xml end end $text]
119         }
120     } else {
121         regsub -all $exp $xml $sub xml
122         set xml "{} {} {} {} \{$xml\}"
123     }
124 
125     dbgputs parse {Flat list:}
126     foreach {tag close empty param text} $xml {
127         dbgputs parse [list $tag $close $empty $param $text]
128     }
129     dbgputs parse {}
130 
131     # Now process this flat representation into a heirarchical list.
132     # Start with no context.
133 
134     upvar #0 $opts(-statevariable) state
135     if {[info exists opts(-dtd)]} {
136         array set contentmodel [lindex $opts(-dtd) 0]
137         array set attributes [lindex $opts(-dtd) 1]
138         array set entities [lindex $opts(-dtd) 2]
139     } else {
140         # Parse the document without a DTD.  All content models assume
141         # ANY, all attributes are allowed and the basic entities are
142         # available.
143         #array set contentmodel {}
144         #array set attributes {}
145         array set entities {amp & lt < gt > quot \" apos '}
146     }
147 
148     if {$opts(-start)} {
149         # The document prologue gives us the required DTD and
150         # starting context, from which we can determine the start
151         # content model.
152         catch {unset state}
153         array set state [list mode normal RMD ALL cmodel {} context {} stack $opts(-statevariable)(container0) level 0 cid 0 preserve 0 pre_stack {}]
154         upvar #0 [lindex $state(stack) end] currContainer
155         set currContainer {}
156     } else {
157         upvar #0 [lindex $state(stack) end] currContainer
158     }
159 
160     foreach {tag close empty param text} $xml {
161         # If the current mode is cdata or comment then we must undo what the
162         # regsub above has done to reconstitute the data
163         switch $state(mode) {
164             comment {
165                 if {![string compare {} $param] && [regexp ([cl ^-]*)--[cl $w]*\$ $tag x comm1]} {
166                     # Found the end of the comment
167                     dbgputs parse {end of comment (in tag)}
168                     catch {append currContainer "\n$opts(-commentcommand) [list "$state(comment_text)<$close$comm1"] {} {}"}
169                     set tag {}
170                     unset state(comment_text)
171                     set state(mode) normal
172                 } elseif {[regexp ([cl ^-]*)--[cl $w]*\$ $param x comm1]} {
173                     # Found the end of the comment
174                     dbgputs parse {end of comment (in attributes)}
175                     catch {append currContainer "\n$opts(-commentcommand) [list "$state(comment_text)<$close$tag $comm1"] {} {}"}
176                     set tag {}
177                     set param {}
178                     unset state(comment_text)
179                     set state(mode) normal
180                 } elseif {[regexp ([cl ^-]*)--[cl $w]*>(.*) $text x comm1 text]} {
181                     # Found the end of the comment
182                     dbgputs parse {end of comment (in text)}
183                     catch {append currContainer "\n$opts(-commentcommand) [list "$state(comment_text)<$close$tag $param>$comm1"] {} {}"}
184                     set tag {}
185                     set param {}
186                     unset state(comment_text)
187                     set state(mode) normal
188                 } else {
189                     dbgputs parse {comment continues}
190                     append state(comment_text) <$close$tag { } $param$empty>$text
191                     continue
192                 }
193             }
194             cdata {
195                 if {![string compare {} $param] && [regexp ([cl ^\]]*)\]\][cl $w]*\$ $tag x cdata1]} {
196                     # Found the end of the CDATA
197                     dbgputs parse {end of CDATA (in tag)}
198                     set text $state(cdata)<$close$cdata1$text
199                     set tag {}
200                     unset state(cdata)
201                     set state(mode) normal
202                 } elseif {[regexp ([cl ^\]]*)\]\][cl $w]*\$ $param x cdata1]} {
203                     # Found the end of the CDATA
204                     dbgputs parse {end of CDATA (in attributes)}
205                     set text $state(cdata)<$close$tag\ $cdata1$text
206                     set tag {}
207                     set param {}
208                     unset state(cdata)
209                     set state(mode) normal
210                 } elseif {[regexp ([cl ^\]]*)\]\][cl $w]*>(.*) $text x cdata1 text]} {
211                     # Found the end of the CDATA
212                     dbgputs parse {end of CDATA (in text)}
213                     set text $state(cdata)<$close$tag\ $param$empty>$cdata1$text
214                     set tag {}
215                     set param {}
216                     unset state(cdata)
217                     set state(mode) normal
218                 } else {
219                     dbgputs parse {CDATA continues}
220                     append state(cdata) <$close$tag { } $param$empty>$text
221                     continue
222                 }
223             }
224             markupdecl {
225                 if {[regexp [cl $w]*([cl ^$w])[cl $w](.*) $param x id value]} {
226                     catch {parse:dtd:[string toupper $tag] $id $value} err
227                     continue
228                 } else {
229                     eval $opts(-errorcommand) [list "bad syntax in internal DTD subset declaration \"<$tag $param>\""]
230                 }
231                 if {[regexp {]>} $text]} {
232                     # Ignore other text, since we're in a PI
233                     set tag {}
234                     set param {}
235                     set text {}
236                     set state(mode) normal
237                 }
238             }
239         }
240         # default: normal mode
241 
242         # Fold tag to uppercase and process attributes
243         set ftag [string toupper $tag]
244         catch {unset params}
245         array set params [parse:attrs $param]
246         # A validator would now check the attributes against the attribute list
247         # We won't bother, but (later) we will search the list for #FIXED attributes
248 
249         dbgputs parse [list processing $close$ftag$empty - context [lindex $state(context) end] container [lindex $state(stack) end] cmodel [lindex $state(cmodel) end] level $state(level) containerid $state(cid) pre $state(preserve)]
250 
251         switch -glob -- [regexp {^\?|!.*} $ftag],$close,$empty {
252             0,, {
253                 # Start tag for a non-empty (container) element.
254 
255                 parse:ifValid opts $ftag [list parse:element:open opts $ftag params]
256             }
257             0,/, {
258                 # End tag for an element.
259 
260                 # This tag (the end tag) should match the current context (which was the open tag).
261                 if {[string compare $ftag [lindex $state(context) end]]} {
262                     # Go looking for the matching start tag
263                     dbgputs XML_parse [list end tag $ftag doesn't match open container [lindex $state(context) end]]
264                     # First check whether the start tag is there at all
265                     if {[lsearch $state(context) $ftag] >= 0} {
266                         dbgputs XML_parse [list open tag found - closing intervening containers]
267                         while {[string compare $ftag [lindex $state(context) end]]} {
268                             dbgputs XML_parse [list closing intervening container [lindex $state(context) end]]
269                             parse:element:close opts
270                         }
271                         # Now finally close the container
272                         parse:element:close opts
273                         eval $opts(-errorcommand) unbalanced [list "unbalanced open tags for close tag \"$ftag\""]
274                     } else {
275                         # No start tag found - ignore this tag
276                         dbgputs XML_parse [list ignoring close tag $ftag - start tag not found]
277                         eval $opts(-errorcommand) unmatchedclose [list "ignoring close tag for element \"$ftag\": no matching start tag"]
278                     }
279 
280                 } else {
281                     dbgputs XML_parse [list close container $ftag]
282                     parse:element:close opts
283                 }
284             }
285             0,,/ {
286                 # Empty element
287 
288                 parse:ifValid opts $ftag [list append currContainer "\n$opts(-elemcommand) $ftag [list [array get params]] {}"]
289             }
290             1,, {
291                 # Processing instructions or XML declaration
292                 switch -glob -- $ftag {
293                     \\?XML {
294                         # $params(RMD) indicates whether DTD is required or not
295                         catch {
296                             if {![regexp {(NONE|INTERNAL|ALL)} [string toupper $params(RMD)] x state(RMD)]} {
297                                 eval $opts(-errorcommand) invalidvalue [list "invalid value \"$params(RMD)\" for Required Markup Declaration"]
298                             }
299                         }
300                         append currContainer "\n$opts(-picommand) $ftag [list [array get params]] {}"
301                     }
302                     \\?* {
303                         append currContainer "\n$opts(-picommand) $ftag [list [array get params]] {}"
304                     }
305                     !DOCTYPE {
306                         # Parse the params supplied.  Looking for Name, ExternalID and MarkupDecl
307                         regexp ^[cl $w]*($name)(.*) $param x state(doc_name) param
308                         set state(doc_name) [string toupper $state(doc_name)]
309                         set externalID {}
310                         if {[regexp -nocase ^[cl $w]*(SYSTEM|PUBLIC)(.*) $param x id param]} {
311                             switch [string toupper $id] {
312                                 SYSTEM {
313                                     if {[regexp ^[cl $w]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $w]+'([cl ^']*)'(.*) $param x systemlit param]} {
314                                         set externalID [list SYSTEM $systemlit]
315                                     } else {
316                                         eval $opts(-errorcommand) syntax [list "SYSTEM identifier not followed by literal"]
317                                     }
318                                 }
319                                 PUBLIC {
320                                     if {[regexp ^[cl $w]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $w]+'([cl ^']*)'(.*) $param x pubidlit param]} {
321                                         if {[regexp ^[cl $w]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $w]+'([cl ^']*)'(.*) $param x systemlit param]} {
322                                             set externalID [list PUBLIC $pubidlit $systemlit]
323                                         } else {
324                                             eval $opts(-errorcommand) syntax [list "PUBLIC identifier not followed by system literal"]
325                                         }
326                                     } else {
327                                         eval $opts(-errorcommand) syntax [list "PUBLIC identifier not followed by literal"]
328                                     }
329                                 }
330                             }
331                             if {[regexp -nocase ^[cl $w]+NDATA[cl $w]+($name)(.*) $param x notation param]} {
332                                 lappend externalID $notation
333                             }
334                         }
335 
336                         append currContainer "\n$opts(-picommand) $ftag [list $externalID] {}"
337 
338                         catch {
339                             # If a DTD is supplied on the command line, it overrides any DTD specified by the PI
340                             if {![string compare {} $opts(-dtd)] && ![string compare ALL $state(RMD)]} {
341                                 eval $opts(-loaddtdcommand) $externalID
342                             }
343                         }
344 
345                         # Now parse markupdecl
346                         if {[regexp [format {^[%s]*\[[^<]*<([^%s]+)[%s]*([^%s]*)[%s]*(.*)} $w $w $w $w $w] $param x decl id value]} {
347                             set state(mode) markupdecl
348                             catch {parse:dtd:[string toupper $decl] $id $value} err
349                         }
350 
351                         if {[info exists contentmodel]} {
352                             set state(cmodel) [list [list $state(doc_name) {}]]
353                             dbgputs XML_parse [list !doctype set cmodel to $state(cmodel)]
354                         }
355                     }
356                     !--* {
357                         # Start of a comment
358                         # See if it ends in the same tag, otherwise change the
359                         # parsing mode
360                         regexp {!--(.*)} $tag x comm1
361                         if {[regexp ([cl ^-]*)--[cl $w]*\$ $param x comm2]} {
362                             dbgputs parse {processed comment (end in attributes)}
363                             catch {append currContainer "\n$opts(-commentcommand) [list $comm1$comm2] {} {}"}
364                         } elseif {[regexp ([cl ^-]*)--[cl $w]*\$ $text x comm2]} {
365                             dbgputs parse {processed comment (end in text)}
366                             catch {append currContainer "\n$opts(-commentcommand) [list "$comm1 $param>$comm2"] {} {}"}
367                         } else {
368                             dbgputs parse {start of comment}
369                             set state(mode) comment
370                             set state(comment_text) "$comm1 $param>$text"
371                             continue
372                         }
373                     }
374                     ![CDATA[* {
375                         dbgputs parse {start CDATA section}
376                         regexp {!\[CDATA\[(.*)} $tag x cdata1
377                         if {[regexp {([^\]]*)]]$} $param x cdata2]} {
378                             dbgputs parse {processed CDATA (end in attribute)}
379                             set text "$cdata1 $cdata2$text"
380                         } elseif {[regexp {([^\]]*)]]>(.*)} $text x cdata2 text]} {
381                             dbgputs parse {processed CDATA (end in text)}
382                             set text "$cdata1 $param>$cdata2$text"
383                         } else {
384                             dbgputs parse {start CDATA}
385                             set state(cdata) "$cdata1 $param$text"
386                             set state(mode) cdata
387                             continue
388                         }
389                     }
390 
391 
392                     !ELEMENT {
393                         # Internal DTD declaration
394                     }
395                     !ATTLIST {
396                     }
397                     !ENTITY {
398                     }
399                     !NOTATION {
400                     }
401 
402 
403                     !* {
404                         append currContainer "\n$opts(-picommand) $ftag [list $param] {}"
405                     }
406                     default {
407                         eval $opts(-errorcommand) unknown [list "unknown processing instruction \"<$ftag>\""]
408                     }
409                 }
410             }
411             1,* -
412             0,/,/ {
413                 # Syntax error
414                 eval $opts(-errorcommand) syntax [list "syntax error: tag \"</$ftag/>\""]
415             }
416         }
417 
418         # Add the text to the current container
419         # Must check that #PCDATA is in the content model
420 
421         # XML states that white space is removed for element-only content,
422         # but is forwarded to the application in mixed content.
423         # The attribute XML-SPACE="PRESERVE" can be used to force white space to
424         # be kept.  However, applications may want white space collapsed wherever
425         # possible.  -removewhitespace may be used to indicate this behaviour.
426 
427         set text [expr {($state(preserve) || ![info exists contentmodel]) || !$opts(-removewhitespace) ? [entity $text] : [zap_white [entity $text]]}]
428         switch -glob [info exists contentmodel([lindex $state(context) end])],[string compare 0 [regexp $no_ws $text]] {
429             1,-1 {
430                 dbgputs XML_parse [list processing #PCDATA - current content model [lindex $state(cmodel) end]]
431                 # Is #PCDATA valid for the current content model?
432                 parse:ifValid opts #PCDATA [list append currContainer "\n$opts(-textcommand) [list $text] {} {}"]
433             }
434             0,-1 {
435                 # No content model for this element, assume ANY
436                 dbgputs XML_parse [list no content model for current element, assuming ANY]
437                 append currContainer "\n$opts(-textcommand) [list $text] {} {}"
438             }
439         }
440     }
441 
442     # If this is the end of the document, close all open containers
443     if {$opts(-end)} {
444         while {$state(level) > 0} {
445             dbgputs XML_parse [list at end - popping stack]
446             parse:element:close opts
447         }
448     }
449 
450     return {}
451 }
452 
453 # Support routines
454 
455 proc cl x {return "\[$x\]"}
456 set w " \t\r\n" ;# white space
457 
458 proc parse:ifValid {optvar content validcmd} {
459     upvar $optvar opts
460     upvar #0 $opts(-statevariable) state
461     upvar contentmodel cm
462 
463     # Is this element valid for the current content model?
464     if {![info exists cm] && [string compare {} $content]} {
465         # There is no content model.
466         uplevel 1 $validcmd
467     } elseif {![string compare * [lindex [lindex $state(cmodel) end] 0]] || [lsearch [lindex [lindex $state(cmodel) end] 0] $content] >= 0} {
468         dbgputs XML_parse [list content $content is OK]
469         # Modify the content model according to the current rep
470         switch [lindex [lindex $state(cmodel) end] 1] {
471             {} -
472             ? {
473                 # remove this model from the current content model
474                 set state(cmodel) [lreplace $state(cmodel) end end [lreplace [lindex $state(cmodel) end] 0 1]]
475             }
476             * -
477             + {# no change necessary}
478         }
479 
480         # Go ahead and run script
481         uplevel 1 $validcmd
482 
483     } else {
484         dbgputs XML_parse [list element $content not in content model]
485         # There are four options:
486         #   - as reps of content models allow, search ahead for a context where this element is valid, and modify content model appropriately
487         #   - search ahead through the content model and imply intervening elements
488         #   - close elements until an element lower in the stack allows this element
489         #   - ignore this element (a "hard" error)
490     }
491 }
492 
493 proc parse:element:open {optvar tag param} {
494     upvar $optvar opts
495     upvar #0 $opts(-statevariable) state
496     upvar contentmodel cm
497     upvar currContainer current
498     upvar $param params
499 
500     dbgputs XML_parse [list parse:element:open current: level $state(level) cid $state(cid) cmodel $state(cmodel)]
501     incr state(level)   ;# invariant: $state(level) == [llength $state(stack)]
502     set container $opts(-statevariable)(container[incr state(cid)])
503     append current "\n$opts(-elementcommand) $tag [list [array get params]] $container"
504     lappend state(stack) $container
505     uplevel "upvar #0 $container currContainer; set currContainer {}"
506     lappend state(context) $tag
507     catch {lappend state(cmodel) $cm($tag)}
508     dbgputs XML_parse [list content model is now $state(cmodel)]
509 
510     # Need to keep a reference count for handling nested preformatted elements
511     if {[catch {
512         if {![string compare PRESERVE $params(XML-SPACE)]} {
513             dbgputs XML_parse {starting preformatted text}
514             incr state(preserve)
515             lappend state(pre_stack) 1
516         } else {
517             lappend state(pre_stack) 0
518         }
519     }]} {
520         lappend state(pre_stack) 0
521     }
522 
523     return $container
524 }
525 
526 proc parse:element:close optvar {
527     upvar $optvar opts
528     upvar #0 $opts(-statevariable) state
529 
530     # Decrement preformatted reference count appropriately
531     incr state(preserve) -[lindex $state(pre_stack) end]
532     set state(pre_stack) [lreplace $state(pre_stack) end end]   ;# pop stack
533 
534     incr state(level) -1
535     set state(stack) [lreplace $state(stack) end end]
536     set state(context) [lreplace $state(context) end end]
537     catch {set state(cmodel) [lreplace $state(cmodel) end end]}
538     uplevel "upvar #0 [lindex $state(stack) end] currContainer"
539     dbgputs XML_parse [list parse:element:close $optvar - new: level $state(level) stack $state(stack) pre $state(preserve)]
540 }
541 
542 # parse:attrs --
543 #
544 # Parse attributes.  There are two forms for name-value pairs:
545 #
546 #       name="value"
547 #       name='value'
548 #
549 # Arguments:
550 #       attrs   attribute string given in a tag
551 #
552 # Return Value:
553 #       A Tcl list representing the name-value pairs in the attribute string
554 
555 proc parse:attrs {attrs} {
556     # First check whether there's any work to do
557     if {![string compare {} [string trim $attrs]]} {
558         return {}
559     }
560 
561     # Protect Tcl special characters
562     regsub -all {([[\$\\])} $attrs {\\\1} attrs
563 
564     set ws "\n\t "
565     regsub -all [format {([a-zA-Z0-9]+)[%s]*=[%s]*"([^"]*)"} $ws $ws] $attrs {[set parsed([string toupper {\1}]) {\2}] } attrs ;# "
566     regsub -all [format {([a-zA-Z0-9]+)[%s]*=[%s]*'([^']*)'} $ws $ws] $attrs {[set parsed([string toupper {\1}]) {\2}] } attrs
567     subst $attrs
568 
569     return [array get parsed]
570 }
571 
572 # parse --
573 #
574 # Parse XML text into a Tcl heirarchical list format.
575 # First parses into indexed list format, and then expands the
576 # indexed structures to arrive at final format.
577 
578 proc parse {xml args} {
579     array set opts [list \
580         -statevariable  [namespace current]::parse_state \
581         -textcommand    parse:text \
582         -elemcommand    parse:elem \
583         -picommand      parse:pi \
584         -commentcommand parse:comment \
585     ]
586     array set opts $args
587 
588     eval parse:idx [list $xml] $args
589 
590     return [parse:expand $opts(-statevariable) $opts(-elemcommand) $opts(-textcommand) $opts(-picommand) $opts(-commentcommand)]
591 }
592 
593 proc parse:expand {state elemcmd txtcmd picmd commentcmd} {
594     upvar #0 $state data
595     return [parse:expand_int ${state}(container0) $elemcmd $txtcmd $picmd $commentcmd]
596 }
597 
598 proc parse:expand_int {container elemcmd txtcmd picmd commentcmd} {
599     upvar #0 $container data
600     if {![info exists data]} return;
601     set ret {}
602     foreach {type arg1 arg2 arg3} $data {
603         switch $type \
604             $elemcmd {
605                 lappend ret $type $arg1 $arg2 [parse:expand_int $arg3 $elemcmd $txtcmd $picmd $commentcmd]
606             } \
607             $txtcmd - \
608             $picmd - \
609             $commentcmd {
610                 lappend ret $type $arg1 $arg2 $arg3
611             } \
612             default {error [list $type doesn't match $elemcmd or $txtcmd]}
613     }
614     return $ret
615 }
616 
617 # Print out a nice representation of a parsed XML structure
618 
619 proc pretty_print {xml {indent {}} {elemcmd parse:elem} {txtcmd parse:text} {picmd parse:pi} {commentcmd parse:comment}} {
620     set ret {}
621     foreach {type arg1 arg2 arg3} $xml {
622         #puts "$indent"
623         switch $type \
624             $picmd - \
625             $commentcmd - \
626             $txtcmd {
627                 if {[string compare {} $indent]} {
628                     append ret [format %${indent}s { }]
629                 }
630                 append ret [list $arg1]\n
631             } \
632             $elemcmd {
633                 if {[string compare {} $indent]} {
634                     append ret [format %${indent}s { }]
635                 } else {
636                     append indent 0
637                 }
638                 append ret "[list $arg1] [list $arg2] \{\n"
639                 append ret [pretty_print $arg3 [expr $indent + 4] $elemcmd $txtcmd $picmd $commentcmd]
640                 if {$indent > 0} {
641                     append ret [format %${indent}s { }]
642                 }
643                 append ret \}\n
644             }
645     }
646     return $ret
647 }
648 
649 # Similar to above, but work from state variable with container pointers
650 
651 proc pretty_print:idx {{container parse_state(container0)} {indent {}} {elemcmd parse:elem} {txtcmd parse:text} {picmd parse:pi} {commentcmd parse:comment}} {
652     upvar #0 $container xml
653 
654     if {![info exists xml]} {
655         if {[string compare {} $indent]} {
656             append ret [format %${indent}s { }]
657         }
658         return {}
659     }
660 
661     set ret {}
662     foreach {type arg1 arg2 arg3} $xml {
663         switch $type \
664             $picmd - \
665             $commentcmd - \
666             $txtcmd {
667                 if {[string compare {} $indent]} {
668                     append ret [format %${indent}s { }]
669                 }
670                 append ret [list $arg1]\n
671             } \
672             $elemcmd {
673                 if {[string compare {} $indent]} {
674                     append ret [format %${indent}s { }]
675                 } else {
676                     append indent 0
677                 }
678                 append ret "[list $arg1] [list $arg2] $arg3 \{\n"
679                 append ret [pretty_print:idx $arg3 [expr $indent + 4] $elemcmd $txtcmd $picmd $commentcmd]
680                 if {$indent > 0} {
681                     append ret [format %${indent}s { }]
682                 }
683                 append ret \}\n
684             }
685     }
686     return $ret
687 }
688 
689 # Debugging.  See Utilities package for how to enable.
690 
691 proc dbgputs args {}
692 
693 # Do-nothing proc
694 
695 proc noop args {}
696 
697 ### Following procedures are based on html_library
698 
699 # Convert multiple white space into a single space
700 
701 proc zap_white data {
702     regsub -all "\[ \t\r\n\]+" $data { } data
703     return $data
704 }
705 
706 # find XML entity references (syntax: &xxx;)
707 
708 proc entity [list text [list entities [namespace current]::entity_predef]] {
709     if {![regexp & $text]} {return $text}
710     regsub -all {([][$\\])} $text {\\\1} new
711     regsub -all {&#(x?)([0-9]+);} \
712             $new {[format %c [scan \2 %[expr {\1 == {} ? d : x}] tmp; set tmp]]} new
713     regsub -all {&([a-zA-Z]+);} $new [format {[entity:deref %s \1]} $entities] new
714     return [subst $new]
715 }
716 
717 # convert an XML escape sequence into character
718 
719 proc entity:deref [list text [list entities [namespace current]::entity_predef] {unknown ?}] {
720     upvar #0 $entities map
721     set result $unknown
722     catch {set result $map($text)}
723     return $result
724 }
725 
726 # table of predefined entities
727 
728 array set entity_predef {
729    lt <   gt >   amp &   quot \"   apos '
730 }
731 
732 ####################################
733 #
734 # DTD parser for XML
735 #
736 # A DTD is represented as a three element Tcl list.
737 # The first element contains the content models for elements,
738 # the second contains the attribute lists for elements and
739 # the last element contains the entities for the document.
740 #
741 ####################################
742 
743 proc parse:DTD {dtd args} {
744     array set opts {
745         -errorcommand           noop
746     }
747     array set opts $args
748 
749     set w " \t\r\n"     ;# white space
750     set exp <!([cl ^$w>]+)[cl $w]*([cl ^$w]+)[cl $w]*([cl ^>]*)>
751     set sub {{\1} {\2} {\3}}
752     regsub -all $exp $dtd $sub dtd
753 
754     foreach {decl id value} $dtd {
755         catch {parse:dtd:[string toupper $decl] $id $value} err
756     }
757 
758     return [list [array get contentmodel] [array get attributes] [array get entities]]
759 }
760 
761 # Procedures for handling the various declarative elements in a DTD
762 # New elements may be added by creating a procedure of the form
763 # parse:dtd:_element_
764 
765 # For each of these procedures, the various regular expressions they use
766 # are created outside of the proc to avoid overhead at runtime
767 
768 proc parse:dtd:element {id value} {
769     dbgputs XML_DTD_parse [list parse:dtd:element $id $value]
770     upvar opts state
771     upvar contentmodel cm
772     upvar otherModels om
773 
774     if {[info exists cm($id)] || [info exists im($id)] || [info exists om($id)]} {
775         eval $state(-errorcommand) element [list "element \"$id\" already declared"]
776     } else {
777         switch -- $value {
778             EMPTY {
779                 set om($id) {}
780             }
781             ANY {
782                 set cm($id) *
783             }
784             default {
785                 # Translate the content model into Tcl list format
786                 regsub -all {\(} $value " \{ " value
787                 regsub -all {\)} $value " \} " value
788                 regsub -all {([*,+|?])} $value { \1 } value
789                 if {[catch {parse:dtd:element:cmodel $value 2} result]} {
790                     eval $state(-errorcommand) element [list $result]
791                 } else {
792                     set cm($id) $result
793                 }
794             }
795         }
796     }
797 }
798 
799 # parse:dtd:element:cmodel --
800 #
801 # Parse an element content model.
802 # Content models are sequences of choices, possibly nested.
803 # If the model starts with a choice, then it is a single sequence
804 # at the top-level.
805 #
806 # This is going to need alot of work!
807 #
808 # Return Result:
809 #       A Tcl list representing the content model:
810 #       {elements1} rep {elements2} rep ...
811 #       Eg: (head, (p|list|note)*, div*) becomes:
812 #       head {} {p list note} * div *
813 
814 proc parse:dtd:element:cmodel {value depth {nested 0}} {
815     upvar $depth entities ents
816     dbgputs XML_DTD_parse [list parse:dtd:element:cmodel $value $depth]
817     set model {}
818     set result {}
819     set rep 0
820 
821     foreach part $value {
822         if {[llength $part] > 1} {
823             set result [parse:dtd:element:cmodel $part [expr $depth + 1] 1]
824             if {[llength $result] == 1} {
825                 lappend model [lindex $result 0]
826             } else {
827                 dbgputs XML_DTD_parse [list appending $result]
828                 eval lappend model $result
829             }
830             set result {}
831             set rep 2   ;# 2 indicates that next rep should replace rep returned
832         } else {
833             switch -regexp -- $part {
834                 %[^;]*; {
835                     # Substitute parameter entity
836                     regexp {%([^;]*);} $part x pentity
837                     if {[catch {lappend result $ents($pentity)}]} {
838                         error "unknown parameter entity \"$pentity\""
839                     }
840                 }
841                 , {
842                     if {[llength $result]} {
843                         lappend model $result
844                     }
845                     set result {}
846                     if {!$rep} {
847                         lappend model {}
848                     }
849                     set rep 0
850                 }
851                 \\| {
852                 }
853                 \\+ -
854                 \\* -
855                 \\? {
856                     if {[llength $result]} {
857                         lappend model $result
858                         set result {}
859                     }
860                     if {$rep == 2} {
861                         set model [lreplace $model end end $part]
862                     } else {
863                         lappend model $part
864                     }
865                     set rep 1
866                 }
867                 default {
868                     lappend result [lindex $part 0]
869                 }
870             }
871         }
872     }
873 
874     if {[llength $result]} {
875         lappend model $result
876     }
877     if {!$rep} {
878         lappend model {}
879     }
880 
881     return $model
882 }
883 
884 # Watch out for case-sensitivity
885 
886 set attlist_exp [cl $w]*([cl ^$w]+)[cl $w]*([cl ^$w]+)[cl $w]*(#REQUIRED|#IMPLIED)
887 set attlist_enum_exp [cl $w]*([cl ^$w]+)[cl $w]*\\(([cl ^)]*)\\)[cl $w]*("([cl ^")])")? ;# "
888 set attlist_fixed_exp [cl $w]*([cl ^$w]+)[cl $w]*([cl ^$w]+)[cl $w]*(#FIXED)[cl $w]*([cl ^$w]+)
889 
890 proc parse:dtd:attlist {id value} {
891     variable attlist_exp attlist_enum_exp attlist_fixed_exp
892     dbgputs XML_DTD_parse [list parse:dtd:attlist $id $value]
893     upvar opts state
894     upvar attributes am
895 
896     if {[info exists am($id)]} {
897         eval $state(-errorcommand) attlist [list "attribute list for element \"$id\" already declared"]
898     } else {
899         # Parse the attribute list.  If it were regular, could just use foreach,
900         # but some attributes may have values.
901         regsub -all {([][$\\])} $value {\\\1} value
902         regsub -all $attlist_exp $value {[parse:dtd:attlist:att {\1} {\2} {\3}]} value
903         regsub -all $attlist_enum_exp $value {[parse:dtd:attlist:att {\1} {\2} {\3}]} value
904         regsub -all $attlist_fixed_exp $value {[parse:dtd:attlist:att {\1} {\2} {\3} {\4}]} value
905         subst $value
906         set am($id) [array get attlist]
907     }
908 }
909 
910 proc parse:dtd:attlist:att {name type default {value {}}} {
911     upvar attlist al
912     # This needs further work
913     set al($name) [list $default $value]
914 }
915 
916 set param_entity_exp [cl $w]*([cl ^$w]+)[cl $w]*([cl ^"$w]*)[cl $w]*"([cl ^"]*)"
917 
918 proc parse:dtd:entity {id value} {
919     variable param_entity_exp
920     dbgputs XML_DTD_parse [list parse:dtd:entity $id $value]
921     upvar opts state
922     upvar entities ents
923 
924     if {[string compare % $id]} {
925         # Entity declaration
926         if {[info exists ents($id)]} {
927             eval $state(-errorcommand) entity [list "entity \"$id\" already declared"]
928         } else {
929             if {![regexp {"([^"]*)"} $value x entvalue] && ![regexp {'([^']*)'} $value x entvalue]} {
930                 eval $state(-errorcommand) entityvalue [list "entity value \"$value\" not correctly specified"]
931             } ;# "
932             set ents($id) $entvalue
933         }
934     } else {
935         # Parameter entity declaration
936         switch -glob [regexp $param_entity_exp $value x name scheme data],[string compare {} $scheme] {
937             0,* {
938                 eval $state(-errorcommand) entityvalue [list "parameter entity \"$value\" not correctly specified"]
939             }
940             *,0 {
941                 # SYSTEM or PUBLIC declaration
942             }
943             default {
944                 set ents($id) $data
945             }
946         }
947     }
948 }
949 
950 set notation_exp [cl $w]*([cl ^$w]+)[cl $w]*(.*)
951 
952 proc parse:dtd:notation {id value} {
953     variable notation_exp
954     upvar opts state
955 
956     if {[regexp $notation_exp $value x scheme data] == 2} {
957     } else {
958         eval $state(-errorcommand) notationvalue [list "notation value \"$value\" incorrectly specified"]
959     }
960 }
961 
962 # End XML namespace
963 }
964 
965 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.