#!/usr/bin/env tclsh # wow... the RFC makes a resolution that breaks my x-file scheme. # oh well, there's nothing to say i can't make my own scheme rules... # it seems permitted. It's just a nuisance. # my x-file scheme will resolve according to the RFC, except that: # - step 6c is replaced with: # All occurrences of "/.", where "." is a complete path segment, # are removed from the buffer string. # - step 6d is removed # - step 6e is replaced with: # All occurrences of "//..", where ".." is a complete # path segment and is a # complete path segment not equal to "..", are removed from the # buffer string. Removal of these path segments is performed # iteratively, removing the leftmost matching pattern on each # iteration, until no matching pattern remains. # - step 6f is removed. # this gives effectively the same resolution, except that the # trailing / is preserved. It may even reduce the URL to # an empty path. # I want to preserve the trailing slash because I want two # behaviours on directories: # x-file:///a/dir/name must generate an index for the directory. # References to directories must not end in /. # x-file:///a/dir/name/ must look for a readable index file, # generating an index if none is found. In the latter, references # to directories must end in /. # Thus, without the trailing /, we always get directory listings; an index # document cannot "hide" the directory as it can in KFM at present. With # the trailing /, we get a view of the site as if from the server as long # as the internal directory references are properly constructed. (In other # words, it will draw attention to the missing /s that cause redirections.) # resolve the chain of URIs and return the result namespace eval DRH-HTML { proc resolve args { set len [llength $args] # find the ultimate absolute URI for {set n $len} {[incr n -1]>=0} {} { if {![catch {scheme-of [lindex $args $n]} scheme]} { break } } if {$n<0} { error "no absolute URI in series" } set url [lindex $args $n] if {$n==$len-1} { return $url } if {3!=[llength [info commands s-${scheme}::*]]} { # fixme; if the namespace exists set scheme generic } s-${scheme}::split $url data #puts "$url\t->\t[array get data]" while {[incr n]<[llength $args]} { s-${scheme}::resolve data [lindex $args $n] } s-${scheme}::join data } # return the scheme of uri; error if relative # RFC-2396 ss 3.1 proc scheme-of {uri} { if {[regexp {^([a-z][a-z0-9+.-]*):} [string tolower $uri] {} x]} { return $x } error "\"$uri\" is relative" } # # # namespace eval s-generic { namespace export split resolve join canonic variable defaultport array set defaultport {http 80 ftp 21 gopher ? telnet ? nntp 119} # # generic handling of RFC-2396 # # return the URL in as canonic a form as we can manage proc canonic {url} { split $url data #fixme join data } # split the URL into a data array; # scheme (downcased), # authority (optional), # path (split), and # query (optional). # The RFC specifically distinguishes null and undefined parts # and warns implementations to do the same. Hence the optionals. proc split {url dv} { upvar 1 $dv data catch {unset data} if {![regexp -nocase {^([^:/?#]+):(//[^/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?} $url {} scheme auth path query frag]} { error "\"$url\" will not split generically" } set data(scheme) [string tolower $scheme] if {[string length $auth]} { set data(authority) [string range $auth 2 end] } set data(path) [::split $path /] if {[string length $query]} { set data(query) [string range $query 1 end] } if {[string length $frag]} { set data(fragment) [string range $frag 1 end] } } # resolve the URI WRT the URL described in the array, back into the array # ss 5.2 proc resolve {dv uri} { upvar 1 $dv data if {![regexp -nocase {^(//[^/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?} $uri {} auth path query frag]} { error "\"$uri\" did not split" } set path [::split $path /] set long [llength $path] if {[string length $auth]} { # rule 4 # network-path set data(authority) [string range $auth 2 end] set data(path) $path } elseif {$long} { if {[string length [lindex $path 0]]} { # rule 5 # relative path # (cut the leading blank) set lpath $data(path) set lpath [lrange $lpath 1 [expr {[llength $lpath]-2}]] eval lappend lpath $path #puts $lpath # (restore the leading blank) set path {{}} set n 0 ;# number of path components to keep set p 0 ;# number of excess parents foreach i $lpath { set flag 1 ;# need to tack on a blank if {{.}=="$i"} { } elseif {{..}!="$i"} { set flag 0 lappend path $i incr n } elseif {$n==$p} { lappend path .. incr n incr p } else { incr n -1 set path [lrange $path 0 $n] } #puts $path } if {$flag} { lappend path {} } } set data(path) $path } if {[string length $query]} { set data(query) [string range $query 1 end] if {!$long && [string length [lindex $data(path) end]]} { set data(path) [lreplace $data(path) end end {}] } } else { catch {unset data(query)} } if {[string length $frag]} { set data(fragment) [string range $frag 1 end] } else { catch {unset data(fragment)} } } # rejoin the data into a URL # ss 5.2 rule 7 proc join {dv} { upvar 1 $dv data set url $data(scheme): if {[info exists data(authority)]} { append url // $data(authority) } append url [::join $data(path) /] if {[info exists data(query)]} { append url ? $data(query) } if {[info exists data(fragment)]} { append url # $data(fragment) } set url }} # # tests from Appendix C # i include fragments here; it happens to work anyway # if 0 { # personally, i believe ?y should resolve to http://a/b/c/d;p?y but this is # the rule. it's the reason for that silly special case flagged with "long". # IMO they dropped the ball on URIs; considering they're at the core of the # WWW and their relative simplicity, this should have been one of the first # things rigourously standardised, not one of the last. set base {http://a/b/c/d;p?q} foreach {i j} { g:h g:h g http://a/b/c/g ./g http://a/b/c/g g/ http://a/b/c/g/ /g http://a/g //g http://g ?y http://a/b/c/?y g?y http://a/b/c/g?y g#s http://a/b/c/g#s g?y#s http://a/b/c/g?y#s ;x http://a/b/c/;x g;x http://a/b/c/g;x g;x?y#s http://a/b/c/g;x?y#s . http://a/b/c/ ./ http://a/b/c/ .. http://a/b/ ../ http://a/b/ ../g http://a/b/g ../.. http://a/ ../../ http://a/ ../../g http://a/g ../../../g http://a/../g ../../../../g http://a/../../g /./g http://a/./g /../g http://a/../g g. http://a/b/c/g. .g http://a/b/c/.g g.. http://a/b/c/g.. ..g http://a/b/c/..g ./../g http://a/b/g ./g/. http://a/b/c/g/ g/./h http://a/b/c/g/h g/../h http://a/b/c/h g;x=1/./y http://a/b/c/g;x=1/y g;x=1/../y http://a/b/c/y } { set x [resolve $base $i] if {[string compare $x $j]} { puts "$i to $x should be $j" } }}} # # eof #