diff --git a/src/bootsupport/modules/include_modules.config b/src/bootsupport/modules/include_modules.config index fc436d8c..3993e0c9 100644 --- a/src/bootsupport/modules/include_modules.config +++ b/src/bootsupport/modules/include_modules.config @@ -1,67 +1,67 @@ - -#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project -#They must be already built, so generally shouldn't come directly from src/modules. - -#each entry - base module -set bootsupport_modules [list\ - src/vendormodules cksum\ - src/vendormodules modpod\ - src/vendormodules overtype\ - src/vendormodules oolib\ - src/vendormodules http\ - src/vendormodules dictutils\ - src/vendormodules fileutil\ - src/vendormodules textutil::adjust\ - src/vendormodules textutil::repeat\ - src/vendormodules textutil::split\ - src/vendormodules textutil::string\ - src/vendormodules textutil::tabify\ - src/vendormodules textutil::trim\ - src/vendormodules textutil::wcswidth\ - src/vendormodules uuid\ - src/vendormodules md5\ - src/vendormodules sha1\ - src/vendormodules tomlish\ - src/vendormodules test::tomlish\ - modules punkcheck\ - modules natsort\ - modules punk::ansi\ - modules punk::assertion\ - modules punk::args\ - modules punk::cap\ - modules punk::cap::handlers::caphandler\ - modules punk::cap::handlers::scriptlibs\ - modules punk::cap::handlers::templates\ - modules punk::char\ - modules punk::console\ - modules punk::du\ - modules punk::encmime\ - modules punk::fileline\ - modules punk::docgen\ - modules punk::lib\ - modules punk::mix\ - modules punk::mix::base\ - modules punk::mix::cli\ - modules punk::mix::util\ - modules punk::mix::templates\ - modules punk::mix::commandset::buildsuite\ - modules punk::mix::commandset::debug\ - modules punk::mix::commandset::doc\ - modules punk::mix::commandset::layout\ - modules punk::mix::commandset::loadedlib\ - modules punk::mix::commandset::module\ - modules punk::mix::commandset::project\ - modules punk::mix::commandset::repo\ - modules punk::mix::commandset::scriptwrap\ - modules punk::ns\ - modules punk::overlay\ - modules punk::path\ - modules punk::repo\ - modules punk::tdl\ - modules punk::zip\ - modules punk::winpath\ - modules textblock\ - modules natsort\ - modules oolib\ -] - + +#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project +#They must be already built, so generally shouldn't come directly from src/modules. + +#each entry - base module +set bootsupport_modules [list\ + src/vendormodules cksum\ + src/vendormodules modpod\ + src/vendormodules overtype\ + src/vendormodules oolib\ + src/vendormodules http\ + src/vendormodules dictutils\ + src/vendormodules fileutil\ + src/vendormodules textutil::adjust\ + src/vendormodules textutil::repeat\ + src/vendormodules textutil::split\ + src/vendormodules textutil::string\ + src/vendormodules textutil::tabify\ + src/vendormodules textutil::trim\ + src/vendormodules textutil::wcswidth\ + src/vendormodules uuid\ + src/vendormodules md5\ + src/vendormodules sha1\ + src/vendormodules tomlish\ + src/vendormodules test::tomlish\ + modules punkcheck\ + modules natsort\ + modules punk::ansi\ + modules punk::assertion\ + modules punk::args\ + modules punk::cap\ + modules punk::cap::handlers::caphandler\ + modules punk::cap::handlers::scriptlibs\ + modules punk::cap::handlers::templates\ + modules punk::char\ + modules punk::console\ + modules punk::du\ + modules punk::encmime\ + modules punk::fileline\ + modules punk::docgen\ + modules punk::lib\ + modules punk::mix\ + modules punk::mix::base\ + modules punk::mix::cli\ + modules punk::mix::util\ + modules punk::mix::templates\ + modules punk::mix::commandset::buildsuite\ + modules punk::mix::commandset::debug\ + modules punk::mix::commandset::doc\ + modules punk::mix::commandset::layout\ + modules punk::mix::commandset::loadedlib\ + modules punk::mix::commandset::module\ + modules punk::mix::commandset::project\ + modules punk::mix::commandset::repo\ + modules punk::mix::commandset::scriptwrap\ + modules punk::ns\ + modules punk::overlay\ + modules punk::path\ + modules punk::repo\ + modules punk::tdl\ + modules punk::zip\ + modules punk::winpath\ + modules textblock\ + modules natsort\ + modules oolib\ +] + diff --git a/src/bootsupport/modules/oolib-0.1.1.tm b/src/bootsupport/modules/oolib-0.1.1.tm deleted file mode 100644 index ecf2cca9..00000000 --- a/src/bootsupport/modules/oolib-0.1.1.tm +++ /dev/null @@ -1,200 +0,0 @@ -#JMN - api should be kept in sync with package patternlib where possible -# -package provide oolib [namespace eval oolib { - variable version - set version 0.1.1 -}] - -namespace eval oolib { - oo::class create collection { - variable o_data ;#dict - variable o_alias - constructor {} { - set o_data [dict create] - } - method info {} { - return [dict info $o_data] - } - method count {} { - return [dict size $o_data] - } - method isEmpty {} { - expr {[dict size $o_data] == 0} - } - method names {{globOrIdx {}}} { - if {[llength $globOrIdx]} { - if {[string is integer -strict $globOrIdx]} { - set idx $globOrIdx - if {$idx < 0} { - set idx "end-[expr {abs($idx + 1)}]" - } - if {[catch {lindex [dict keys $o_data] $idx} result]} { - error "[self object] no such index : '$idx'" - } else { - return $result - } - } else { - #glob - return [lsearch -glob -all -inline [dict keys $o_data] $globOrIdx] - } - } else { - return [dict keys $o_data] - } - } - #like names but without globbing - method keys {} { - dict keys $o_data - } - method key {{posn 0}} { - if {$posn < 0} { - set posn "end-[expr {abs($posn + 1)}]" - } - if {[catch {lindex [dict keys $o_data] $posn} result]} { - error "[self object] no such index : '$posn'" - } else { - return $result - } - } - method hasKey {key} { - dict exists $o_data $key - } - method get {} { - return $o_data - } - method items {} { - return [dict values $o_data] - } - method item {key} { - if {[string is integer -strict $key]} { - if {$key >= 0} { - set valposn [expr {(2*$key) +1}] - return [lindex $o_data $valposn] - } else { - set key "end-[expr {abs($key + 1)}]" - return [lindex $o_data $key] - #return [lindex [dict keys $o_data] $key] - } - } - if {[dict exists $o_data $key]} { - return [dict get $o_data $key] - } - } - #inverse lookup - method itemKeys {value} { - set value_indices [lsearch -all [dict values $o_data] $value] - set keylist [list] - foreach i $value_indices { - set idx [expr {(($i + 1) *2) -2}] - lappend keylist [lindex $o_data $idx] - } - return $keylist - } - method search {value args} { - set matches [lsearch {*}$args [dict values $o_data] $value] - if {"-inline" in $args} { - return $matches - } else { - set keylist [list] - foreach i $matches { - set idx [expr {(($i + 1) *2) -2}] - lappend keylist [lindex $o_data $idx] - } - return $keylist - } - } - #review - see patternlib. Is the intention for aliases to be configurable independent of whether the target exists? - method alias {newAlias existingKeyOrAlias} { - if {[string is integer -strict $newAlias]} { - error "[self object] collection key alias cannot be integer" - } - if {[string length $existingKeyOrAlias]} { - set o_alias($newAlias) $existingKeyOrAlias - } else { - unset o_alias($newAlias) - } - } - method aliases {{key ""}} { - if {[string length $key]} { - set result [list] - foreach {n v} [array get o_alias] { - if {$v eq $key} { - lappend result $n $v - } - } - return $result - } else { - return [array get o_alias] - } - } - #if the supplied index is an alias, return the underlying key; else return the index supplied. - method realKey {idx} { - if {[catch {set o_alias($idx)} key]} { - return $idx - } else { - return $key - } - } - method add {value key} { - if {[string is integer -strict $key]} { - error "[self object] collection key must not be an integer. Use another structure if integer keys required" - } - if {[dict exists $o_data $key]} { - error "[self object] col_processors object error: key '$key' already exists in collection" - } - dict set o_data $key $value - return [expr {[dict size $o_data] - 1}] ;#return index of item - } - method remove {idx {endRange ""}} { - if {[string length $endRange]} { - error "[self object] collection error: ranged removal not yet implemented.. remove one item at a time" - } - if {[string is integer -strict $idx]} { - if {$idx < 0} { - set idx "end-[expr {abs($idx+1)}]" - } - set key [lindex [dict keys $o_data] $idx] - set posn $idx - } else { - set key $idx - set posn [lsearch -exact [dict keys $o_data] $key] - if {$posn < 0} { - error "[self object] no such index: '$idx' in this collection" - } - } - dict unset o_data $key - return - } - method clear {} { - set o_data [dict create] - return - } - method reverse_the_collection {} { - #named slightly obtusely because reversing the data when there may be references held is a potential source of bugs - #the name reverse_the_collection should make it clear that the object is being modified in place as opposed to simply 'reverse' which may imply a view/copy. - #todo - consider implementing a get_reverse which provides an interface to the same collection without affecting original references, yet both allowing delete/edit operations. - set dictnew [dict create] - foreach k [lreverse [dict keys $o_data]] { - dict set dictnew $k [dict get $o_data $k] - } - set o_data $dictnew - return - } - #review - cmd as list vs cmd as script? - method map {cmd} { - set seed [list] - dict for {k v} $o_data { - lappend seed [uplevel #0 [list {*}$cmd $v]] - } - return $seed - } - method objectmap {cmd} { - set seed [list] - dict for {k v} $o_data { - lappend seed [uplevel #0 [list $v {*}$cmd]] - } - return $seed - } - } - -} - diff --git a/src/bootsupport/modules/overtype-1.6.5.tm b/src/bootsupport/modules/overtype-1.6.5.tm index 492341d6..3c200d26 100644 --- a/src/bootsupport/modules/overtype-1.6.5.tm +++ b/src/bootsupport/modules/overtype-1.6.5.tm @@ -439,7 +439,8 @@ tcl::namespace::eval overtype { if {[llength $lflines]} { lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] } - set inputchunks $lflines[unset lflines] + #set inputchunks $lflines[unset lflines] + set inputchunks [lindex [list $lflines [unset lflines]] 0] } } @@ -2115,6 +2116,7 @@ tcl::namespace::eval overtype { if {[llength $undercols]< $opt_width} { set diff [expr {$opt_width- [llength $undercols]}] if {$diff > 0} { + #set undercols [list {*}$undercols {*}[lrepeat $diff "\u0000"]] ;#2024 - much slower lappend undercols {*}[lrepeat $diff "\u0000"] lappend understacks {*}[lrepeat $diff $cs] lappend understacks_gx {*}[lrepeat $diff $gs] @@ -3889,7 +3891,19 @@ tcl::namespace::eval overtype { #OSC 4 - set colour palette #can take multiple params #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ - set params [tcl::string::range $code_content 1 end] + set params [tcl::string::range $code_content 2 end] ;#strip 4 and first semicolon + set cmap [dict create] + foreach {cnum spec} [split $params {;}] { + if {$cnum >= 0 and $cnum <= 255} { + #todo - parse spec from names like 'red' to RGB + #todo - accept rgb:ab/cd/ef as well as rgb:/a/b/c (as alias for aa/bb/cc) + #also - what about rgb:abcd/defg/hijk and 12-bit abc/def/ghi ? + dict set cmap $cnum $spec + } else { + #todo - log + puts stderr "overtype::renderline OSC 4 set colour palette - bad color number: $cnum must be from 0 to 255. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" diff --git a/src/bootsupport/modules/punk/console-0.1.1.tm b/src/bootsupport/modules/punk/console-0.1.1.tm index e367ce9e..887888e8 100644 --- a/src/bootsupport/modules/punk/console-0.1.1.tm +++ b/src/bootsupport/modules/punk/console-0.1.1.tm @@ -183,7 +183,9 @@ namespace eval punk::console { variable previous_stty_state_$channel set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] eq ""} { - set previous_stty_state_$channel [exec {*}$sttycmd -g <@$channel] + if {[catch {{*}$sttycmd -g <@$channel} previous_stty_state_$channel]} { + set previous_stty_state_$channel "" + } } exec {*}$sttycmd raw -echo <@$channel @@ -253,13 +255,21 @@ namespace eval punk::console { return "line" } } elseif {$raw_or_line eq "raw"} { - punk::console::enableRaw + if {[catch { + punk::console::enableRaw + } errM]} { + puts stderr "Warning punk::console::enableRaw failed - $errM" + } if {[can_ansi]} { punk::console::enableVirtualTerminal both } } elseif {$raw_or_line eq "line"} { #review -order. disableRaw has memory from enableRaw.. but but for line mode we want vt disabled - so call it after disableRaw (?) - punk::console::disableRaw + if {[catch { + punk::console::disableRaw + } errM]} { + puts stderr "Warning punk::console::disableRaw failed - $errM" + } if {[can_ansi]} { punk::console::disableVirtualTerminal input ;#default readline arrow behaviour etc punk::console::enableVirtualTerminal output ;#display/use ansi codes @@ -290,12 +300,15 @@ namespace eval punk::console { set loadstate [zzzload::pkg_require twapi] #loadstate could also be stuck on loading? - review - zzzload not very ripe - #Twapi is relatively slow to load - can be 1s plus in normal cases - and much longer if there are disk performance issues. - + #Twapi can be relatively slow to load (on some systems) - can be 1s plus in some cases - and much longer if there are disk performance issues. if {$loadstate ni [list failed]} { + #possibly still 'loading' #review zzzload usage #puts stdout "=========== console loading twapi =============" - zzzload::pkg_wait twapi + set loadstate [zzzload::pkg_wait twapi] ;#can return 'failed' will return version if already loaded or loaded during wait + } + + if {$loadstate ni [list failed]} { package require twapi ;#should be fast once twapi dll loaded in zzzload thread set ::punk::console::has_twapi 1 @@ -523,6 +536,9 @@ namespace eval punk::console { set is_raw 0 return [list stdin [list from $oldmode to $newmode]] } elseif {[set sttycmd [auto_execok stty]] ne ""} { + #stty can return info on windows - but doesn't seem to be able to set anything. + #review - is returned info even valid? + set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] ne ""} { exec {*}$sttycmd [set previous_stty_state_$channel] diff --git a/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/bootsupport/modules/punk/lib-0.1.1.tm index 63f32dee..872e4807 100644 --- a/src/bootsupport/modules/punk/lib-0.1.1.tm +++ b/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -339,6 +339,92 @@ namespace eval punk::lib { set has_twapi [expr {![catch {package require twapi}]}] } + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + proc lswap {lvar a z} { + upvar $lvar l + if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { + #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + set a_index [lindex_resolve $l $a] + set a_msg "" + switch -- $a_index { + -2 { + "$a is greater th + } + -3 { + } + } + error "lswap cannot indices $a and $z $a is out of range" + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} #*** !doctools #[subsection {Namespace punk::lib}] @@ -347,6 +433,7 @@ namespace eval punk::lib { if {[info commands lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation #support minimal set from to proc range {from to} { lseq $from $to @@ -1009,24 +1096,28 @@ namespace eval punk::lib { } set lower_resolve [punk::lib::lindex_resolve $dval $a] ;#-2 for too low, -1 for too high #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds - if {${lower_resolve} == -1} { + if {${lower_resolve} == -2} { + ##x #lower bound is above upper list range #match with decreasing indices is still possible set lower [expr {[llength $dval]-1}] ;#set to max - } elseif {$lower_resolve == -2} { + } elseif {$lower_resolve == -3} { + ##x set lower 0 } else { set lower $lower_resolve } set upper [punk::lib::lindex_resolve $dval $b] - if {$upper == -2} { + if {$upper == -3} { + ##x #upper bound is below list range - - if {$lower_resolve >=-1} { + if {$lower_resolve >=-2} { + ##x set upper 0 } else { continue } - } elseif {$upper == -1} { + } elseif {$upper == -2} { #use max set upper [expr {[llength $dval]-1}] #assert - upper >=0 because we have ruled out empty lists @@ -1669,7 +1760,8 @@ namespace eval punk::lib { error "bad expression '$expression': must be integer?\[+-\]integer?" } } - + + # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bunds on upper vs lower side proc lindex_resolve {list index} { #*** !doctools #[call [fun lindex_resolve] [arg list] [arg index]] @@ -1679,11 +1771,13 @@ namespace eval punk::lib { #[para]Sometimes the actual integer index is desired. #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. #[para]lindex_resolve will parse the index expression and return: - #[para] a) -2 if the supplied index expression is below the lower bound for the supplied list. (< 0) - #[para] b) -1 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] a) -3 if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) -2 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list #[para]Otherwise it will return an integer corresponding to the position in the list. #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out or bounds indices, or in the case of lrange, return results anyway. #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which we will resolve (hopefully safely) with expr #if {![llength $list]} { @@ -1694,9 +1788,9 @@ namespace eval punk::lib { if {[string is integer -strict $index]} { #can match +i -i if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } else { #integer may still have + sign - normalize with expr return [expr {$index}] @@ -1708,14 +1802,14 @@ namespace eval punk::lib { set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { - return -1 + return -2 } } else { - #end + #index is 'end' set index [expr {[llength $list]-1}] if {$index < 0} { - #special case - end with empty list - treat end like a positive number out of bounds - return -1 + #special case - 'end' with empty list - treat end like a positive number out of bounds + return -2 } else { return $index } @@ -1723,7 +1817,7 @@ namespace eval punk::lib { if {$offset == 0} { set index [expr {[llength $list]-1}] if {$index < 0} { - return -1 ;#special case + return -2 ;#special case as above } else { return $index } @@ -1732,7 +1826,7 @@ namespace eval punk::lib { set index [expr {([llength $list]-1) - $offset}] } if {$index < 0} { - return -2 + return -3 } else { return $index } @@ -1753,26 +1847,50 @@ namespace eval punk::lib { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } return $index } } } - proc lindex_resolve2 {list index} { - #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ but that's likely unavailable here. + proc lindex_resolve_basic {list index} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg list] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -1 for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ + # - which #for {set i 0} {$i < [llength $list]} {incr i} { # lappend indices $i #} + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < 0 || ($index >= [llength $list])} { + #even though in this case we could return -2 or -3 like lindex_resolve; for consistency we don't, as it's not always determinable for compound indices using the lseq method. + return -1 + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } if {[llength $list]} { set indices [punk::lib::range 0 [expr {[llength $list]-1}]] ;# uses lseq if available, has fallback. + #if lseq was available - $indices is an 'arithseries' - theoretically not taking up ram(?) } else { set indices [list] } set idx [lindex $indices $index] if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end return -1 } else { return $idx @@ -2334,13 +2452,6 @@ namespace eval punk::lib { } return $prefix } - #test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var - proc swapnumvars {namea nameb} { - upvar $namea a $nameb b - set a [expr {$a ^ $b}] - set b [expr {$a ^ $b}] - set a [expr {$a ^ $b}] - } #e.g linesort -decreasing $data proc linesort {args} { @@ -2956,7 +3067,7 @@ namespace eval punk::lib { # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { - set number [punk::objclone $unformattednumber] + set number [objclone $unformattednumber] set number [string map {_ ""} $number] #normalize using expr - e.g 2e4 -> 20000.0 set number [expr {$number}] diff --git a/src/bootsupport/modules/punk/mix/base-0.1.tm b/src/bootsupport/modules/punk/mix/base-0.1.tm index 932c1db6..806b172e 100644 --- a/src/bootsupport/modules/punk/mix/base-0.1.tm +++ b/src/bootsupport/modules/punk/mix/base-0.1.tm @@ -4,6 +4,7 @@ package provide punk::mix::base [namespace eval punk::mix::base { }] package require punk::path +package require punk::lib ;#format_number etc #base internal plumbing functions namespace eval punk::mix::base { @@ -657,16 +658,38 @@ namespace eval punk::mix::base { #temp emission to stdout.. todo - repl telemetry channel puts stdout "cksum_path: creating temporary tar archive for $path" - puts stdout " at: $archivename .." - tar::create $archivename $target + puts -nonewline stdout " at: $archivename ..." + set tsstart [clock millis] + if {[set tarpath [auto_execok tar]] ne ""} { + #using an external binary is *significantly* faster than tar::create - but comes with some risks + #review - need to check behaviour/flag variances across platforms + #don't use -z flag. On at least some tar versions the zipped file will contain a timestamped subfolder of filename.tar - which ruins the checksum + #also - tar is generally faster without the compression (although this may vary depending on file size and disk speed?) + exec {*}$tarpath -cf $archivename $target ;#{*} needed in case spaces in tarpath + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar -cf done ($ms ms)" + } else { + set tsstart [clock millis] ;#don't include auto_exec search time for tar::create + tar::create $archivename $target + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar::create done ($ms ms)" + puts stdout " NOTE: install tar executable for potentially *much* faster directory checksum processing" + } + if {$ftype eq "file"} { - set sizeinfo "(size [file size $target])" + set sizeinfo "(size [punk::lib::format_number [file size $target]] bytes)" } else { - set sizeinfo "(file type $ftype - size unknown)" + set sizeinfo "(file type $ftype - tarred size [punk::lib::format_number [file size $archivename]] bytes)" } - puts stdout "cksum_path: calculating cksum for $target $sizeinfo..." + set tsstart [clock millis] + puts -nonewline stdout "cksum_path: calculating cksum for $target $sizeinfo ... " set cksum [{*}$cksum_command $archivename] - #puts stdout "cksum_path: cleaning up.. " + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " cksum done ($ms ms)" + puts stdout " cksum: $cksum" file delete -force $archivename cd $startdir diff --git a/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm b/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm index 9afc685c..80cab2a7 100644 --- a/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm +++ b/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm @@ -157,6 +157,9 @@ namespace eval punk::mix::commandset::project { set opt_force [dict get $opts -force] set opt_confirm [string tolower [dict get $opts -confirm]] # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_layout [dict get $opts -layout] + set opt_update [dict get $opts -update] + # -- --- --- --- --- --- --- --- --- --- --- --- --- set opt_modules [dict get $opts -modules] if {[llength $opt_modules] == 1 && [lindex $opt_modules 0] eq "\uFFFF"} { #if not specified - add a single module matching project name @@ -169,9 +172,6 @@ namespace eval punk::mix::commandset::project { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_layout [dict get $opts -layout] - set opt_update [dict get $opts -update] - # -- --- --- --- --- --- --- --- --- --- --- --- --- #todo - install support binaries on a per-project basis in a way that doesn't impact machine (e.g not added to path) - cache in user config dir if possible, supply mechanism to clear cache diff --git a/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/bootsupport/modules/punk/ns-0.1.0.tm index cf0bf70c..10250a9b 100644 --- a/src/bootsupport/modules/punk/ns-0.1.0.tm +++ b/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -1707,6 +1707,7 @@ tcl::namespace::eval punk::ns { lassign [internal::get_run_opts {-vars -nowarnings} {} $args] _r runopts _c cmdargs set use_vars [expr {"-vars" in $runopts}] set no_warnings [expr {"-nowarnings" in $runopts}] + set ver "" #todo support leading solo flags such as -capture to control whether we do a static capture of local vars in the ns @@ -1717,15 +1718,68 @@ tcl::namespace::eval punk::ns { } default { if {[string match ::* $pkg_or_existing_ns]} { + set pkg_unqualified [string range $pkg_or_existing_ns 2 end] if {![tcl::namespace::exists $pkg_or_existing_ns]} { - set ver [package require [string range $pkg_or_existing_ns 2 end]] + set ver [package require $pkg_unqualified] } else { set ver "" } set ns $pkg_or_existing_ns } else { - set ver [package require $pkg_or_existing_ns] - set ns ::$pkg_or_existing_ns + set pkg_unqualified $pkg_or_existing_ns + set ver [package require $pkg_unqualified] + set ns ::$pkg_unqualified + } + #some packages don't create their namespace immediately and/or don't populate it with commands and instead put entries in ::auto_index + set previous_command_count 0 + if {[namespace exists $ns]} { + set previous_command_count [llength [info commands ${ns}::*]] + } + + + #also if a sub package was loaded first - then the namespace for the base or lower package may exist but have no commands + #for the purposes of pkguse - which most commonly interactive - we want the namespace populated + #It may still not be *fully* populated because we stop at first source that adds commands - REVIEW + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + + if {!$ns_populated} { + #we will catch-run an auto_index entry if any + #auto_index entry may or may not be prefixed with :: + set keys [list] + #first look for exact pkg_unqualified and ::pkg_unqualified + #leave these at beginning of keys list + if {[array exists ::auto_index($pkg_unqualified)]} { + lappend keys $pkg_unqualified + } + if {[array exists ::auto_index(::$pkg_unqualified)]} { + lappend keys ::$pkg_unqualified + } + #as auto_index is an array - we could get keys in arbitrary order + set matches [lsort [array names ::auto_index ${pkg_unqualified}::*]] + lappend keys {*}$matches + set matches [lsort [array names ::auto_index ::${pkg_unqualified}::*]] + lappend keys {*}$matches + set ns_populated 0 + set i 0 + set already_sourced [list] ;#often multiple triggers for the same source - don't waste time re-sourcing + set ns_depth [llength [punk::ns::nsparts [string trimleft $ns :]]] + while {!$ns_populated && $i < [llength $keys]} { + #todo - skip sourcing deeper entries from a subpkg which may have been loaded earlier than the base + #e.g if we are loading ::x::y + #only source for keys the same depth, or one deeper ie ::x::y, x::y, ::x::y::z not ::x or ::x::y::z::etc + set k [lindex $keys $i] + set k_depth [llength [punk::ns::nsparts [string trimleft $k :]]] + if {$k_depth == $ns_depth || $k_depth == $ns_depth + 1} { + set auto_source [set ::auto_index($k)] + if {$auto_source ni $already_sourced} { + uplevel 1 $auto_source + lappend already_sourced $auto_source + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + } + } + incr i + } + } } } @@ -1799,7 +1853,13 @@ tcl::namespace::eval punk::ns { return $out } } else { - error "Namespace $ns not found." + if {$ver eq ""} { + error "Namespace $ns not found. No package version found." + } else { + set out "(no package namespace found) remaining in [uplevel 1 {namespace current}]" + append out \n $ver + return $out + } } return $out } diff --git a/src/bootsupport/modules/punk/repo-0.1.1.tm b/src/bootsupport/modules/punk/repo-0.1.1.tm index 2cb5fd1d..e056b14a 100644 --- a/src/bootsupport/modules/punk/repo-0.1.1.tm +++ b/src/bootsupport/modules/punk/repo-0.1.1.tm @@ -468,7 +468,7 @@ namespace eval punk::repo { set path [string trim [string range $ln [string length "MISSING "] end]] dict set pathdict $path "missing" } - "EXTRA * " { + "EXTRA *" { #fossil will explicitly list files in a new folder - as opposed to git which shows just the folder set path [string trim [string range $ln [string length "EXTRA "] end]] dict set pathdict $path "extra" diff --git a/src/bootsupport/modules/test/tomlish-1.1.1.tm b/src/bootsupport/modules/test/tomlish-1.1.1.tm index 4ea2ce3d..8405fae7 100644 Binary files a/src/bootsupport/modules/test/tomlish-1.1.1.tm and b/src/bootsupport/modules/test/tomlish-1.1.1.tm differ diff --git a/src/bootsupport/modules/tomlish-1.1.1.tm b/src/bootsupport/modules/tomlish-1.1.1.tm index d85d4416..3e13e75d 100644 --- a/src/bootsupport/modules/tomlish-1.1.1.tm +++ b/src/bootsupport/modules/tomlish-1.1.1.tm @@ -19,12 +19,20 @@ #*** !doctools #[manpage_begin tomlish_module_tomlish 0 1.1.1] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] -#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] #[require tomlish] -#[keywords module] +#[keywords module parsing toml configuration] #[description] -#[para] - +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ @@ -71,17 +79,41 @@ package require logger namespace eval tomlish { namespace export {[a-z]*}; # Convention: export all lowercase variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + #ARRAY is analogous to a Tcl list #TABLE is analogous to a Tcl dict #WS = inline whitespace - #KEYVAL = bare key and value - #QKEYVAL = quoted key and value + #KEY = bare key and value + #QKEY = double quoted key and value + #SQKEY = single quoted key and value #ITABLE = inline table (*can* be anonymous table) # inline table values immediately create a table with the opening brace # inline tables are fully defined between their braces, as are dotted-key subtables defined within # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained - set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL STRING MULTISTRING LITSTRING MULTILITSTRING INT FLOAT BOOL DATETIME] + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) set min_int -9223372036854775808 ;#-2^63 @@ -114,10 +146,13 @@ namespace eval tomlish { #find the value # 3 is the earliest index at which the value could occur (depending on whitespace) set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error ">>>_get_keyval_value doesn't seem to be a properly structured { = } list" + } foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { - STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { set type [lindex $sub 0] set value [lindex $sub 1] set found_sub $sub @@ -127,10 +162,10 @@ namespace eval tomlish { } } if {!$found_value} { - error "Failed to find value element in KEYVAL. '$keyval_element'" + error "Failed to find value element in KEY. '$keyval_element'" } if {$found_value > 1} { - error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + error "Found multiple value elements in KEY, expected exactly one. '$keyval_element'" } switch -exact -- $type { @@ -141,16 +176,28 @@ namespace eval tomlish { STRING - STRINGPART { set result [list type $type value [::tomlish::utils::unescape_string $value]] } - LITSTRING { + LITERAL - LITERALPART { #REVIEW set result [list type $type value $value] } - TABLE - ITABLE - ARRAY - MULTISTRING { - #jmn2024 - added ITABLE - review + TABLE { + #invalid? + error "_get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::get_dict [list $found_sub]] + } + ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! set result [list type $type value [::tomlish::get_dict [list $found_sub]]] } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } default { error "Unexpected value type '$type' found in keyval '$keyval_element'" } @@ -158,6 +205,48 @@ namespace eval tomlish { return $result } + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "_get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } #get_dict is a *basic* programmatic datastructure for accessing the data. # produce a dictionary of keys and values from a tomlish tagged list. # get_dict is primarily for reading toml data. @@ -193,9 +282,12 @@ namespace eval tomlish { set tag [lindex $item 0] #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { - KEYVAL - QKEYVAL { + KEY - QKEY - SQKEY { log::debug "--> processing $tag: $item" set key [lindex $item 1] + if {$tag eq "QKEY"} { + set key [::tomlish::utils::unescape_string $key] + } #!todo - normalize key. (may be quoted/doublequoted) if {[dict exists $datastructure $key]} { @@ -206,6 +298,43 @@ namespace eval tomlish { set keyval_dict [_get_keyval_value $item] dict set datastructure $key $keyval_dict } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "get_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } TABLE { set tablename [lindex $item 1] set tablename [::tomlish::utils::tablename_trim $tablename] @@ -220,21 +349,20 @@ namespace eval tomlish { #toml spec rule - all segments mst be non-empty #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - set key_hierarchy [list] - set key_hierarchy_raw [list] + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] foreach rawseg $name_segments { set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes - set c1 [::string index $rawseg 0] - set c2 [::string index $rawseg end] + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes are processed within it. - set seg [::string range $rawseg 1 end-1] + set seg [tcl::string::range $rawseg 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. - set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] - #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] } else { set seg $rawseg } @@ -243,15 +371,16 @@ namespace eval tomlish { #if {$rawseg eq ""} { # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" #} - lappend key_hierarchy $seg - lappend key_hierarchy_raw $rawseg + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg - if {[dict exists $datastructure {*}$key_hierarchy]} { + if {[dict exists $datastructure {*}$table_key_hierarchy]} { #It's ok for this key to already exist *if* it was defined by a previous tablename, - # but not if it was defined as a keyval/qkeyval + # but not if it was defined as a key/qkey/skey ? - set testkey [join $key_hierarchy_raw .] - set testkey_length [llength $key_hierarchy_raw] + set testkey [join $table_key_hierarchy_raw .] + + set testkey_length [llength $table_key_hierarchy_raw] set found_testkey 0 if {$testkey in $tablenames_seen} { set found_testkey 1 @@ -267,11 +396,12 @@ namespace eval tomlish { # instead compare {a b.c d} with {a b c d} # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #dots within table segments might seem like an 'edge case' - # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" if {$testkey eq $seen_match} { set found_testkey 1 } @@ -279,35 +409,81 @@ namespace eval tomlish { } if {$found_testkey == 0} { - #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset - error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg } } } + #ensure empty tables are still represented in the datastructure + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" + } + } #We must do this after the key-collision test above! lappend tablenames_seen $tablename - - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? set keyval_key [lindex $element 1] + if {$type eq "QKEY"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] - dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -320,16 +496,36 @@ namespace eval tomlish { foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { - set keyval_key [lindex $element 1] + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] - dict set datastructure $keyval_key $keyval_dict + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -350,12 +546,16 @@ namespace eval tomlish { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] } - TABLE - ARRAY - MULTISTRING { + LITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } - WS - SEP { - #ignore whitespace and commas + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments } default { error "Unexpected value type '$type' found in array" @@ -363,6 +563,49 @@ namespace eval tomlish { } } } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } MULTISTRING { #triple dquoted string log::debug "--> processing multistring: $item" @@ -372,7 +615,14 @@ namespace eval tomlish { for {set idx 0} {$idx < [llength $parts]} {incr idx} { set element [lindex $parts $idx] set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "doulbe quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } STRINGPART { append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] } @@ -533,8 +783,8 @@ namespace eval tomlish::encode { proc float {f} { #convert any non-lower case variants of special values to lowercase for Toml - if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { - return [list FLOAT [string tolower $f]] + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] } if {[::tomlish::utils::is_float $f]} { return [list FLOAT $f] @@ -553,44 +803,56 @@ namespace eval tomlish::encode { proc boolean {b} { #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false - if {![string is boolean -strict $b]} { + if {![tcl::string::is boolean -strict $b]} { error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" } else { if {[expr {$b && 1}]} { - return [list BOOL true] + return [::list BOOL true] } else { - return [list BOOL false] + return [::list BOOL false] } } } + + #TODO #Take tablename followed by - # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types proc table {name args} { set pairs [list] foreach t $args { - if {[llength $t] == 3} { - if {[lindex $t 0] ne "KEYVAL"} { - error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" } - lappend pairs $t + lappend pairs [list KEY $keystr = $valuepart] } elseif {[llength $t] == 2} { #!todo - type heuristics lassign $t n v - lappend pairs [list KEYVAL $n [list STRING $v]] + lappend pairs [list KEY $n = [list STRING $v]] } else { - error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" foreach part [lrange $item 1 end] { - append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] } - append toml '''$litstring''' + append toml '''$literal''' } INT - BOOL - @@ -777,6 +1079,7 @@ namespace eval tomlish::decode { # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. # For this reason, we also do absolutely no line-ending transformations based on platform. # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { #*** !doctools #[call [fun toml] [arg s]] @@ -835,11 +1138,12 @@ namespace eval tomlish::decode { set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. - set state "key-space" - ::tomlish::parse::spacestack push {space key-space} + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) set linenum 1 - + + set ::tomlish::parse::state_list [list] try { while {$r} { set r [::tomlish::parse::tok $s] @@ -851,31 +1155,162 @@ namespace eval tomlish::decode { #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" #puts "-->tok: $tok tokenType='$tokenType'" set prevstate $state - ##### - set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below - set state $nextstate - if {$state eq "err"} { - error "State error - aborting parse. [tomlish::parse::report_line]" + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) } - - if {$last_space_action eq "pop"} { + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? + #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] + } + ''' { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } newline { incr linenum lappend v($nest) [list NEWLINE $tok] } tablename { #note: a tablename only 'pops' if we are greater than zero - error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" } tablearrayname { #!review - tablearrayname different to tablename regarding push/pop? #note: a tablename only 'pops' if we are greater than zero - error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" } endarray { #nothing to do here. @@ -885,29 +1320,74 @@ namespace eval tomlish::decode { lappend v($nest) "SEP" } endinlinetable { - puts stderr "endinlinetable" + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } endmultiquote { - puts stderr "endmultiquote for last_space_action 'pop'" + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" } default { - error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } - set parentlevel [expr {$nest -1}] - lappend v($parentlevel) [set v($nest)] + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + incr nest -1 } elseif {$last_space_action eq "push"} { + set prevnest $nest incr nest 1 set v($nest) [list] # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } barekey { - set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" } quotedkey - itablequotedkey { - set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + set v($nest) [list QKEY $tok] ;#$tok is the keyname + } + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! @@ -922,7 +1402,7 @@ namespace eval tomlish::decode { # tomlish list? set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) @@ -940,21 +1420,31 @@ namespace eval tomlish::decode { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } startmultiquote { - puts stderr "push trigger tokenType startmultiquote (todo)" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE - #JMN ??? - #set next_tokenType_known 1 - #::tomlish::parse::set_tokenType "multistring" - #set tok "" + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL } default { - error "push trigger tokenType '$tokenType' not yet implemented" + error "---- push trigger tokenType '$tokenType' not yet implemented" } } } else { #no space level change switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } starttablename { #$tok is triggered by the opening bracket and sends nothing to output } @@ -962,40 +1452,69 @@ namespace eval tomlish::decode { #$tok is triggered by the double opening brackets and sends nothing to output } tablename - tablenamearray { - error "did not expect 'tablename/tablearrayname' without space level change" + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" #set v($nest) [list TABLE $tok] } endtablename - endtablearrayname { #no output into the tomlish list for this token } startinlinetable { - puts stderr "decode::toml error. did not expect startlinetable without space level change" + puts stderr "---- decode::toml error. did not expect startinlinetable without space level change (no space level change)" } startquote { - switch -exact -- $nextstate { - string { + switch -exact -- $newstate { + string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quotedkey { + quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "quotedkey" set tok "" } - itablequotedkey { + itable-quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "itablequotedkey" set tok "" } default { - error "startquote switch case not implemented for nextstate: $nextstate" + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startsquote { + switch -exact -- $newstate { + literal-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "literal" + set tok "" + } + squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itable-squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } + default { + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" } } } startmultiquote { #review - puts stderr "no space level change - got startmultiquote" + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" set next_tokenType_known 1 ::tomlish::parse::set_tokenType "stringpart" set tok "" @@ -1004,27 +1523,53 @@ namespace eval tomlish::decode { #nothing to do? set tok "" } + endsquote { + set tok "" + } endmultiquote { #JMN!! set tok "" } string { - lappend v($nest) [list STRING $tok] + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes } - stringpart { - lappend v($nest) [list STRINGPART $tok] + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } } multistring { #review lappend v($nest) [list MULTISTRING $tok] } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } quotedkey { #lappend v($nest) [list QKEY $tok] ;#TEST } itablequotedkey { } - untyped-value { + untyped_value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. if {$tok in {true false}} { set tag BOOL @@ -1035,9 +1580,10 @@ namespace eval tomlish::decode { } elseif {[::tomlish::utils::is_datetime $tok]} { set tag DATETIME } else { - error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line] (no space level change)" } lappend v($nest) [list $tag $tok] + } comment { #puts stdout "----- comment token returned '$tok'------" @@ -1068,17 +1614,18 @@ namespace eval tomlish::decode { #!todo - check previous tokens are complete/valid? } default { - error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" } } } if {!$next_tokenType_known} { + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" ::tomlish::parse::set_tokenType "" set tok "" } - if {$state eq "end"} { + if {$state eq "end-state"} { break } @@ -1108,8 +1655,6 @@ namespace eval tomlish::decode { } finally { set is_parsing 0 } - - return $v(0) } @@ -1136,31 +1681,84 @@ namespace eval tomlish::utils { set segments [tablename_split $tablename false] set trimmed_segments [list] foreach seg $segments { - lappend trimmed_segments [::string trim $seg [list " " \t]] + lappend trimmed_segments [::string trim $seg " \t"] } return [join $trimmed_segments .] } + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [tcl::string::length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + #utils::tablename_split proc tablename_split {tablename {normalize false}} { #we can't just split on . because we have to handle quoted segments which may contain a dot. #eg {dog."tater.man"} - set i 0 - set sLen [::string length $tablename] + set sLen [tcl::string::length $tablename] set segments [list] set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" - for {} {$i < $sLen} {} { + for {set i 0} {$i < $sLen} {incr i} { if {$i > 0} { - set lastChar [::string index $tablename [expr {$i - 1}]] + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $tablename $i] - incr i + set c [tcl::string::index $tablename $i] if {$c eq "."} { switch -exact -- $mode { @@ -1188,7 +1786,7 @@ namespace eval tomlish::utils { } } elseif {($c eq "\"") && ($lastChar ne "\\")} { if {$mode eq "unknown"} { - if {[::string trim $seg] ne ""} { + if {[tcl::string::trim $seg] ne ""} { #we don't allow a quote in the middle of a bare key error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" } @@ -1240,7 +1838,7 @@ namespace eval tomlish::utils { } append seg $c } - if {$i == $sLen} { + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" switch -exact -- $mode { @@ -1251,13 +1849,13 @@ namespace eval tomlish::utils { if {$normalize} { lappend segments $seg } else { - lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong } } litquoted { - set trimmed_seg [::string trim $seg] - if {[::string index $trimmed_seg end] ne "\'"} { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" } lappend segments $seg @@ -1275,14 +1873,14 @@ namespace eval tomlish::utils { } } foreach seg $segments { - set trimmed [::string trim $seg [list " " \t]] + set trimmed [tcl::string::trim $seg " \t"] #note - we explicitly allow 'empty' quoted strings '' & "" # (these are 'discouraged' but valid toml keys) #if {$trimmed in [list "''" "\"\""]} { # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" #} if {$trimmed eq "" } { - error "tablename_split. Empty segment found. tablename: '$tablename'" + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" } } return $segments @@ -1294,7 +1892,7 @@ namespace eval tomlish::utils { # is a valid 'unicode scalar value' # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} - if {[::string match {\\u*} $slashu]} { + if {[tcl::string::match {\\u*} $slashu]} { set exp {^\\u([0-9a-fA-F]{4}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %4x dec] != 1} { @@ -1306,7 +1904,7 @@ namespace eval tomlish::utils { } else { return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] } - } elseif {[::string match {\\U*} $slashu]} { + } elseif {[tcl::string::match {\\U*} $slashu]} { set exp {^\\U([0-9a-fA-F]{8}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %8x dec] != 1} { @@ -1340,7 +1938,7 @@ namespace eval tomlish::utils { set buffer4 "" ;#buffer for 4 hex characters following a \u set buffer8 "" ;#buffer for 8 hex characters following a \u - set sLen [::string length $str] + set sLen [tcl::string::length $str] #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc set slash_active 0 @@ -1352,12 +1950,12 @@ namespace eval tomlish::utils { set i 0 for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [::string index $str [expr {$i - 1}]] + set lastChar [tcl::string::index $str [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $str $i] + set c [tcl::string::index $str $i] ::tomlish::log::debug "unescape_string. got char $c" scan $c %c n if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { @@ -1380,10 +1978,10 @@ namespace eval tomlish::utils { } } else { if {$unicode4_active} { - if {[::string length $buffer4] < 4} { + if {[tcl::string::length $buffer4] < 4} { append buffer4 $c } - if {[::string length $buffer4] == 4} { + if {[tcl::string::length $buffer4] == 4} { #we have a \uHHHH to test set unicode4_active 0 set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] @@ -1394,10 +1992,10 @@ namespace eval tomlish::utils { } } } elseif {$unicode8_active} { - if {[::string length $buffer8] < 8} { + if {[tcl::string::length $buffer8] < 8} { append buffer8 $c } - if {[::string length $buffer8] == 8} { + if {[tcl::string::length $buffer8] == 8} { #we have a \UHHHHHHHH to test set unicode8_active 0 set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] @@ -1409,7 +2007,7 @@ namespace eval tomlish::utils { } } elseif {$slash_active} { set slash_active 0 - set ctest [string map {{"} dq} $c] + set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { set e "\\\"" @@ -1453,15 +2051,15 @@ namespace eval tomlish::utils { } proc normalize_key {rawkey} { - set c1 [::string index $rawkey 0] - set c2 [::string index $rawkey end] + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes allowed within it. - set key [::string range $rawkey 1 end-1] + set key [tcl::string::range $rawkey 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. # - set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only set key [::tomlish::utils::unescape_string $keydata] #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. } else { @@ -1497,11 +2095,11 @@ namespace eval tomlish::utils { #check if str is valid for use as a toml bare key proc is_barekey {str} { - if {[::string length $str] == 0} { + if {[tcl::string::length $str] == 0} { return 0 } else { set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters match the regexp return 1 } else { @@ -1512,7 +2110,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'integer'. proc int_validchars1 {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { return 1 } else { @@ -1521,7 +2119,7 @@ namespace eval tomlish::utils { } #add support for hex,octal,binary 0x.. 0o.. 0b... proc int_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { return 1 } else { @@ -1538,22 +2136,22 @@ namespace eval tomlish::utils { # --------------------------------------- #check for leading zeroes in non 0x 0b 0o #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { return 0 } # --------------------------------------- #check +,- only occur in the first position. - if {[::string last - $str] > 0} { + if {[tcl::string::last - $str] > 0} { return 0 } - if {[::string last + $str] > 0} { + if {[tcl::string::last + $str] > 0} { return 0 } - set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) - if {![string is integer -strict $numeric_value]} { + if {![tcl::string::is integer -strict $numeric_value]} { return 0 } #!todo - check bounds only based on some config value @@ -1575,7 +2173,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'float'. proc float_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { return 1 } else { @@ -1595,7 +2193,7 @@ namespace eval tomlish::utils { return 1 } - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) #Toml spec also disallows leading zeros in the exponent part @@ -1603,12 +2201,12 @@ namespace eval tomlish::utils { #for now we will allow leading zeros in exponents #!todo - configure 'strict' option to disallow? #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] set r {([0-9])*} regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E set z {([0])*} regexp $z $intpart leadingzeros - if {[::string length $leadingzeros] > 1} { + if {[tcl::string::length $leadingzeros] > 1} { return 0 } #for floats, +,- may occur in multiple places @@ -1616,9 +2214,9 @@ namespace eval tomlish::utils { #!todo - check bounds ? #strip underscores for tcl double check - set check [::string map {_ ""} $str] + set check [tcl::string::map {_ ""} $str] #string is double accepts inf nan +NaN etc. - if {![::string is double $check]} { + if {![tcl::string::is double $check]} { return 0 } @@ -1631,7 +2229,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'datetime'. proc datetime_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { return 1 } else { @@ -1639,19 +2237,37 @@ namespace eval tomlish::utils { } } + #review - we proc is_datetime {str} { - #e.g 1979-05-27T00:32:00-07:00 + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #!todo - use full RFC 3339 parser? lassign [split $str T] datepart timepart #!todo - what if the value is 'time only'? - - if {[catch {clock scan $datepart} err]} { - puts stderr "tcl clock scan failed err:'$err'" - return 0 - } + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + #!todo - verify time part is reasonable } else { return 0 @@ -1670,174 +2286,434 @@ namespace eval tomlish::parse { #[para] #[list_begin definitions] + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? + #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? + + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text variable state - # states: - # key-space, curly-space, array-space - # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # states: + # table-space, itable-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string-state, literal-state, multistring... # # notes: - # key-space i # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack - # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax # #stateMatrix defines for each state, actions to take for each possible token. - #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. - #dual-element actions are a push command and the name of the space to push on the stack. - # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. + #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases - #test variable stateMatrix set stateMatrix [dict create] - dict set stateMatrix\ - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + #xxx-space vs xxx-syntax inadequately documented - TODO + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# dict set stateMatrix\ - curly-space {\ - whitespace "curly-space"\ - newline "curly-space"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ } - #REVIEW - #toml spec looks like heading towards allowing newlines within inline tables - #https://github.com/toml-lang/toml/issues/781 + #itable-space/ curly-syntax : itables dict set stateMatrix\ - curly-syntax {\ - whitespace "curly-syntax"\ - newline "curly-syntax"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-ok"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ } dict set stateMatrix\ - value-expected {\ - whitespace "value-expected"\ - newline "err"\ - eof "err"\ - untyped-value "samespace"\ - startquote "string"\ - startmultiquote {pushspace "multistring-space"}\ - startinlinetable {pushspace curly-space}\ - comment "err"\ - comma "err"\ - startarray {pushspace array-space}\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ } + + # ' = ' portion of keyval dict set stateMatrix\ - array-space {\ - whitespace "array-space"\ - newline "array-space"\ - eof "err"\ - untyped-value "samespace"\ - startarray {pushspace "array-space"}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "array-space"\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ } + #### dict set stateMatrix\ - array-syntax {\ - whitespace "array-syntax"\ - newline "array-syntax"\ - untyped-value "samespace"\ - startarray {pushspace array-space}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "err"\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - - + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} dict set stateMatrix\ - itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + leading-squote-space {\ + squote_seq "POPSPACE"\ + } #dict set stateMatrix\ - # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + dict set stateMatrix\ - itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ - itablekeyval-space {} + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } dict set stateMatrix\ - itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value + dict set stateMatrix\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ + } + #dottedkeyend "POPSPACE" + + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 dict set stateMatrix\ - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey "itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ + } + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? + + + #JMN REVIEW dict set stateMatrix\ - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ + comma "array-space"\ + comment "array-space"\ + eof "err-state-array-space-got-eof"\ + } dict set stateMatrix\ - keyval-space {} + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + comma "array-space"\ + comment "err-state"\ + } + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space + dict set stateMatrix\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ + } dict set stateMatrix\ - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ + } + # endsquote {PUSHSPACE "keyval-space"} + dict set stateMatrix\ - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + string-state {\ + whitespace "NA"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } dict set stateMatrix\ - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + literal-state {\ + whitespace "NA"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + + + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } dict set stateMatrix\ - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ + } + + + #only valid subparts are literalpart and newline. other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + + dict set stateMatrix\ - baretablename {whitespace "NA" newline "err" equal "value-expected"} + tablename-state {\ + whitespace "NA"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ + } dict set stateMatrix\ - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + tablearrayname-state {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ + } + dict set stateMatrix\ - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - end {} - - #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push - variable stateMatrix_orig { - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} - curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} - value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} - array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} - array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - keyval-space {} - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} - end {} + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } + } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- #build a list of 'push triggers' from the stateMatrix # ie tokens which can push a new space onto spacestack set push_trigger_tokens [list] tcl::dict::for {s transitions} $stateMatrix { tcl::dict::for {token transition_to} $transitions { - set action [lindex $transition_to 0] - switch -exact -- $action { - pushspace - zeropoppushspace { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { if {$token ni $push_trigger_tokens} { lappend push_trigger_tokens $token } @@ -1845,84 +2721,139 @@ namespace eval tomlish::parse { } } } - puts stdout "push_trigger_tokens: $push_trigger_tokens" - #!todo - hard code once stateMatrix finalised? + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + #mainly for the -space states: #redirect to another state $c based on a state transition from $whatever to $b # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. - #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' + #this is useful as we often don't know state $b. e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next variable spacePopTransitions { - array-space array-syntax - curly-space curly-syntax - keyval-space keytail - itablekeyval-space itablevaltail + array-space array-syntax } - variable spacePushTransitions { - keyval-space keyval-syntax - itablekeyval-space itablekeyval-syntax - array-space array-space - curly-space curly-space - key-space tablename + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail - variable state_list + variable state_list ;#reset every tomlish::decode::toml namespace export tomlish toml namespace ensemble create - proc getNextState {tokentype currentstate} { + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state variable nest variable v - + + set prevstate $currentstate + + variable spacePopTransitions variable spacePushTransitions - variable last_space_action "none" - variable last_space_type "none" + variable spaceSameTransitions + + variable last_space_action "none" + variable last_space_type "none" variable state_list set result "" + set starttok "" if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] - ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { - popspace { + POPSPACE { spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + set last_space_action "pop" set last_space_type $type - - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" } else { - set next $target + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. + # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } } set result $next } - samespace { - #note the same data as popspace (spacePopTransitions) is used here. - set parent [spacestack peek] - ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" - lassign $parent type target - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" } else { - set next $target + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } } set result $next } zeropoppushspace { if {$nest > 0} { - #pop back down to the root level (key-space) + #pop back down to the root level (table-space) spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + set last_space_action "pop" set last_space_type $type @@ -1935,36 +2866,72 @@ namespace eval tomlish::parse { } #re-entrancy - #set next [list pushspace [lindex $transition_to 1]] + #set next [list PUSHSPACE [lindex $transition_to 1]] set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 - ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" - set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] } - pushspace { - set target [lindex $transition_to 1] - spacestack push [list space $target] + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + set last_space_action "push" set last_space_type "space" - - #puts $::tomlish::parse::spacePushTransitions - if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { - set next [dict get $::tomlish::parse::spacePushTransitions $target] - ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" } else { - set next $target + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } } set result $next } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } default { - set result $transition_to + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word } } } else { - set result "nostate-err" - + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" } - lappend state_list $result - return $result + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] } proc report_line {{line ""}} { @@ -1988,7 +2955,7 @@ namespace eval tomlish::parse { foreach el $list { if { [lindex $el 0] eq "NEWLINE"} { append prettier "[list $el]\n" - } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { append prettier [nest_pretty1 $el] } else { append prettier "[list $el] " @@ -2023,12 +2990,13 @@ namespace eval tomlish::parse { proc _shortcircuit_startquotesequence {} { variable tok variable i - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { set_tokenType "startquote" incr i -1 return -level 2 1 } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" set_tokenType "startquote" set tok "\"" incr i -2 @@ -2036,8 +3004,81 @@ namespace eval tomlish::parse { } } - #return a list of 0 1 or 2 tokens + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "Unexpected. Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + + #returns 0 or 1 #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + proc tok {s} { variable nest variable v @@ -2046,14 +3087,12 @@ namespace eval tomlish::parse { variable type ;#character type variable state ;#FSM - set resultlist [list] variable tokenType variable tokenType_list variable endToken - set sLen [::string length $s] variable lastChar @@ -2063,400 +3102,601 @@ namespace eval tomlish::parse { #------------------------------ #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof variable token_waiting - if {[dict size $token_waiting]} { - set tokenType [dict get $token_waiting type] - set tok [dict get $token_waiting tok] - dict unset token_waiting type - dict unset token_waiting tok + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] return 1 } #------------------------------ + set resultlist [list] + set sLen [tcl::string::length $s] + set slash_active 0 set quote 0 set c "" set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [string index $s [expr {$i - 1}]] + set lastChar [tcl::string::index $s [expr {$i - 1}]] } else { set lastChar "" } - set c [string index $s $i] + set c [tcl::string::index $s $i] + set cindex $i + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" #puts "got char $c during tokenType '$tokenType'" - incr i ;#must incr here because we do'returns'inside the loop + incr i ;#must incr here because we do returns inside the loop - set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" } whitespace { # hash marks end of whitespace token #do a return for the whitespace, set token_waiting - #dict set token_waiting type comment - #dict set token_waiting tok "" + #set_token_waiting type comment value "" complete 1 incr i -1 ;#leave comment for next run return 1 } - untyped-value { + untyped_value { #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? - #we will accept a comment marker as an immediate terminator of the untyped-value. + #we will accept a comment marker as an immediate terminator of the untyped_value. incr i -1 return 1 } + starttablename - starttablearrayname { + #fix! + error "Character '#' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } default { - #quotedkey, string, multistring + #quotedkey, itablequotedkey, string,literal, multistring append tok $c } } } else { - #$slash_active not relevant when no tokenType - #start of token if we're not in a token - set_tokenType comment - set tok "" ;#The hash is not part of the comment data + switch -- $state { + multistring-space { + set_tokenType stringpart + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } } } lc { - set multi_dquote "" ;#!! - #test jmn2024 #left curly brace - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart { - if {$slash_active} {append tok "\\"} - append tok $c - } - starttablename { - error "unexpected tablename problem" - #$slash_active not relevant to this tokentype - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - comment { - if {$slash_active} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - switch -exact -- $state { - value-expected { - #switch last key to tablename?? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - multistring-space { - set_tokenType "stringpart" - if {$slash_active} { - set tok "\\\{" - } else { - set tok "\{" - } - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - array-space - array-syntax { - #nested anonymous inline table - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - default { - error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } rc { - set multi_dquote "" ;#!! #right curly brace - try { - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - tablename { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endinlinetable - dict set token_waiting tok "" - return 1 - } - tablearrayname { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" - return 1 - } - itablevaltail { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 - } - default { - #end any other token - incr i -1 - return 1 - } + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - error "unexpected tablename problem" - - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - curly-syntax - curly-space { - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - array-syntax - array-space { - #invalid - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - itablevaltail { - set_tokenType "endinlinetable" - set tok "" - #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 - incr i -1 - return 1 - } - itablekeyval-syntax { - error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" - } - default { - #JMN2024b keytail? - error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" - } + startquotesequence { + _shortcircuit_startquotesequence } - } - } on error {em} { - error $em - } finally { - set slash_active 0 - } - - } - lb { - set multi_dquote "" ;#!! - #left square bracket - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart { - if {$slash_active} {append tok "\\"} - append tok $c - } - starttablename { - #$slash_active not relevant to this tokentype - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - comment { - if {$slash_active} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - set_tokenType "startarray" - set tok "\[" - return 1 + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } - key-space { - #table name - #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray - #note that a starttablearrayname token may contain whitespace between the brackets - # e.g \[ \[ - set_tokenType "starttablename" - set tok "" ;#there is no output into the tomlish list for this token - } - array-space - array-syntax { - #nested array? - set_tokenType "startarray" - set tok "\[" - return 1 - #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + + } + lb { + #left square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } } - default { - error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } rb { - set multi_dquote "" ;#!! #right square bracket - try { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - tablename { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablename - dict set token_waiting tok "" - return 1 - } - tablearraynames { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 - } - default { + } else { incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 } } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endarray" - set tok "\]" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endarray" - set tok "\]" - return 1 + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + set_token_waiting type endtablename value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - error "unexpected tablename problem" + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" - set_tokenType "endtablename" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endtablearray" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - array-syntax - array-space { - set_tokenType "endarray" - set tok "\]" - return 1 - } - default { - error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\]" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } + default { + error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } bsl { set dquotes $multi_dquote set multi_dquote "" ;#!! #backslash - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - litstring - multilitstring - comment - tablename - tablearrayname { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -2474,13 +3714,15 @@ namespace eval tomlish::parse { set slash_active 1 } } - whitespace { - if {$state eq "multistring-space"} { - #end whitespace token - incr i -1 - return 1 + starttablename - starttablearrayname { + error "backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" } else { - error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + set slash_active 1 } } barekey { @@ -2491,206 +3733,448 @@ namespace eval tomlish::parse { } } } else { - if {$state eq "multistring-space"} { - set slash_active 1 - } else { - error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } } } } - dq { - #double quote - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { append tok $c - } elseif {$toklen == 2} { + if {$existingtoklen > 2} { + error "tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { append tok $c - set_tokenType "startmultiquote" - return 1 - } else { - error "unexpected token length in 'startquotesequence'" + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" } } - endquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + #temp token creatable only during value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { append tok $c - } elseif {$toklen == 2} { + } + 2 { + #switch? append tok $c - set_tokenType "endmultiquote" + set_tokenType triple_squote return 1 - } else { - error "unexpected token length in 'endquotesequence'" + } + default { + error "unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } - string { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" + } + literal { + #slash_active always false + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 + return 1 + } + starttablename - starttablearrayname { + #!!! + incr i -1 + return 1 + } + tablename - tablearrayname { + append tok $c + } + default { + append tok $c + } + } + } else { + switch -exact -- $state { + value-expected - array-space { + set_tokenType "_start_squote_sequence" + set tok "'" + } + itable-keyval-value-expected - keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname + append tok "'" + } + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal + incr -1 + return 1 + } + multistring-space { + error "unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey + } + default { + error "unhandled squote during state '$state'. [tomlish::parse::report_line]" + } + } + } + + } + dq { + #double quote + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + #switch vs set? + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length $toklen in 'startquotesequence'" + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 return 1 } + 2 { + set_tokenType "startsquote" + incr i -2 + return 1 + } + default { + error "unexpected _start_squote_sequence length $toklen" + } } - stringpart { - #sub element of multistring - if {$slash_active} { - append tok "\\" - append tok $c + } + literal - literalpart { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] + set multi_dquote "" + return 1 } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + append multi_dquote "\"" } } - whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } - } - value-expected { - if {$multi_dquote eq "\"\""} { - dict set token_waiting type startmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - #end whitespace token and reprocess - incr i -1 - return 1 - #append multi_dquote "\"" + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } } } - default { - dict set token_waiting type startquote - dict set token_waiting tok "\"" - return 1 - } } - } - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - quotedkey - itablequotedkey { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - dict set token_waiting type endquote - dict set token_waiting tok "\"" + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} + } + default { + set_token_waiting type startquote value "\"" complete 1 startindex $cindex + return 1 } } - tablename - tablearrayname { - if {$slash_active} {append tok "\\"} + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" append tok $c - } - starttablename - starttablearrayname { - incr i -1 ;## + } else { + set_token_waiting type endquote value "\"" complete 1 startindex $cindex return 1 } - default { - error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" - } } - } else { - #$slash_active not relevant when no tokenType - #token is string only if we're expecting a value at this point - switch -exact -- $state { - value-expected - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - multistring-space { - #REVIEW + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + keyval-value-expected - value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! + #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" + tomlish::log::debug "- tokloop char dq ---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" return 1 + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 + #set multi_dquote "" + #return 1 } else { append multi_dquote "\"" } } - key-space { - set tokenType startquote - set tok $c - return 1 - } - curly-space { - set tokenType startquote - set tok $c - return 1 - } - tablename - tablearrayname { - set_tokenType $state - set tok $c - } - default { - error "Unexpected quote during state '$state' [tomlish::parse::report_line]" - } + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { + set_tokenType "startquote" + set tok $c + return 1 + } + itable-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin + set tok $c + } + default { + error "Unexpected quote during state '$state' [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } = { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} append tok $c } stringpart { + if {$had_slash} {append tok "\\"} append tok $dquotes$c } whitespace { - dict set token_waiting type equal - dict set token_waiting tok = - return 1 + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type equal value = complete 1 startindex $cindex + return 1 + } } barekey { - dict set token_waiting type equal - dict set token_waiting tok = + #set_token_waiting type equal value = complete 1 + incr i -1 return 1 } + starttablename - starttablearrayname { + error "Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } default { error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" } @@ -2698,11 +4182,24 @@ namespace eval tomlish::parse { } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok ${dquotes}= + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= + } + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 } default { - set_tokenType equal + set_tokenType "equal" set tok = return 1 } @@ -2710,19 +4207,47 @@ namespace eval tomlish::parse { } } cr { + #REVIEW! set dquotes $multi_dquote set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + append tok $c + } + literalpart { + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warning "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } stringpart { append tok $dquotes$c } + starttablename - starttablearrayname { + error "Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } default { #!todo - error out if cr inappropriate for tokenType append tok $c @@ -2731,24 +4256,46 @@ namespace eval tomlish::parse { } else { #lf may be appended if next #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) - set_tokenType newline + set_tokenType "newline" set tok cr } } lf { + # \n newline set dquotes $multi_dquote set multi_dquote "" ;#!! - # \n newline - if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } newline { + #review #this lf is the trailing part of a crlf - append tok lf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } stringpart { @@ -2757,11 +4304,20 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - dict set token_waiting type newline - dict set token_waiting tok lf - return 1 + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] + incr i -1 + return 1 + } else { + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } } } + starttablename - tablename - tablearrayname - starttablearrayname { + error "Character is invalid in $tokenType. [tomlish::parse::report_line]" + } default { #newline ends all other tokens. #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) @@ -2770,60 +4326,130 @@ namespace eval tomlish::parse { # ie whitespace is split into separate whitespace tokens at each newline #puts "-------------- newline lf during tokenType $tokenType" - dict set token_waiting type newline - dict set token_waiting tok lf + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } } } else { - set had_slash $slash_active - set slash_active 0 - if {$had_slash} { - set_tokenType "continuation" - set tok "\\" - incr i -1 - return 1 - } else { - set_tokenType newline - set tok lf - return 1 + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} } } , { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey - tablename - tablearrayname { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} append tok $c } stringpart { + #stringpart can have up to 2 quotes too + if {$had_slash} {append tok "\\"} append tok $dquotes$c } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type comma value "," complete 1 startindex $cindex + return 1 + } + } default { - dict set token_waiting type comma - dict set token_waiting tok "," + set_token_waiting type comma value "," complete 1 startindex $cindex + if {$had_slash} {append tok "\\"} return 1 } } } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "," + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," } multiliteral-space { - set_tokenType literalpart + #assert had_slash 0, multi_dquote "" + set_tokenType "literalpart" set tok "," } default { - set_tokenType comma + set_tokenType "comma" set tok "," return 1 } @@ -2831,47 +4457,106 @@ namespace eval tomlish::parse { } } . { + set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - stringpart - comment - quotedkey - untyped-value { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped_value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" append tok $c } - baretablename - tablename - tablearrayname { + whitespace { + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { #subtable - split later - review append tok $c } barekey { + #e.g x.y = 1 #we need to transition the barekey to become a structured table name ??? review - switch_tokenType tablename - incr i -1 - - #error "barekey period unimplemented" + #x is the tablename y is the key + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 } default { - error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" - #dict set token_waiting type period - #dict set token_waiting tok "." + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 #return 1 } } } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "." + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." } multiliteral-space { - set_tokenType literalpart + set_tokenType "literalpart" + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" set tok "." + return 1 } default { - set_tokenType untyped-value + set_tokenType "untyped_value" set tok "." } } @@ -2881,24 +4566,38 @@ namespace eval tomlish::parse { " " { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { + #todo had_slash - emit token or error #whitespace is a terminator for bare keys - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - untyped-value { + untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } @@ -2906,37 +4605,45 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $c - } - quotedkey - string { - if {$had_slash} { - append tok "\\" - } - #if {$dquotes eq "\""} { - #} - append tok $c + append tok $dquotes$c } - whitespace { + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } append tok $c } stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) if {$had_slash} { #REVIEW + #emit the stringpart - go back to the slash incr i -2 return 1 } else { #split into STRINGPART aaa WS " " - #keeping WS separate allows easier processing of CONT stripping append tok $dquotes incr i -1 return 1 } } - starttablename { - incr i -1 - return 1 + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } } - starttablearrayname { + starttablename - starttablearrayname { incr i -1 return 1 } @@ -2951,19 +4658,21 @@ namespace eval tomlish::parse { } } else { set had_slash $slash_active - if {$slash_active} { - set slash_active 0 - } + set slash_active 0 switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - if {$had_slash} { - set tok "\\$c" - } else { - set tok $c - } + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c } multistring-space { if {$had_slash} { @@ -2976,12 +4685,16 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok $dquotes incr i -1 - return + return 1 } set_tokenType "whitespace" append tok $c } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { if {$had_slash} { error "unexpected backslash [tomlish::parse::report_line]" @@ -2996,35 +4709,58 @@ namespace eval tomlish::parse { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { #whitespace is a terminator for bare keys incr i -1 - #set token_waiting type whitespace - #set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 return 1 } untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - quotedkey { + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { append tok $c } string - comment - whitespace { append tok $c } stringpart { - append tok $dquotes$c + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c } starttablename - starttablearrayname { incr i -1 @@ -3045,10 +4781,14 @@ namespace eval tomlish::parse { set slash_active 0 } switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname set tok $c } multistring-space { @@ -3069,6 +4809,10 @@ namespace eval tomlish::parse { } } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { set_tokenType "whitespace" append tok $c @@ -3078,27 +4822,77 @@ namespace eval tomlish::parse { } bom { #BOM (Byte Order Mark) - ignored by token consumer - set_tokenType "bom" - set tok "\uFEFF" - return 1 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + _start_squote_sequence { + #assert - tok will be one or two squotes only + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart { + append tok $c + } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } } default { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - endquotesequence { - puts stderr "endquotesequence: $tok" + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 } whitespace { - incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. - return 1 + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } } barekey { if {[tomlish::utils::is_barekey $c]} { @@ -3116,7 +4910,7 @@ namespace eval tomlish::parse { append tok $dquotes$c } default { - #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } @@ -3124,7 +4918,7 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - key-space - curly-space - curly-syntax { + table-space - itable-space { #if no currently active token - assume another key value pair if {[tomlish::utils::is_barekey $c]} { set_tokenType "barekey" @@ -3133,6 +4927,15 @@ namespace eval tomlish::parse { error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } multistring-space { set_tokenType "stringpart" if {$had_slash} { @@ -3142,16 +4945,25 @@ namespace eval tomlish::parse { set tok $dquotes$c } } - tablename { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { set_tokenType "tablename" set tok $c } - tablearrayname { + tablearrayname-state { set_tokenType "tablearrayname" set tok $c } + dottedkey-space { + set_tokenType barekey + set tok $c + } default { - set_tokenType "untyped-value" + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" set tok $c } } @@ -3162,32 +4974,48 @@ namespace eval tomlish::parse { } #run out of characters (eof) - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { #check for invalid ending tokens - #if {$state eq "err"} { + #if {$state eq "err-state"} { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} - if {$tokenType eq "startquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - eror "eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #dict set token_waiting type "string" - #dict set token_waiting tok "" - return 1 + switch -exact -- $tokenType { + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #set_token_waiting type string value "" complete 1 + return 1 + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open literal + error "eof reached without closing single quote for string literal. [tomlish::parse::report_line]" + } + 2 { + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" + set tok "" + return 1 + } + } } } - dict set token_waiting type "eof" - dict set token_waiting tok "eof" + set_token_waiting type eof value eof complete 1 startindex $i ;#review return 1 } else { - ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" set tokenType "eof" set tok "eof" } @@ -3312,7 +5140,7 @@ if {$argc > 0} { puts stderr "argc: $argc args: $argv" if {($argc == 1)} { - if {[::string tolower $argv] in {help -help h -h}} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { puts stdout "Usage: -app where appname one of:[tomlish::appnames]" exit 0 } else { diff --git a/src/bootsupport/modules_tcl8/include_modules.config b/src/bootsupport/modules_tcl8/include_modules.config index 05ce61ae..59e31647 100644 --- a/src/bootsupport/modules_tcl8/include_modules.config +++ b/src/bootsupport/modules_tcl8/include_modules.config @@ -1,9 +1,9 @@ - -#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project -#They must be already built, so generally shouldn't come directly from src/modules. - -#each entry - base module -set bootsupport_modules [list\ - modules_tcl8 thread\ -] - + +#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project +#They must be already built, so generally shouldn't come directly from src/modules. + +#each entry - base module +set bootsupport_modules [list\ + modules_tcl8 thread\ +] + diff --git a/src/make.tcl b/src/make.tcl index 9edd90b0..24206ba7 100644 --- a/src/make.tcl +++ b/src/make.tcl @@ -13,7 +13,7 @@ namespace eval ::punkmake { variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] variable non_help_flags [list -k] variable help_flags [list -help --help /?] - variable known_commands [list project get-project-info shell vendorupdate bootsupport vfscommonupdate] + variable known_commands [list project modules info check shell vendorupdate bootsupport vfscommonupdate] } if {"::try" ni [info commands ::try]} { puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" @@ -21,7 +21,7 @@ if {"::try" ni [info commands ::try]} { } #------------------------------------------------------------------------------ -#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#Module loading from src/bootsupport or [pwd]/modules if pwd is a 'src' folder #------------------------------------------------------------------------------ #If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files # - then it will attempt to preference these modules @@ -35,8 +35,10 @@ if {[file exists [file join $startdir src bootsupport]]} { set bootsupport_mod [file join $startdir bootsupport modules] set bootsupport_lib [file join $startdir bootsupport lib] } -if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + +set package_paths_modified 0 +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { set original_tm_list [tcl::tm::list] tcl::tm::remove {*}$original_tm_list set original_auto_path $::auto_path @@ -63,8 +65,18 @@ if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { } - if {[file exists [pwd]/modules]} { - tcl::tm::add [pwd]/modules + #we deliberately don't use [pwd]/modules because commonly the launch dir may be the project dir. + #The /modules are the very modules we are building - and may be in a broken state, which make.tcl then can't fix. + if {[file tail $startdir] eq "src"} { + if {[file exists $startdir/modules]} { + #launch from /modules /lib etc." \n \n + append h " $scriptname modules" \n + append h " - build modules from src/modules etc without scanning src/runtime and src/vfs folders to build kit/zipkit executables" \n \n append h " $scriptname bootsupport" \n - append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n + append h " - bootsupport modules are available to make.tcl" \n \n append h " $scriptname vendorupdate" \n - append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n + append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n append h " $scriptname vfscommonupdate" \n - append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib" \n \n - append h " $scriptname get-project-info" \n - append h " - show the name and base folder of the project to be built" \n + append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib etc" \n + append h " - before calling this (followed by make project) - you can test using '(.exe) dev'" \n + append h " this will load modules from your /module /lib paths instead of from the kit/zipkit" \n \n + append h " $scriptname info" \n + append h " - show the name and base folder of the project to be built" \n append h "" \n if {[llength $::punkmake::pkg_missing]} { append h "* ** NOTE ** ***" \n @@ -220,12 +232,68 @@ if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} } set sourcefolder $projectroot/src +if {$::punkmake::command eq "check"} { + puts stdout "- tcl::tm::list" + foreach fld [tcl::tm::list] { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + puts stdout "- auto_path" + foreach fld $::auto_path { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + set v [package require punk::mix::base] + puts stdout "punk::mix::base version $v\n[package ifneeded punk::mix::base $v]" + exit 0 +} + +if {$package_paths_modified} { + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] +} + + -if {$::punkmake::command eq "get-project-info"} { - puts stdout "- -- --- --- --- --- --- --- --- --- ---" - puts stdout "- -- get-project-info -- -" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" +if {$::punkmake::command eq "info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" + puts stdout "- -- info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" puts stdout "- projectroot : $projectroot" + set sourcefolder $projectroot/src + set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] + set vendormodulefolders [glob -nocomplain -dir $sourcefolder -type d -tails vendormodules_tcl*] + puts stdout "- vendorlib folders: ([llength $vendorlibfolders])" + foreach fld $vendorlibfolders { + puts stdout " src/$fld" + } + puts stdout "- vendormodule folders: ([llength $vendormodulefolders])" + foreach fld $vendormodulefolders { + puts stdout " src/$fld" + } + set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] + puts stdout "- source module paths: [llength $source_module_folderlist]" + foreach fld $source_module_folderlist { + puts stdout " $fld" + } + set projectlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails lib_tcl*] + lappend projectlibfolders lib + puts stdout "- source libary paths: [llength $projectlibfolders]" + foreach fld $projectlibfolders { + puts stdout " src/$fld" + } if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { set vc "fossil" set rev [punk::repo::fossil_revision $scriptfolder] @@ -241,8 +309,11 @@ if {$::punkmake::command eq "get-project-info"} { } puts stdout "- version control : $vc" puts stdout "- revision : $rev" - puts stdout "- remote : $rem" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- remote" + foreach ln [split $rem \n] { + puts stdout " $ln" + } + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" exit 0 } @@ -564,7 +635,7 @@ if {$::punkmake::command eq "bootsupport"} { -if {$::punkmake::command ne "project"} { +if {$::punkmake::command ni {project modules}} { puts stderr "Command $::punkmake::command not implemented - aborting." flush stderr after 100 @@ -803,6 +874,19 @@ if {[punk::repo::is_fossil_root $projectroot]} { $installer destroy } +if {$::punkmake::command ne "project"} { + #command = modules + puts stdout "vfs folders not checked" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into base vfs folder" + puts stdout " - use 'make.tcl project' to build executable kits/zipkits from vfs folders as well if you have runtimes installed" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" + puts stdout "-done-" + exit 0 +} + + set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] if {$buildfolder ne "$sourcefolder/_build"} { puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" @@ -832,10 +916,12 @@ if {![llength $runtimes]} { exit 0 } +set has_sdx 1 if {[catch {exec sdx help} errM]} { puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" puts stderr "err: $errM" - exit 1 + #exit 1 + set has_sdx 0 } # -- --- --- --- --- --- --- --- --- --- @@ -1025,6 +1111,8 @@ foreach runtimefile $runtimes { # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set failed_kits [list] set installed_kits [list] +set skipped_kits [list] +set skipped_kit_installs [list] proc ::make_file_traversal_error {args} { error "file_traverse error: $args" @@ -1304,30 +1392,39 @@ foreach vfstail $vfs_tails { } } kit { - if {[catch { - if {$rtname ne "-"} { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose - } else { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose - } - } result]} { - if {$rtname ne "-"} { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" - } else { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" - } - puts stderr "sdx wrap $targetkit failed" - lappend failed_kits [list kit $targetkit reason $sdxmsg] + if {!$has_sdx} { + puts stderr "no sdx available to wrap $targetkit" + lappend failed_kits [list kit $targetkit reason "sdx_executable_unavailable"] $vfs_event targetset_end FAILED $vfs_event destroy $vfs_installer destroy continue - } else { - puts stdout "ok - finished sdx" - set separator [string repeat = 40] - puts stdout $separator - puts stdout $result - puts stdout $separator + } else { + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose + } + } result]} { + if {$rtname ne "-"} { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" + } else { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" + } + puts stderr "sdx wrap $targetkit failed" + lappend failed_kits [list kit $targetkit reason $sdxmsg] + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } } } } @@ -1435,6 +1532,7 @@ foreach vfstail $vfs_tails { set skipped_vfs_build 1 puts stderr "." puts stdout "Skipping build for vfs $vfstail with runtime $rtname - no change detected" + lappend skipped_kits [list kit $targetkit reason "no change detected"] $vfs_event targetset_end SKIPPED } $vfs_event destroy @@ -1489,6 +1587,7 @@ foreach vfstail $vfs_tails { set skipped_kit_install 1 puts stderr "." puts stdout "Skipping kit install for $targetkit with vfs $vfstail runtime $rtname - no change detected" + lappend skipped_kit_installs [list kit $targetkit reason "no change detected"] $bin_event targetset_end SKIPPED } $bin_event destroy @@ -1510,8 +1609,21 @@ if {[llength $failed_kits]} { punk::lib::showdict -channel stderr -roottype list $failed_kits */@*.@* #puts stderr [join $failed_kits \n] } - -puts stdout "done" +set had_kits [expr {[llength $installed_kits] || [llength $failed_kits] || [llength $skipped_kits]}] +if {$had_kits} { + puts stdout " module builds and kit/zipkit builds processed (vfs config: src/runtime/mapvfs.config)" + puts stdout " - use 'make.tcl modules' to build modules without scanning/building the vfs folders into executable kits/zipkits" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into the base vfs folder" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" +} else { + puts stdout " module builds processed" + puts stdout "" + puts stdout " If kit/zipkit based executables required - create src/vfs/.vfs folders containing lib,modules,modules_tcl9 etc folders" + puts stdout " Also ensure appropriate executables exist in src/runtime along with src/runtime/mapvfs.config" +} +puts stdout "-done-" exit 0 diff --git a/src/modules/calculator_test-999999.0a1.0.tm b/src/modules/calculator_test-999999.0a1.0.tm new file mode 100644 index 00000000..0e9325e4 --- /dev/null +++ b/src/modules/calculator_test-999999.0a1.0.tm @@ -0,0 +1,540 @@ +## -*- tcl -*- +## +## OO-based Tcl/PARAM implementation of the parsing +## expression grammar +## +## calculator grammar +## +## Generated from file calctest.tcl +## for user jnoble +## +# # ## ### ##### ######## ############# ##################### +## Requirements + +package require Tcl 8.5 9 +package require TclOO +package require pt::rde::oo ; # OO-based implementation of the + # PARAM virtual machine + # underlying the Tcl/PARAM code + # used below. + +# # ## ### ##### ######## ############# ##################### +## + +oo::class create calculator_test { + # # ## ### ##### ######## ############# + ## Public API + + superclass pt::rde::oo ; # TODO - Define this class. + # Or can we inherit from a snit + # class too ? + + method parse {channel} { + my reset $channel + my MAIN ; # Entrypoint for the generated code. + return [my complete] + } + + method parset {text} { + my reset {} + my data $text + my MAIN ; # Entrypoint for the generated code. + return [my complete] + } + + # # ## ### ###### ######## ############# + ## BEGIN of GENERATED CODE. DO NOT EDIT. + + # + # Grammar Start Expression + # + + method MAIN {} { + my sym_Expression + return + } + + # + # value Symbol 'AddOp' + # + + method sym_AddOp {} { + # [+-] + + my si:void_symbol_start AddOp + my si:next_class +- + my si:void_leaf_symbol_end AddOp + return + } + + # + # value Symbol 'Digit' + # + + method sym_Digit {} { + # [0123456789] + + my si:void_symbol_start Digit + my si:next_class 0123456789 + my si:void_leaf_symbol_end Digit + return + } + + # + # value Symbol 'Expression' + # + + method sym_Expression {} { + # x + # (Term) + # * + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + my si:value_symbol_start Expression + my sequence_18 + my si:reduce_symbol_end Expression + return + } + + method sequence_18 {} { + # x + # (Term) + # * + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + my si:value_state_push + my sym_Term + my si:valuevalue_part + my kleene_16 + my si:value_state_merge + return + } + + method kleene_16 {} { + # * + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + while {1} { + my si:void2_state_push + my sequence_14 + my si:kleene_close + } + return + } + + method sequence_14 {} { + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + my si:void_state_push + my kleene_8 + my si:voidvalue_part + my sym_AddOp + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my sym_Term + my si:value_state_merge + return + } + + method kleene_8 {} { + # * + # '' + + while {1} { + my si:void2_state_push + my si:next_char \40 + my si:kleene_close + } + return + } + + # + # value Symbol 'Factor' + # + + method sym_Factor {} { + # x + # (Fragment) + # * + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + my si:value_symbol_start Factor + my sequence_32 + my si:reduce_symbol_end Factor + return + } + + method sequence_32 {} { + # x + # (Fragment) + # * + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + my si:value_state_push + my sym_Fragment + my si:valuevalue_part + my kleene_30 + my si:value_state_merge + return + } + + method kleene_30 {} { + # * + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + while {1} { + my si:void2_state_push + my sequence_28 + my si:kleene_close + } + return + } + + method sequence_28 {} { + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + my si:void_state_push + my kleene_8 + my si:voidvalue_part + my sym_PowOp + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my sym_Fragment + my si:value_state_merge + return + } + + # + # value Symbol 'Fragment' + # + + method sym_Fragment {} { + # / + # x + # '\(' + # * + # '' + # (Expression) + # * + # '' + # '\)' + # (Number) + # (Var) + + my si:value_symbol_start Fragment + my choice_46 + my si:reduce_symbol_end Fragment + return + } + + method choice_46 {} { + # / + # x + # '\(' + # * + # '' + # (Expression) + # * + # '' + # '\)' + # (Number) + # (Var) + + my si:value_state_push + my sequence_42 + my si:valuevalue_branch + my sym_Number + my si:valuevalue_branch + my sym_Var + my si:value_state_merge + return + } + + method sequence_42 {} { + # x + # '\(' + # * + # '' + # (Expression) + # * + # '' + # '\)' + + my si:void_state_push + my si:next_char \50 + my si:voidvoid_part + my kleene_8 + my si:voidvalue_part + my sym_Expression + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my si:next_char \51 + my si:value_state_merge + return + } + + # + # value Symbol 'MulOp' + # + + method sym_MulOp {} { + # [*/] + + my si:void_symbol_start MulOp + my si:next_class */ + my si:void_leaf_symbol_end MulOp + return + } + + # + # value Symbol 'Number' + # + + method sym_Number {} { + # x + # ? + # (Sign) + # + + # (Digit) + + my si:value_symbol_start Number + my sequence_57 + my si:reduce_symbol_end Number + return + } + + method sequence_57 {} { + # x + # ? + # (Sign) + # + + # (Digit) + + my si:value_state_push + my optional_52 + my si:valuevalue_part + my poskleene_55 + my si:value_state_merge + return + } + + method optional_52 {} { + # ? + # (Sign) + + my si:void2_state_push + my sym_Sign + my si:void_state_merge_ok + return + } + + method poskleene_55 {} { + # + + # (Digit) + + my i_loc_push + my sym_Digit + my si:kleene_abort + while {1} { + my si:void2_state_push + my sym_Digit + my si:kleene_close + } + return + } + + # + # value Symbol 'PowOp' + # + + method sym_PowOp {} { + # "**" + + my si:void_symbol_start PowOp + my si:next_str ** + my si:void_leaf_symbol_end PowOp + return + } + + # + # value Symbol 'Sign' + # + + method sym_Sign {} { + # [-+] + + my si:void_symbol_start Sign + my si:next_class -+ + my si:void_leaf_symbol_end Sign + return + } + + # + # value Symbol 'Term' + # + + method sym_Term {} { + # x + # (Factor) + # * + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + my si:value_symbol_start Term + my sequence_75 + my si:reduce_symbol_end Term + return + } + + method sequence_75 {} { + # x + # (Factor) + # * + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + my si:value_state_push + my sym_Factor + my si:valuevalue_part + my kleene_73 + my si:value_state_merge + return + } + + method kleene_73 {} { + # * + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + while {1} { + my si:void2_state_push + my sequence_71 + my si:kleene_close + } + return + } + + method sequence_71 {} { + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + my si:void_state_push + my kleene_8 + my si:voidvalue_part + my sym_MulOp + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my sym_Factor + my si:value_state_merge + return + } + + # + # value Symbol 'Var' + # + + method sym_Var {} { + # x + # '$' + # [xyz] + + my si:void_symbol_start Var + my sequence_80 + my si:void_leaf_symbol_end Var + return + } + + method sequence_80 {} { + # x + # '$' + # [xyz] + + my si:void_state_push + my si:next_char $ + my si:voidvoid_part + my si:next_class xyz + my si:void_state_merge + return + } + + ## END of GENERATED CODE. DO NOT EDIT. + # # ## ### ###### ######## ############# +} + +# # ## ### ##### ######## ############# ##################### +## Ready + +package provide calculator_test 999999.0a1.0 +return diff --git a/src/modules/punk/console-999999.0a1.0.tm b/src/modules/punk/console-999999.0a1.0.tm index c31852b5..140968a0 100644 --- a/src/modules/punk/console-999999.0a1.0.tm +++ b/src/modules/punk/console-999999.0a1.0.tm @@ -183,7 +183,9 @@ namespace eval punk::console { variable previous_stty_state_$channel set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] eq ""} { - set previous_stty_state_$channel [exec {*}$sttycmd -g <@$channel] + if {[catch {{*}$sttycmd -g <@$channel} previous_stty_state_$channel]} { + set previous_stty_state_$channel "" + } } exec {*}$sttycmd raw -echo <@$channel @@ -253,13 +255,21 @@ namespace eval punk::console { return "line" } } elseif {$raw_or_line eq "raw"} { - punk::console::enableRaw + if {[catch { + punk::console::enableRaw + } errM]} { + puts stderr "Warning punk::console::enableRaw failed - $errM" + } if {[can_ansi]} { punk::console::enableVirtualTerminal both } } elseif {$raw_or_line eq "line"} { #review -order. disableRaw has memory from enableRaw.. but but for line mode we want vt disabled - so call it after disableRaw (?) - punk::console::disableRaw + if {[catch { + punk::console::disableRaw + } errM]} { + puts stderr "Warning punk::console::disableRaw failed - $errM" + } if {[can_ansi]} { punk::console::disableVirtualTerminal input ;#default readline arrow behaviour etc punk::console::enableVirtualTerminal output ;#display/use ansi codes @@ -290,12 +300,15 @@ namespace eval punk::console { set loadstate [zzzload::pkg_require twapi] #loadstate could also be stuck on loading? - review - zzzload not very ripe - #Twapi is relatively slow to load - can be 1s plus in normal cases - and much longer if there are disk performance issues. - + #Twapi can be relatively slow to load (on some systems) - can be 1s plus in some cases - and much longer if there are disk performance issues. if {$loadstate ni [list failed]} { + #possibly still 'loading' #review zzzload usage #puts stdout "=========== console loading twapi =============" - zzzload::pkg_wait twapi + set loadstate [zzzload::pkg_wait twapi] ;#can return 'failed' will return version if already loaded or loaded during wait + } + + if {$loadstate ni [list failed]} { package require twapi ;#should be fast once twapi dll loaded in zzzload thread set ::punk::console::has_twapi 1 @@ -523,6 +536,9 @@ namespace eval punk::console { set is_raw 0 return [list stdin [list from $oldmode to $newmode]] } elseif {[set sttycmd [auto_execok stty]] ne ""} { + #stty can return info on windows - but doesn't seem to be able to set anything. + #review - is returned info even valid? + set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] ne ""} { exec {*}$sttycmd [set previous_stty_state_$channel] diff --git a/src/modules/punk/lib-999999.0a1.0.tm b/src/modules/punk/lib-999999.0a1.0.tm index 4b4ce6e5..5463c32e 100644 --- a/src/modules/punk/lib-999999.0a1.0.tm +++ b/src/modules/punk/lib-999999.0a1.0.tm @@ -339,6 +339,92 @@ namespace eval punk::lib { set has_twapi [expr {![catch {package require twapi}]}] } + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + proc lswap {lvar a z} { + upvar $lvar l + if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { + #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + set a_index [lindex_resolve $l $a] + set a_msg "" + switch -- $a_index { + -2 { + "$a is greater th + } + -3 { + } + } + error "lswap cannot indices $a and $z $a is out of range" + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} #*** !doctools #[subsection {Namespace punk::lib}] @@ -347,6 +433,7 @@ namespace eval punk::lib { if {[info commands lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation #support minimal set from to proc range {from to} { lseq $from $to @@ -1009,24 +1096,28 @@ namespace eval punk::lib { } set lower_resolve [punk::lib::lindex_resolve $dval $a] ;#-2 for too low, -1 for too high #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds - if {${lower_resolve} == -1} { + if {${lower_resolve} == -2} { + ##x #lower bound is above upper list range #match with decreasing indices is still possible set lower [expr {[llength $dval]-1}] ;#set to max - } elseif {$lower_resolve == -2} { + } elseif {$lower_resolve == -3} { + ##x set lower 0 } else { set lower $lower_resolve } set upper [punk::lib::lindex_resolve $dval $b] - if {$upper == -2} { + if {$upper == -3} { + ##x #upper bound is below list range - - if {$lower_resolve >=-1} { + if {$lower_resolve >=-2} { + ##x set upper 0 } else { continue } - } elseif {$upper == -1} { + } elseif {$upper == -2} { #use max set upper [expr {[llength $dval]-1}] #assert - upper >=0 because we have ruled out empty lists @@ -1669,7 +1760,8 @@ namespace eval punk::lib { error "bad expression '$expression': must be integer?\[+-\]integer?" } } - + + # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bunds on upper vs lower side proc lindex_resolve {list index} { #*** !doctools #[call [fun lindex_resolve] [arg list] [arg index]] @@ -1679,11 +1771,13 @@ namespace eval punk::lib { #[para]Sometimes the actual integer index is desired. #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. #[para]lindex_resolve will parse the index expression and return: - #[para] a) -2 if the supplied index expression is below the lower bound for the supplied list. (< 0) - #[para] b) -1 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] a) -3 if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) -2 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list #[para]Otherwise it will return an integer corresponding to the position in the list. #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out or bounds indices, or in the case of lrange, return results anyway. #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which we will resolve (hopefully safely) with expr #if {![llength $list]} { @@ -1694,9 +1788,9 @@ namespace eval punk::lib { if {[string is integer -strict $index]} { #can match +i -i if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } else { #integer may still have + sign - normalize with expr return [expr {$index}] @@ -1708,14 +1802,14 @@ namespace eval punk::lib { set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { - return -1 + return -2 } } else { - #end + #index is 'end' set index [expr {[llength $list]-1}] if {$index < 0} { - #special case - end with empty list - treat end like a positive number out of bounds - return -1 + #special case - 'end' with empty list - treat end like a positive number out of bounds + return -2 } else { return $index } @@ -1723,7 +1817,7 @@ namespace eval punk::lib { if {$offset == 0} { set index [expr {[llength $list]-1}] if {$index < 0} { - return -1 ;#special case + return -2 ;#special case as above } else { return $index } @@ -1732,7 +1826,7 @@ namespace eval punk::lib { set index [expr {([llength $list]-1) - $offset}] } if {$index < 0} { - return -2 + return -3 } else { return $index } @@ -1753,26 +1847,50 @@ namespace eval punk::lib { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } return $index } } } - proc lindex_resolve2 {list index} { - #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ but that's likely unavailable here. + proc lindex_resolve_basic {list index} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg list] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -1 for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ + # - which #for {set i 0} {$i < [llength $list]} {incr i} { # lappend indices $i #} + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < 0 || ($index >= [llength $list])} { + #even though in this case we could return -2 or -3 like lindex_resolve; for consistency we don't, as it's not always determinable for compound indices using the lseq method. + return -1 + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } if {[llength $list]} { set indices [punk::lib::range 0 [expr {[llength $list]-1}]] ;# uses lseq if available, has fallback. + #if lseq was available - $indices is an 'arithseries' - theoretically not taking up ram(?) } else { set indices [list] } set idx [lindex $indices $index] if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end return -1 } else { return $idx @@ -2334,13 +2452,6 @@ namespace eval punk::lib { } return $prefix } - #test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var - proc swapnumvars {namea nameb} { - upvar $namea a $nameb b - set a [expr {$a ^ $b}] - set b [expr {$a ^ $b}] - set a [expr {$a ^ $b}] - } #e.g linesort -decreasing $data proc linesort {args} { @@ -2956,7 +3067,7 @@ namespace eval punk::lib { # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { - set number [punk::objclone $unformattednumber] + set number [objclone $unformattednumber] set number [string map {_ ""} $number] #normalize using expr - e.g 2e4 -> 20000.0 set number [expr {$number}] diff --git a/src/modules/punk/mix/base-0.1.tm b/src/modules/punk/mix/base-0.1.tm index 932c1db6..806b172e 100644 --- a/src/modules/punk/mix/base-0.1.tm +++ b/src/modules/punk/mix/base-0.1.tm @@ -4,6 +4,7 @@ package provide punk::mix::base [namespace eval punk::mix::base { }] package require punk::path +package require punk::lib ;#format_number etc #base internal plumbing functions namespace eval punk::mix::base { @@ -657,16 +658,38 @@ namespace eval punk::mix::base { #temp emission to stdout.. todo - repl telemetry channel puts stdout "cksum_path: creating temporary tar archive for $path" - puts stdout " at: $archivename .." - tar::create $archivename $target + puts -nonewline stdout " at: $archivename ..." + set tsstart [clock millis] + if {[set tarpath [auto_execok tar]] ne ""} { + #using an external binary is *significantly* faster than tar::create - but comes with some risks + #review - need to check behaviour/flag variances across platforms + #don't use -z flag. On at least some tar versions the zipped file will contain a timestamped subfolder of filename.tar - which ruins the checksum + #also - tar is generally faster without the compression (although this may vary depending on file size and disk speed?) + exec {*}$tarpath -cf $archivename $target ;#{*} needed in case spaces in tarpath + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar -cf done ($ms ms)" + } else { + set tsstart [clock millis] ;#don't include auto_exec search time for tar::create + tar::create $archivename $target + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar::create done ($ms ms)" + puts stdout " NOTE: install tar executable for potentially *much* faster directory checksum processing" + } + if {$ftype eq "file"} { - set sizeinfo "(size [file size $target])" + set sizeinfo "(size [punk::lib::format_number [file size $target]] bytes)" } else { - set sizeinfo "(file type $ftype - size unknown)" + set sizeinfo "(file type $ftype - tarred size [punk::lib::format_number [file size $archivename]] bytes)" } - puts stdout "cksum_path: calculating cksum for $target $sizeinfo..." + set tsstart [clock millis] + puts -nonewline stdout "cksum_path: calculating cksum for $target $sizeinfo ... " set cksum [{*}$cksum_command $archivename] - #puts stdout "cksum_path: cleaning up.. " + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " cksum done ($ms ms)" + puts stdout " cksum: $cksum" file delete -force $archivename cd $startdir diff --git a/src/modules/punk/mix/commandset/project-999999.0a1.0.tm b/src/modules/punk/mix/commandset/project-999999.0a1.0.tm index 24a2be2d..bf16c030 100644 --- a/src/modules/punk/mix/commandset/project-999999.0a1.0.tm +++ b/src/modules/punk/mix/commandset/project-999999.0a1.0.tm @@ -157,6 +157,9 @@ namespace eval punk::mix::commandset::project { set opt_force [dict get $opts -force] set opt_confirm [string tolower [dict get $opts -confirm]] # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_layout [dict get $opts -layout] + set opt_update [dict get $opts -update] + # -- --- --- --- --- --- --- --- --- --- --- --- --- set opt_modules [dict get $opts -modules] if {[llength $opt_modules] == 1 && [lindex $opt_modules 0] eq "\uFFFF"} { #if not specified - add a single module matching project name @@ -169,9 +172,6 @@ namespace eval punk::mix::commandset::project { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_layout [dict get $opts -layout] - set opt_update [dict get $opts -update] - # -- --- --- --- --- --- --- --- --- --- --- --- --- #todo - install support binaries on a per-project basis in a way that doesn't impact machine (e.g not added to path) - cache in user config dir if possible, supply mechanism to clear cache diff --git a/src/modules/punk/ns-999999.0a1.0.tm b/src/modules/punk/ns-999999.0a1.0.tm index 0d939073..51e2c541 100644 --- a/src/modules/punk/ns-999999.0a1.0.tm +++ b/src/modules/punk/ns-999999.0a1.0.tm @@ -1707,6 +1707,7 @@ tcl::namespace::eval punk::ns { lassign [internal::get_run_opts {-vars -nowarnings} {} $args] _r runopts _c cmdargs set use_vars [expr {"-vars" in $runopts}] set no_warnings [expr {"-nowarnings" in $runopts}] + set ver "" #todo support leading solo flags such as -capture to control whether we do a static capture of local vars in the ns @@ -1717,15 +1718,68 @@ tcl::namespace::eval punk::ns { } default { if {[string match ::* $pkg_or_existing_ns]} { + set pkg_unqualified [string range $pkg_or_existing_ns 2 end] if {![tcl::namespace::exists $pkg_or_existing_ns]} { - set ver [package require [string range $pkg_or_existing_ns 2 end]] + set ver [package require $pkg_unqualified] } else { set ver "" } set ns $pkg_or_existing_ns } else { - set ver [package require $pkg_or_existing_ns] - set ns ::$pkg_or_existing_ns + set pkg_unqualified $pkg_or_existing_ns + set ver [package require $pkg_unqualified] + set ns ::$pkg_unqualified + } + #some packages don't create their namespace immediately and/or don't populate it with commands and instead put entries in ::auto_index + set previous_command_count 0 + if {[namespace exists $ns]} { + set previous_command_count [llength [info commands ${ns}::*]] + } + + + #also if a sub package was loaded first - then the namespace for the base or lower package may exist but have no commands + #for the purposes of pkguse - which most commonly interactive - we want the namespace populated + #It may still not be *fully* populated because we stop at first source that adds commands - REVIEW + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + + if {!$ns_populated} { + #we will catch-run an auto_index entry if any + #auto_index entry may or may not be prefixed with :: + set keys [list] + #first look for exact pkg_unqualified and ::pkg_unqualified + #leave these at beginning of keys list + if {[array exists ::auto_index($pkg_unqualified)]} { + lappend keys $pkg_unqualified + } + if {[array exists ::auto_index(::$pkg_unqualified)]} { + lappend keys ::$pkg_unqualified + } + #as auto_index is an array - we could get keys in arbitrary order + set matches [lsort [array names ::auto_index ${pkg_unqualified}::*]] + lappend keys {*}$matches + set matches [lsort [array names ::auto_index ::${pkg_unqualified}::*]] + lappend keys {*}$matches + set ns_populated 0 + set i 0 + set already_sourced [list] ;#often multiple triggers for the same source - don't waste time re-sourcing + set ns_depth [llength [punk::ns::nsparts [string trimleft $ns :]]] + while {!$ns_populated && $i < [llength $keys]} { + #todo - skip sourcing deeper entries from a subpkg which may have been loaded earlier than the base + #e.g if we are loading ::x::y + #only source for keys the same depth, or one deeper ie ::x::y, x::y, ::x::y::z not ::x or ::x::y::z::etc + set k [lindex $keys $i] + set k_depth [llength [punk::ns::nsparts [string trimleft $k :]]] + if {$k_depth == $ns_depth || $k_depth == $ns_depth + 1} { + set auto_source [set ::auto_index($k)] + if {$auto_source ni $already_sourced} { + uplevel 1 $auto_source + lappend already_sourced $auto_source + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + } + } + incr i + } + } } } @@ -1799,7 +1853,13 @@ tcl::namespace::eval punk::ns { return $out } } else { - error "Namespace $ns not found." + if {$ver eq ""} { + error "Namespace $ns not found. No package version found." + } else { + set out "(no package namespace found) remaining in [uplevel 1 {namespace current}]" + append out \n $ver + return $out + } } return $out } diff --git a/src/modules/punk/repl-0.1.tm b/src/modules/punk/repl-0.1.tm index 9debf045..f40ff65d 100644 --- a/src/modules/punk/repl-0.1.tm +++ b/src/modules/punk/repl-0.1.tm @@ -416,7 +416,11 @@ proc repl::start {inchan args} { variable codethread_cond - tsv::unset codethread_$codethread + if {[catch { + tsv::unset codethread_$codethread + } errM]} { + puts stderr " repl::start temp warning - $errM" + } thread::cancel $codethread thread::cond destroy $codethread_cond ;#race if we destroy cond before child thread has exited - as it can send a -async quit set codethread "" diff --git a/src/modules/punk/repo-999999.0a1.0.tm b/src/modules/punk/repo-999999.0a1.0.tm index d9f968ed..1dae586c 100644 --- a/src/modules/punk/repo-999999.0a1.0.tm +++ b/src/modules/punk/repo-999999.0a1.0.tm @@ -468,7 +468,7 @@ namespace eval punk::repo { set path [string trim [string range $ln [string length "MISSING "] end]] dict set pathdict $path "missing" } - "EXTRA * " { + "EXTRA *" { #fossil will explicitly list files in a new folder - as opposed to git which shows just the folder set path [string trim [string range $ln [string length "EXTRA "] end]] dict set pathdict $path "extra" diff --git a/src/modules/punk/winlnk-999999.0a1.0.tm b/src/modules/punk/winlnk-999999.0a1.0.tm new file mode 100644 index 00000000..a9fa21e3 --- /dev/null +++ b/src/modules/punk/winlnk-999999.0a1.0.tm @@ -0,0 +1,561 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::winlnk 999999.0a1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin shellspy_module_punk::winlnk 0 999999.0a1.0] +#[copyright "2024"] +#[titledesc {windows shortcut .lnk library}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk::winlnk}] [comment {-- Description at end of page heading --}] +#[require punk::winlnk] +#[keywords module shortcut lnk parse windows crossplatform] +#[description] +#[para] Tools for reading windows shortcuts (.lnk files) on any platform + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::winlnk +#[subsection Concepts] +#[para] Windows shortcuts are a binary format file with a .lnk extension +#[para] Shell Link (.LNK) Binary File Format is documented in [MS_SHLLINK].pdf published by Microsoft. +#[para] Revision 8.0 published 2024-04-23 + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::winlnk +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +#TODO - logger + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#tcl::namespace::eval punk::winlnk::class { + #*** !doctools + #[subsection {Namespace punk::winlnk::class}] + #[para] class definitions + #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + #} +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::winlnk}] + #[para] Core API functions for punk::winlnk + #[list_begin definitions] + + + variable magic_HeaderSize "0000004C" ;#HeaderSize MUST equal this + variable magic_LinkCLSID "00021401-0000-0000-C000-000000000046" ;#LinkCLSID MUST equal this + + proc Get_contents {path {bytes all}} { + if {![file exists $path] || [file type $path] ne "file"} { + error "punk::winlnk::get_contents cannot find a filesystem object of type 'file' at location: $path" + } + set fd [open $path r] + chan configure $fd -translation binary -encoding iso8859-1 + if {$bytes eq "all"} { + set data [read $fd] + } else { + set data [read $fd $bytes] + } + close $fd + return $data + } + proc Get_HeaderSize {contents} { + set 4bytes [split [string range $contents 0 3] ""] + set hex4 "" + foreach b [lreverse $4bytes] { + set dec [scan $b %c] ;# 0-255 decimal + set HH [format %2.2llX $dec] + append hex4 $HH + } + return $hex4 + } + proc Get_LinkCLSID {contents} { + set 16bytes [string range $contents 4 19] + #CLSID hex textual representation is split as 4-2-2-2-6 bytes(hex pairs) + #e.g We expect 00021401-0000-0000-C000-000000000046 for .lnk files + #for endianness - it is little endian all the way but the split is 4-2-2-1-1-1-1-1-1-1-1 REVIEW + #(so it can appear as mixed endianness if you don't know the splits) + #https://devblogs.microsoft.com/oldnewthing/20220928-00/?p=107221 + #This is based on COM textual representation of GUIDS + #Apparently a CLSID is a GUID that identifies a COM object + set clsid "" + set s1 [tcl::string::range $16bytes 0 3] + set declist [scan [string reverse $s1] %c%c%c%c] + set fmt "%02X%02X%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s2 [tcl::string::range $16bytes 4 5] + set declist [scan [string reverse $s2] %c%c] + set fmt "%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s3 [tcl::string::range $16bytes 6 7] + set declist [scan [string reverse $s3] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + #now treat bytes individually - so no endianness conversion + set declist [scan [tcl::string::range $16bytes 8 9] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + set scan [string repeat %c 6] + set fmt [string repeat %02X 6] + set declist [scan [tcl::string::range $16bytes 10 15] $scan] + append clsid [format $fmt {*}$declist] + + return $clsid + } + proc Contents_check_header {contents} { + variable magic_HeaderSize + variable magic_LinkCLSID + expr {[Get_HeaderSize $contents] eq $magic_HeaderSize && [Get_LinkCLSID $contents] eq $magic_LinkCLSID} + } + + #LinkFlags - 4 bytes - specifies information about the shell link and the presence of optional portions of the structure. + proc Show_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + puts "val: $val" + set declist [scan [string reverse $4bytes] %c%c%c%c] + set fmt [string repeat %08b 4] + puts "LinkFlags:[format $fmt {*}$declist]" + + set r [binary scan $4bytes b32 val] + puts "bscan-le: $val" + set r [binary scan [string reverse $4bytes] b32 val] + puts "bscan-2 : $val" + } + proc Get_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + variable LinkFlags + set LinkFlags [dict create\ + hasLinkTargetIDList 1\ + HasLinkInfo 2\ + HasName 4\ + HasRelativePath 8\ + HasWorkingDir 16\ + HasArguments 32\ + HasIconLocation 64\ + IsUnicode 128\ + ForceNoLinkInfo 256\ + HasExpString 512\ + RunInSeparateProcess 1024\ + Unused1 2048\ + HasDarwinID 4096\ + RunAsUser 8192\ + HasExpIcon 16394\ + NoPidlAlias 32768\ + Unused2 65536\ + RunWithShimLayer 131072\ + ForceNoLinkTrack 262144\ + EnableTargetMetadata 524288\ + DisableLinkPathTracking 1048576\ + DisableKnownFolderTracking 2097152\ + DisableKnownFolderAlias 4194304\ + AllowLinkToLink 8388608\ + UnaliasOnSave 16777216\ + PreferEnvironmentPath 33554432\ + KeepLocalIDListForUNCTarget 67108864\ + ] + variable LinkFlagLetters [list A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA] + proc Has_LinkFlag {contents flagname} { + variable LinkFlags + variable LinkFlagLetters + if {[string length $flagname] <= 2} { + set idx [lsearch $LinkFlagLetters $flagname] + if {$idx < 0} { + error "punk::winlnk::Has_LinkFlag error - flagname $flagname not known" + } + set binflag [expr {2**$idx}] + set allflags [Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } + if {[dict exists $LinkFlags $flagname]} { + set binflag [dict get $LinkFlags $flagname] + set allflags [Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } else { + error "punk::winlnk::Has_LinkFlag error - flagname $flagname not known" + } + } + + + + #https://github.com/libyal/liblnk/blob/main/documentation/Windows%20Shortcut%20File%20(LNK)%20format.asciidoc + + #offset 24 4 bytes + #File attribute flags + + #offset 28 8 bytes + #creation date and time + + #offset 36 8 bytes + #last access date and time + + #offset 44 8 bytes + #last modification date and time + + #offset 52 4 bytes - unsigned int + #file size in bytes (of target) + proc Get_FileSize {contents} { + set 4bytes [string range $contents 52 55] + set r [binary scan $4bytes i val] + return $val + } + + #offset 56 4 bytes signed integer + #icon index value + + #offset 60 4 bytes - unsigned integer + #SW_SHOWNORMAL 0x00000001 + #SW_SHOWMAXIMIZED 0x00000001 + #SW_SHOWMINNOACTIVE 0x00000007 + # - all other values MUST be treated as SW_SHOWNORMAL + proc Get_ShowCommand {contents} { + set 4bytes [string range $contents 60 63] + set r [binary scan $4bytes i val] + return $val + } + + #offset 64 Bytes 2 + #Hot key + + #offset 66 2 bytes - reserved + + #offset 68 4 bytes - reserved + + #offset 72 4 bytes - reserved + + #next 76 + + proc Get_LinkTargetIDList_size {contents} { + if {[Has_LinkFlag $contents "A"]} { + set 2bytes [string range $contents 76 77] + set r [binary scan $2bytes s val] ;#short + #logger + #puts stderr "LinkTargetIDList_size: $val" + return $val + } else { + return 0 + } + } + proc Get_LinkInfo_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + set offset 0 + } else { + set offset [expr {2 + $idlist_size}] ;#LinkTargetIdList IDListSize field + value + } + set linkinfo_start [expr {76 + $offset}] + if {[Has_LinkFlag $contents B]} { + #puts stderr "linkinfo_start: $linkinfo_start" + set 4bytes [string range $contents $linkinfo_start $linkinfo_start+3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set linkinfo_content [string range $contents $linkinfo_start [expr {$linkinfo_start + $val -1}]] + return [dict create linkinfo_start $linkinfo_start size $val next_start [expr {$linkinfo_start + $val}] content $linkinfo_content] + } else { + return [dict create linkinfo_start $linkinfo_start size 0 next_start $linkinfo_start content ""] + } + } + + proc LinkInfo_get_fields {linkinfocontent} { + set 4bytes [string range $linkinfocontent 0 3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set bytes_linkinfoheadersize [string range $linkinfocontent 4 7] + set bytes_linkinfoflags [string range $linkinfocontent 8 11] + set r [binary scan $4bytes i flags] ;# i for little endian 32-bit signed int + #puts "linkinfoflags: $flags" + + set localbasepath "" + set commonpathsuffix "" + + #REVIEW - flags problem? + if {$flags & 1} { + #VolumeIDAndLocalBasePath + #logger + #puts stderr "VolumeIDAndLocalBasePath" + } + if {$flags & 2} { + #logger + #puts stderr "CommonNetworkRelativeLinkAndPathSuffix" + } + set bytes_volumeid_offset [string range $linkinfocontent 12 15] + set bytes_localbasepath_offset [string range $linkinfocontent 16 19] ;# a + set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] + set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] ;# a + + binary scan $bytes_localbasepath_offset i bp_offset + if {$bp_offset > 0} { + set tail [string range $linkinfocontent $bp_offset end] + set stringterminator 0 + set i 0 + set localbasepath "" + #TODO + while {!$stringterminator & $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append localbasepath $c + } + incr i + } + } + binary scan $bytes_commonpathsuffix_offset i cps_offset + if {$cps_offset > 0} { + set tail [string range $linkinfocontent $cps_offset end] + set stringterminator 0 + set i 0 + set commonpathsuffix "" + #TODO + while {!$stringterminator && $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append commonpathsuffix $c + } + incr i + } + } + + + return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix] + } + + proc contents_get_info {contents} { + + #todo - return something like the perl lnk-parse-1.0.pl script? + + #Link File: C:/repo/jn/tclmodules/tomlish/src/modules/test/#modpod-tomlish-999999.0a1.0/suites/all/arrays_1.toml#roundtrip+roundtrip_files+arrays_1.toml.fauxlink.lnk + #Link Flags: HAS SHELLIDLIST | POINTS TO FILE/DIR | NO DESCRIPTION | HAS RELATIVE PATH STRING | HAS WORKING DIRECTORY | NO CMD LINE ARGS | NO CUSTOM ICON | + #File Attributes: ARCHIVE + #Create Time: Sun Jul 14 2024 10:41:34 + #Last Accessed time: Sat Sept 21 2024 02:46:10 + #Last Modified Time: Tue Sept 10 2024 17:16:07 + #Target Length: 479 + #Icon Index: 0 + #ShowWnd: 1 SW_NORMAL + #HotKey: 0 + #(App Path:) Remaining Path: repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-999999.0a1.0\suites\roundtrip\roundtrip_files\arrays_1.toml + #Relative Path: ..\roundtrip\roundtrip_files\arrays_1.toml + #Working Dir: C:\repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-999999.0a1.0\suites\roundtrip\roundtrip_files + + variable LinkFlags + set flags_enabled [list] + dict for {k v} $LinkFlags { + if {[Has_LinkFlag $contents $k] > 0} { + lappend flags_enabled $k + } + } + + set showcommand_val [Get_ShowCommand $contents] + switch -- $showcommand_val { + 1 { + set showwnd [list 1 SW_SHOWNORMAL] + } + 3 { + set showwnd [list 3 SW_SHOWMAXIMIZED] + } + 7 { + set showwnd [list 7 SW_SHOWMINNOACTIVE] + } + default { + set showwnd [list $showcommand_val SW_SHOWNORMAL-effective] + } + } + + set linkinfo_content_dict [Get_LinkInfo_content $contents] + set localbase_path "" + set suffix_path "" + set linkinfocontent [dict get $linkinfo_content_dict content] + set link_file "" + if {$linkinfocontent ne ""} { + set linkfields [LinkInfo_get_fields $linkinfocontent] + set localbase_path [dict get $linkfields localbasepath] + set suffix_path [dict get $linkfields commonpathsuffix] + set link_file [file join $localbase_path $suffix_path] + } + + set result [dict create\ + link_file $link_file\ + link_flags $flags_enabled\ + file_attributes ""\ + create_time ""\ + last_accessed_time ""\ + target_length [Get_FileSize $contents]\ + icon_index ""\ + showwnd "$showwnd"\ + hotkey ""\ + relative_path "?"\ + ] + } + + proc file_check_header {path} { + #*** !doctools + #[call [fun file_check_header] [arg path] ] + #[para]Return 0|1 + #[para]Determines if the .lnk file specified in path has a valid header for a windows shortcut + set c [Get_contents $path 20] + return [Contents_check_header $c] + } + proc file_get_info {path} { + #*** !doctools + #[call [fun file_get_info] [arg path] ] + #[para] Return a dict of info obtained by parsing the binary data in a windows .lnk file + #[para] If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and the dictionary will contain an 'error' key + set c [Get_contents $path] + if {[Contents_check_header $c]} { + return [contents_get_info $c] + } else { + return [dict create error "lnk_header_check_failed"] + } + } + proc file_show_info {path} { + package require punk::lib + punk::lib::showdict [file_get_info $path] * + } + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::winlnk::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::winlnk::system { + #*** !doctools + #[subsection {Namespace punk::winlnk::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::winlnk [tcl::namespace::eval punk::winlnk { + variable pkg punk::winlnk + variable version + set version 999999.0a1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/modules/punk/winlnk-buildversion.txt b/src/modules/punk/winlnk-buildversion.txt new file mode 100644 index 00000000..f47d01c8 --- /dev/null +++ b/src/modules/punk/winlnk-buildversion.txt @@ -0,0 +1,3 @@ +0.1.0 +#First line must be a semantic version number +#all other lines are ignored. diff --git a/src/modules/shellfilter-0.1.9.tm b/src/modules/shellfilter-0.1.9.tm index 4f887fd5..b8f4dec0 100644 --- a/src/modules/shellfilter-0.1.9.tm +++ b/src/modules/shellfilter-0.1.9.tm @@ -1658,6 +1658,14 @@ namespace eval shellfilter { return [list $idout $iderr] } + #eg try: set v [list #a b c] + #vs set v {#a b c} + proc list_is_canonical l { + #courtesy DKF via wiki https://wiki.tcl-lang.org/page/BNF+for+Tcl + if {[catch {llength $l}]} {return 0} + string equal $l [list {*}$l] + } + #return a dict keyed on numerical list index showing info about each element # - particularly # 'wouldbrace' to indicate that the item would get braced by Tcl when added to another list diff --git a/src/project_layouts/custom/_project/punk.basic/src/make.tcl b/src/project_layouts/custom/_project/punk.basic/src/make.tcl index 9edd90b0..24206ba7 100644 --- a/src/project_layouts/custom/_project/punk.basic/src/make.tcl +++ b/src/project_layouts/custom/_project/punk.basic/src/make.tcl @@ -13,7 +13,7 @@ namespace eval ::punkmake { variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] variable non_help_flags [list -k] variable help_flags [list -help --help /?] - variable known_commands [list project get-project-info shell vendorupdate bootsupport vfscommonupdate] + variable known_commands [list project modules info check shell vendorupdate bootsupport vfscommonupdate] } if {"::try" ni [info commands ::try]} { puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" @@ -21,7 +21,7 @@ if {"::try" ni [info commands ::try]} { } #------------------------------------------------------------------------------ -#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#Module loading from src/bootsupport or [pwd]/modules if pwd is a 'src' folder #------------------------------------------------------------------------------ #If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files # - then it will attempt to preference these modules @@ -35,8 +35,10 @@ if {[file exists [file join $startdir src bootsupport]]} { set bootsupport_mod [file join $startdir bootsupport modules] set bootsupport_lib [file join $startdir bootsupport lib] } -if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + +set package_paths_modified 0 +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { set original_tm_list [tcl::tm::list] tcl::tm::remove {*}$original_tm_list set original_auto_path $::auto_path @@ -63,8 +65,18 @@ if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { } - if {[file exists [pwd]/modules]} { - tcl::tm::add [pwd]/modules + #we deliberately don't use [pwd]/modules because commonly the launch dir may be the project dir. + #The /modules are the very modules we are building - and may be in a broken state, which make.tcl then can't fix. + if {[file tail $startdir] eq "src"} { + if {[file exists $startdir/modules]} { + #launch from /modules /lib etc." \n \n + append h " $scriptname modules" \n + append h " - build modules from src/modules etc without scanning src/runtime and src/vfs folders to build kit/zipkit executables" \n \n append h " $scriptname bootsupport" \n - append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n + append h " - bootsupport modules are available to make.tcl" \n \n append h " $scriptname vendorupdate" \n - append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n + append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n append h " $scriptname vfscommonupdate" \n - append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib" \n \n - append h " $scriptname get-project-info" \n - append h " - show the name and base folder of the project to be built" \n + append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib etc" \n + append h " - before calling this (followed by make project) - you can test using '(.exe) dev'" \n + append h " this will load modules from your /module /lib paths instead of from the kit/zipkit" \n \n + append h " $scriptname info" \n + append h " - show the name and base folder of the project to be built" \n append h "" \n if {[llength $::punkmake::pkg_missing]} { append h "* ** NOTE ** ***" \n @@ -220,12 +232,68 @@ if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} } set sourcefolder $projectroot/src +if {$::punkmake::command eq "check"} { + puts stdout "- tcl::tm::list" + foreach fld [tcl::tm::list] { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + puts stdout "- auto_path" + foreach fld $::auto_path { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + set v [package require punk::mix::base] + puts stdout "punk::mix::base version $v\n[package ifneeded punk::mix::base $v]" + exit 0 +} + +if {$package_paths_modified} { + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] +} + + -if {$::punkmake::command eq "get-project-info"} { - puts stdout "- -- --- --- --- --- --- --- --- --- ---" - puts stdout "- -- get-project-info -- -" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" +if {$::punkmake::command eq "info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" + puts stdout "- -- info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" puts stdout "- projectroot : $projectroot" + set sourcefolder $projectroot/src + set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] + set vendormodulefolders [glob -nocomplain -dir $sourcefolder -type d -tails vendormodules_tcl*] + puts stdout "- vendorlib folders: ([llength $vendorlibfolders])" + foreach fld $vendorlibfolders { + puts stdout " src/$fld" + } + puts stdout "- vendormodule folders: ([llength $vendormodulefolders])" + foreach fld $vendormodulefolders { + puts stdout " src/$fld" + } + set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] + puts stdout "- source module paths: [llength $source_module_folderlist]" + foreach fld $source_module_folderlist { + puts stdout " $fld" + } + set projectlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails lib_tcl*] + lappend projectlibfolders lib + puts stdout "- source libary paths: [llength $projectlibfolders]" + foreach fld $projectlibfolders { + puts stdout " src/$fld" + } if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { set vc "fossil" set rev [punk::repo::fossil_revision $scriptfolder] @@ -241,8 +309,11 @@ if {$::punkmake::command eq "get-project-info"} { } puts stdout "- version control : $vc" puts stdout "- revision : $rev" - puts stdout "- remote : $rem" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- remote" + foreach ln [split $rem \n] { + puts stdout " $ln" + } + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" exit 0 } @@ -564,7 +635,7 @@ if {$::punkmake::command eq "bootsupport"} { -if {$::punkmake::command ne "project"} { +if {$::punkmake::command ni {project modules}} { puts stderr "Command $::punkmake::command not implemented - aborting." flush stderr after 100 @@ -803,6 +874,19 @@ if {[punk::repo::is_fossil_root $projectroot]} { $installer destroy } +if {$::punkmake::command ne "project"} { + #command = modules + puts stdout "vfs folders not checked" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into base vfs folder" + puts stdout " - use 'make.tcl project' to build executable kits/zipkits from vfs folders as well if you have runtimes installed" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" + puts stdout "-done-" + exit 0 +} + + set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] if {$buildfolder ne "$sourcefolder/_build"} { puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" @@ -832,10 +916,12 @@ if {![llength $runtimes]} { exit 0 } +set has_sdx 1 if {[catch {exec sdx help} errM]} { puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" puts stderr "err: $errM" - exit 1 + #exit 1 + set has_sdx 0 } # -- --- --- --- --- --- --- --- --- --- @@ -1025,6 +1111,8 @@ foreach runtimefile $runtimes { # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set failed_kits [list] set installed_kits [list] +set skipped_kits [list] +set skipped_kit_installs [list] proc ::make_file_traversal_error {args} { error "file_traverse error: $args" @@ -1304,30 +1392,39 @@ foreach vfstail $vfs_tails { } } kit { - if {[catch { - if {$rtname ne "-"} { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose - } else { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose - } - } result]} { - if {$rtname ne "-"} { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" - } else { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" - } - puts stderr "sdx wrap $targetkit failed" - lappend failed_kits [list kit $targetkit reason $sdxmsg] + if {!$has_sdx} { + puts stderr "no sdx available to wrap $targetkit" + lappend failed_kits [list kit $targetkit reason "sdx_executable_unavailable"] $vfs_event targetset_end FAILED $vfs_event destroy $vfs_installer destroy continue - } else { - puts stdout "ok - finished sdx" - set separator [string repeat = 40] - puts stdout $separator - puts stdout $result - puts stdout $separator + } else { + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose + } + } result]} { + if {$rtname ne "-"} { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" + } else { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" + } + puts stderr "sdx wrap $targetkit failed" + lappend failed_kits [list kit $targetkit reason $sdxmsg] + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } } } } @@ -1435,6 +1532,7 @@ foreach vfstail $vfs_tails { set skipped_vfs_build 1 puts stderr "." puts stdout "Skipping build for vfs $vfstail with runtime $rtname - no change detected" + lappend skipped_kits [list kit $targetkit reason "no change detected"] $vfs_event targetset_end SKIPPED } $vfs_event destroy @@ -1489,6 +1587,7 @@ foreach vfstail $vfs_tails { set skipped_kit_install 1 puts stderr "." puts stdout "Skipping kit install for $targetkit with vfs $vfstail runtime $rtname - no change detected" + lappend skipped_kit_installs [list kit $targetkit reason "no change detected"] $bin_event targetset_end SKIPPED } $bin_event destroy @@ -1510,8 +1609,21 @@ if {[llength $failed_kits]} { punk::lib::showdict -channel stderr -roottype list $failed_kits */@*.@* #puts stderr [join $failed_kits \n] } - -puts stdout "done" +set had_kits [expr {[llength $installed_kits] || [llength $failed_kits] || [llength $skipped_kits]}] +if {$had_kits} { + puts stdout " module builds and kit/zipkit builds processed (vfs config: src/runtime/mapvfs.config)" + puts stdout " - use 'make.tcl modules' to build modules without scanning/building the vfs folders into executable kits/zipkits" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into the base vfs folder" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" +} else { + puts stdout " module builds processed" + puts stdout "" + puts stdout " If kit/zipkit based executables required - create src/vfs/.vfs folders containing lib,modules,modules_tcl9 etc folders" + puts stdout " Also ensure appropriate executables exist in src/runtime along with src/runtime/mapvfs.config" +} +puts stdout "-done-" exit 0 diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config index fc436d8c..3993e0c9 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config @@ -1,67 +1,67 @@ - -#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project -#They must be already built, so generally shouldn't come directly from src/modules. - -#each entry - base module -set bootsupport_modules [list\ - src/vendormodules cksum\ - src/vendormodules modpod\ - src/vendormodules overtype\ - src/vendormodules oolib\ - src/vendormodules http\ - src/vendormodules dictutils\ - src/vendormodules fileutil\ - src/vendormodules textutil::adjust\ - src/vendormodules textutil::repeat\ - src/vendormodules textutil::split\ - src/vendormodules textutil::string\ - src/vendormodules textutil::tabify\ - src/vendormodules textutil::trim\ - src/vendormodules textutil::wcswidth\ - src/vendormodules uuid\ - src/vendormodules md5\ - src/vendormodules sha1\ - src/vendormodules tomlish\ - src/vendormodules test::tomlish\ - modules punkcheck\ - modules natsort\ - modules punk::ansi\ - modules punk::assertion\ - modules punk::args\ - modules punk::cap\ - modules punk::cap::handlers::caphandler\ - modules punk::cap::handlers::scriptlibs\ - modules punk::cap::handlers::templates\ - modules punk::char\ - modules punk::console\ - modules punk::du\ - modules punk::encmime\ - modules punk::fileline\ - modules punk::docgen\ - modules punk::lib\ - modules punk::mix\ - modules punk::mix::base\ - modules punk::mix::cli\ - modules punk::mix::util\ - modules punk::mix::templates\ - modules punk::mix::commandset::buildsuite\ - modules punk::mix::commandset::debug\ - modules punk::mix::commandset::doc\ - modules punk::mix::commandset::layout\ - modules punk::mix::commandset::loadedlib\ - modules punk::mix::commandset::module\ - modules punk::mix::commandset::project\ - modules punk::mix::commandset::repo\ - modules punk::mix::commandset::scriptwrap\ - modules punk::ns\ - modules punk::overlay\ - modules punk::path\ - modules punk::repo\ - modules punk::tdl\ - modules punk::zip\ - modules punk::winpath\ - modules textblock\ - modules natsort\ - modules oolib\ -] - + +#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project +#They must be already built, so generally shouldn't come directly from src/modules. + +#each entry - base module +set bootsupport_modules [list\ + src/vendormodules cksum\ + src/vendormodules modpod\ + src/vendormodules overtype\ + src/vendormodules oolib\ + src/vendormodules http\ + src/vendormodules dictutils\ + src/vendormodules fileutil\ + src/vendormodules textutil::adjust\ + src/vendormodules textutil::repeat\ + src/vendormodules textutil::split\ + src/vendormodules textutil::string\ + src/vendormodules textutil::tabify\ + src/vendormodules textutil::trim\ + src/vendormodules textutil::wcswidth\ + src/vendormodules uuid\ + src/vendormodules md5\ + src/vendormodules sha1\ + src/vendormodules tomlish\ + src/vendormodules test::tomlish\ + modules punkcheck\ + modules natsort\ + modules punk::ansi\ + modules punk::assertion\ + modules punk::args\ + modules punk::cap\ + modules punk::cap::handlers::caphandler\ + modules punk::cap::handlers::scriptlibs\ + modules punk::cap::handlers::templates\ + modules punk::char\ + modules punk::console\ + modules punk::du\ + modules punk::encmime\ + modules punk::fileline\ + modules punk::docgen\ + modules punk::lib\ + modules punk::mix\ + modules punk::mix::base\ + modules punk::mix::cli\ + modules punk::mix::util\ + modules punk::mix::templates\ + modules punk::mix::commandset::buildsuite\ + modules punk::mix::commandset::debug\ + modules punk::mix::commandset::doc\ + modules punk::mix::commandset::layout\ + modules punk::mix::commandset::loadedlib\ + modules punk::mix::commandset::module\ + modules punk::mix::commandset::project\ + modules punk::mix::commandset::repo\ + modules punk::mix::commandset::scriptwrap\ + modules punk::ns\ + modules punk::overlay\ + modules punk::path\ + modules punk::repo\ + modules punk::tdl\ + modules punk::zip\ + modules punk::winpath\ + modules textblock\ + modules natsort\ + modules oolib\ +] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.1.tm deleted file mode 100644 index ecf2cca9..00000000 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.1.tm +++ /dev/null @@ -1,200 +0,0 @@ -#JMN - api should be kept in sync with package patternlib where possible -# -package provide oolib [namespace eval oolib { - variable version - set version 0.1.1 -}] - -namespace eval oolib { - oo::class create collection { - variable o_data ;#dict - variable o_alias - constructor {} { - set o_data [dict create] - } - method info {} { - return [dict info $o_data] - } - method count {} { - return [dict size $o_data] - } - method isEmpty {} { - expr {[dict size $o_data] == 0} - } - method names {{globOrIdx {}}} { - if {[llength $globOrIdx]} { - if {[string is integer -strict $globOrIdx]} { - set idx $globOrIdx - if {$idx < 0} { - set idx "end-[expr {abs($idx + 1)}]" - } - if {[catch {lindex [dict keys $o_data] $idx} result]} { - error "[self object] no such index : '$idx'" - } else { - return $result - } - } else { - #glob - return [lsearch -glob -all -inline [dict keys $o_data] $globOrIdx] - } - } else { - return [dict keys $o_data] - } - } - #like names but without globbing - method keys {} { - dict keys $o_data - } - method key {{posn 0}} { - if {$posn < 0} { - set posn "end-[expr {abs($posn + 1)}]" - } - if {[catch {lindex [dict keys $o_data] $posn} result]} { - error "[self object] no such index : '$posn'" - } else { - return $result - } - } - method hasKey {key} { - dict exists $o_data $key - } - method get {} { - return $o_data - } - method items {} { - return [dict values $o_data] - } - method item {key} { - if {[string is integer -strict $key]} { - if {$key >= 0} { - set valposn [expr {(2*$key) +1}] - return [lindex $o_data $valposn] - } else { - set key "end-[expr {abs($key + 1)}]" - return [lindex $o_data $key] - #return [lindex [dict keys $o_data] $key] - } - } - if {[dict exists $o_data $key]} { - return [dict get $o_data $key] - } - } - #inverse lookup - method itemKeys {value} { - set value_indices [lsearch -all [dict values $o_data] $value] - set keylist [list] - foreach i $value_indices { - set idx [expr {(($i + 1) *2) -2}] - lappend keylist [lindex $o_data $idx] - } - return $keylist - } - method search {value args} { - set matches [lsearch {*}$args [dict values $o_data] $value] - if {"-inline" in $args} { - return $matches - } else { - set keylist [list] - foreach i $matches { - set idx [expr {(($i + 1) *2) -2}] - lappend keylist [lindex $o_data $idx] - } - return $keylist - } - } - #review - see patternlib. Is the intention for aliases to be configurable independent of whether the target exists? - method alias {newAlias existingKeyOrAlias} { - if {[string is integer -strict $newAlias]} { - error "[self object] collection key alias cannot be integer" - } - if {[string length $existingKeyOrAlias]} { - set o_alias($newAlias) $existingKeyOrAlias - } else { - unset o_alias($newAlias) - } - } - method aliases {{key ""}} { - if {[string length $key]} { - set result [list] - foreach {n v} [array get o_alias] { - if {$v eq $key} { - lappend result $n $v - } - } - return $result - } else { - return [array get o_alias] - } - } - #if the supplied index is an alias, return the underlying key; else return the index supplied. - method realKey {idx} { - if {[catch {set o_alias($idx)} key]} { - return $idx - } else { - return $key - } - } - method add {value key} { - if {[string is integer -strict $key]} { - error "[self object] collection key must not be an integer. Use another structure if integer keys required" - } - if {[dict exists $o_data $key]} { - error "[self object] col_processors object error: key '$key' already exists in collection" - } - dict set o_data $key $value - return [expr {[dict size $o_data] - 1}] ;#return index of item - } - method remove {idx {endRange ""}} { - if {[string length $endRange]} { - error "[self object] collection error: ranged removal not yet implemented.. remove one item at a time" - } - if {[string is integer -strict $idx]} { - if {$idx < 0} { - set idx "end-[expr {abs($idx+1)}]" - } - set key [lindex [dict keys $o_data] $idx] - set posn $idx - } else { - set key $idx - set posn [lsearch -exact [dict keys $o_data] $key] - if {$posn < 0} { - error "[self object] no such index: '$idx' in this collection" - } - } - dict unset o_data $key - return - } - method clear {} { - set o_data [dict create] - return - } - method reverse_the_collection {} { - #named slightly obtusely because reversing the data when there may be references held is a potential source of bugs - #the name reverse_the_collection should make it clear that the object is being modified in place as opposed to simply 'reverse' which may imply a view/copy. - #todo - consider implementing a get_reverse which provides an interface to the same collection without affecting original references, yet both allowing delete/edit operations. - set dictnew [dict create] - foreach k [lreverse [dict keys $o_data]] { - dict set dictnew $k [dict get $o_data $k] - } - set o_data $dictnew - return - } - #review - cmd as list vs cmd as script? - method map {cmd} { - set seed [list] - dict for {k v} $o_data { - lappend seed [uplevel #0 [list {*}$cmd $v]] - } - return $seed - } - method objectmap {cmd} { - set seed [list] - dict for {k v} $o_data { - lappend seed [uplevel #0 [list $v {*}$cmd]] - } - return $seed - } - } - -} - diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.4.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.4.tm deleted file mode 100644 index 42876322..00000000 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.4.tm +++ /dev/null @@ -1,3685 +0,0 @@ -# -*- tcl -*- -# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt -# -# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. -# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -# (C) Julian Noble 2003-2023 -# -# @@ Meta Begin -# Application overtype 1.6.4 -# Meta platform tcl -# Meta license BSD -# @@ Meta End - - -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -# doctools header -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -#*** !doctools -#[manpage_begin overtype_module_overtype 0 1.6.4] -#[copyright "2024"] -#[titledesc {overtype text layout - ansi aware}] [comment {-- Name section and table of contents description --}] -#[moddesc {overtype text layout}] [comment {-- Description at end of page heading --}] -#[require overtype] -#[keywords module text ansi] -#[description] -#[para] - - -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ - -#*** !doctools -#[section Overview] -#[para] overview of overtype -#[subsection Concepts] -#[para] - - - -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -## Requirements -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ - -#*** !doctools -#[subsection dependencies] -#[para] packages used by overtype -#[list_begin itemized] - -package require Tcl 8.6- -package require textutil -package require punk::lib ;#required for lines_as_list -package require punk::ansi ;#required to detect, split, strip and calculate lengths -package require punk::char ;#box drawing - and also unicode character width determination for proper layout of text with double-column-width chars -package require punk::assertion -#*** !doctools -#[item] [package {Tcl 8.6}] -#[item] [package textutil] -#[item] [package punk::ansi] -#[para] - required to detect, split, strip and calculate lengths of text possibly containing ansi codes -#[item] [package punk::char] -#[para] - box drawing - and also unicode character width determination for proper layout of text with double-column-width chars - -# #package require frobz -# #*** !doctools -# #[item] [package {frobz}] - -#*** !doctools -#[list_end] - -#PERFORMANCE notes -#overtype is very performance sensitive - used in ansi output all over the place ie needs to be optimised -#NOTE use of tcl::dict::for tcl::string::range etc instead of ensemble versions. This is for the many tcl 8.6/8.7 interps which don't compile ensemble commands when in safe interps -#similar for tcl::namespace::eval - but this is at least on some versions of Tcl - faster even in a normal interp. Review to see if that holds for Tcl 9. -#for string map: when there are exactly 2 elements - it is faster to use a literal which has a special case optimisation in the c code -#ie use tcl::string::map {\n ""} ... instead of tcl::string::map [list \n ""] ... -#note that we can use unicode (e.g \uFF31) and other escapes such as \t within these curly braces - we don't have to use double quotes -#generally using 'list' is preferred for the map as less error prone. -#can also use: tcl::string::map "token $var" .. but be careful regarding quoting and whitespace in var. This should be used sparingly if at all. - - -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -#*** !doctools -#[section API] - - -#Julian Noble - 2003 -#Released under standard 'BSD license' conditions. -# -#todo - ellipsis truncation indicator for center,right - -#v1.4 2023-07 - naive ansi color handling - todo - fix tcl::string::range -# - need to extract and replace ansi codes? - -tcl::namespace::eval overtype { - namespace import ::punk::assertion::assert - punk::assertion::active true - - namespace path ::punk::lib - - namespace export * - variable default_ellipsis_horizontal "..." ;#fallback - variable default_ellipsis_vertical "..." - tcl::namespace::eval priv { - proc _init {} { - upvar ::overtype::default_ellipsis_horizontal e_h - upvar ::overtype::default_ellipsis_vertical e_v - set e_h [format %c 0x2026] ;#Unicode Horizontal Ellipsis - set e_v [format %c 0x22EE] - #The unicode ellipsis looks more natural than triple-dash which is centred vertically whereas ellipsis is at floorline of text - #Also - unicode ellipsis has semantic meaning that other processors can interpret - #unicode does also provide a midline horizontal ellipsis 0x22EF - - #set e [format %c 0x2504] ;#punk::char::charshort boxd_ltdshhz - Box Drawings Light Triple Dash Horizontal - #if {![catch {package require punk::char}]} { - # set e [punk::char::charshort boxd_ltdshhz] - #} - } - } - priv::_init -} -proc overtype::about {} { - return "Simple text formatting. Author JMN. BSD-License" -} - -tcl::namespace::eval overtype { - variable grapheme_widths [tcl::dict::create] - - variable escape_terminals - #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). - tcl::dict::set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] - #tcl::dict::set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic - tcl::dict::set escape_terminals OSC [list \007 \033\\] ;#note mix of 1 and 2-byte terminals - - #self-contained 2 byte ansi escape sequences - review more? - variable ansi_2byte_codes_dict - set ansi_2byte_codes_dict [tcl::dict::create\ - "reset_terminal" "\u001bc"\ - "save_cursor_posn" "\u001b7"\ - "restore_cursor_posn" "\u001b8"\ - "cursor_up_one" "\u001bM"\ - "NEL - Next Line" "\u001bE"\ - "IND - Down one line" "\u001bD"\ - "HTS - Set Tab Stop" "\u001bH"\ - ] - - #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. - # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ -} - - -#proc overtype::stripansi {text} { -# variable escape_terminals ;#dict -# variable ansi_2byte_codes_dict -# #important that we don't spend too much time on this for plain text that doesn't contain any escapes anyway -# if {[string first \033 $text] <0 && [string first \009c $text] <0} { -# #\033 same as \x1b -# return $text -# } -# -# set text [convert_g0 $text] -# -# #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. -# #line endings can theoretically occur within an ansi escape sequence (review e.g title?) -# set inputlist [split $text ""] -# set outputlist [list] -# -# set 2bytecodes [dict values $ansi_2byte_codes_dict] -# -# set in_escapesequence 0 -# #assumption - undertext already 'rendered' - ie no backspaces or carriagereturns or other cursor movement controls -# set i 0 -# foreach u $inputlist { -# set v [lindex $inputlist $i+1] -# set uv ${u}${v} -# if {$in_escapesequence eq "2b"} { -# #2nd byte - done. -# set in_escapesequence 0 -# } elseif {$in_escapesequence != 0} { -# set escseq [tcl::dict::get $escape_terminals $in_escapesequence] -# if {$u in $escseq} { -# set in_escapesequence 0 -# } elseif {$uv in $escseq} { -# set in_escapseequence 2b ;#flag next byte as last in sequence -# } -# } else { -# #handle both 7-bit and 8-bit CSI and OSC -# if {[regexp {^(?:\033\[|\u009b)} $uv]} { -# set in_escapesequence CSI -# } elseif {[regexp {^(?:\033\]|\u009c)} $uv]} { -# set in_escapesequence OSC -# } elseif {$uv in $2bytecodes} { -# #self-contained e.g terminal reset - don't pass through. -# set in_escapesequence 2b -# } else { -# lappend outputlist $u -# } -# } -# incr i -# } -# return [join $outputlist ""] -#} - - - - - -proc overtype::string_columns {text} { - if {[punk::ansi::ta::detect $text]} { - #error "error string_columns is for calculating character length of string - ansi codes must be stripped/rendered first e.g with punk::ansi::stripansi. Alternatively try punk::ansi::printing_length" - set text [punk::ansi::stripansi $text] - } - return [punk::char::ansifreestring_width $text] -} - -#todo - consider a way to merge overtype::left/centre/right -#These have similar algorithms/requirements - and should be refactored to be argument-wrappers over a function called something like overtype::renderblock -#overtype::renderblock could render the input to a defined (possibly overflowing in x or y) rectangle overlapping the underlay. -#(i.e not even necessariy having it's top left within the underlay) -tcl::namespace::eval overtype::priv { -} - -#could return larger than colwidth -proc _get_row_append_column {row} { - upvar outputlines outputlines - set idx [expr {$row -1}] - if {$row <= 1 || $row > [llength $outputlines]} { - return 1 - } else { - upvar opt_overflow opt_overflow - upvar colwidth colwidth - set existinglen [punk::ansi::printing_length [lindex $outputlines $idx]] - set endpos [expr {$existinglen +1}] - if {$opt_overflow} { - return $endpos - } else { - if {$endpos > $colwidth} { - return $colwidth + 1 - } else { - return $endpos - } - } - } -} - -tcl::namespace::eval overtype { - #*** !doctools - #[subsection {Namespace overtype}] - #[para] Core API functions for overtype - #[list_begin definitions] - - - - #tcl::string::range should generally be avoided for both undertext and overtext which contain ansi escapes and other cursor affecting chars such as \b and \r - #render onto an already-rendered (ansi already processed) 'underlay' string, a possibly ansi-laden 'overlay' string. - #The underlay and overlay can be multiline blocks of text of varying line lengths. - #The overlay may just be an ansi-colourised block - or may contain ansi cursor movements and cursor save/restore calls - in which case the apparent length and width of the overlay can't be determined as if it was a block of text. - #This is a single-shot rendering of strings - ie there is no way to chain another call containing a cursor-restore to previously rendered output and have it know about any cursor-saves in the first call. - # a cursor start position other than top-left is a possible addition to consider. - #see editbuf in punk::repl for a more stateful ansi-processor. Both systems use loops over overtype::renderline - proc renderspace {args} { - #*** !doctools - #[call [fun overtype::renderspace] [arg args] ] - #[para] usage: ?-transparent [lb]0|1[rb]? ?-overflow [lb]1|0[rb]? ?-ellipsis [lb]1|0[rb]? ?-ellipsistext ...? undertext overtext - - # @c overtype starting at left (overstrike) - # @c can/should we use something like this?: 'format "%-*s" $len $overtext - variable default_ellipsis_horizontal - - if {[llength $args] < 2} { - error {usage: ?-transparent [0|1]? ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext} - } - lassign [lrange $args end-1 end] underblock overblock - set opts [tcl::dict::create\ - -bias ignored\ - -width \uFFEF\ - -height \uFFEF\ - -startcolumn 1\ - -wrap 0\ - -ellipsis 0\ - -ellipsistext $default_ellipsis_horizontal\ - -ellipsiswhitespace 0\ - -overflow 0\ - -appendlines 1\ - -transparent 0\ - -exposed1 \uFFFD\ - -exposed2 \uFFFD\ - -experimental 0\ - -looplimit \uFFEF\ - ] - #-ellipsis args not used if -wrap is true - set argsflags [lrange $args 0 end-2] - foreach {k v} $argsflags { - switch -- $k { - -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -appendlines - -transparent - -exposed1 - -exposed2 - -experimental { - tcl::dict::set opts $k $v - } - default { - error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" - } - } - } - #set opts [tcl::dict::merge $defaults $argsflags] - # -- --- --- --- --- --- - set opt_overflow [tcl::dict::get $opts -overflow] - ##### - # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. - set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) - ##### - #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. - set opt_width [tcl::dict::get $opts -width] - set opt_height [tcl::dict::get $opts -height] - set opt_startcolumn [tcl::dict::get $opts -startcolumn] - set opt_appendlines [tcl::dict::get $opts -appendlines] - set opt_transparent [tcl::dict::get $opts -transparent] - set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] - set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] - set opt_exposed1 [tcl::dict::get $opts -exposed1] ;#widechar_exposed_left - todo - set opt_exposed2 [tcl::dict::get $opts -exposed2] ;#widechar_exposed_right - todo - # -- --- --- --- --- --- - - # ---------------------------- - # -experimental dev flag to set flags etc - # ---------------------------- - set data_mode 0 - set test_mode 1 - set info_mode 0 - set edit_mode 0 - set opt_experimental [tcl::dict::get $opts -experimental] - foreach o $opt_experimental { - switch -- $o { - test_mode { - set test_mode 1 - set info_mode 1 - } - old_mode { - set test_mode 0 - set info_mode 1 - } - data_mode { - set data_mode 1 - } - info_mode { - set info_mode 1 - } - edit_mode { - set edit_mode 1 - } - } - } - set test_mode 1 ;#try to eliminate - # ---------------------------- - - #modes - set insert_mode 0 ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l - set autowrap_mode $opt_wrap - set reverse_mode 0 - - - set underblock [tcl::string::map {\r\n \n} $underblock] - set overblock [tcl::string::map {\r\n \n} $overblock] - - - #set underlines [split $underblock \n] - - #underblock is a 'rendered' block - so width height make sense - #colwidth & colheight were originally named with reference to rendering into a 'column' of output e.g a table column - before cursor row/col was implemented. - #The naming is now confusing. It should be something like renderwidth renderheight ?? review - - if {$opt_width eq "\uFFEF" || $opt_height eq "\uFFEF"} { - lassign [blocksize $underblock] _w colwidth _h colheight - if {$opt_width ne "\uFFEF"} { - set colwidth $opt_width - } - if {$opt_height ne "\uFFEF"} { - set colheight $opt_height - } - } else { - set colwidth $opt_width - set colheight $opt_height - } - - # -- --- --- --- - #REVIEW - do we need ansi resets in the underblock? - if {$underblock eq ""} { - set underlines [lrepeat $colheight ""] - } else { - set underlines [split $underblock \n] - } - #if {$underblock eq ""} { - # set blank "\x1b\[0m\x1b\[0m" - # #set underlines [list "\x1b\[0m\x1b\[0m"] - # set underlines [lrepeat $colheight $blank] - #} else { - # #lines_as_list -ansiresets 1 will do nothing if -ansiresets 1 isn't specified - REVIEW - # set underlines [lines_as_list -ansiresets 1 $underblock] - #} - # -- --- --- --- - - #todo - reconsider the 'line' as the natural chunking mechanism for the overlay. - #In practice an overlay ANSI stream can be a single line with ansi moves/restores etc - or even have no moves or newlines, just relying on wrapping at the output colwidth - #In such cases - we process the whole shebazzle for the first output line - only reducing by the applied amount at the head each time, reprocessing the long tail each time. - #(in cases where there are interline moves or cursor jumps anyway) - #This works - but doesn't seem efficient. - #On the other hand.. maybe it depends on the data. For simpler files it's more efficient than splitting first - - #a hack until we work out how to avoid infinite loops... - # - set looplimit [tcl::dict::get $opts -looplimit] - if {$looplimit eq "\uFFEF"} { - #looping for each char is worst case (all newlines?) - anything over that is an indication of something broken? - #do we need any margin above the length? (telnet mapscii.me test) - set looplimit [expr {[tcl::string::length $overblock] + 10}] - } - - if {!$test_mode} { - set inputchunks [split $overblock \n] - } else { - set scheme 3 - switch -- $scheme { - 0 { - #one big chunk - set inputchunks [list $overblock] - } - 1 { - set inputchunks [punk::ansi::ta::split_codes $overblock] - } - 2 { - - #split into lines if possible first - then into plaintext/ansi-sequence chunks ? - set inputchunks [list ""] ;#put an empty plaintext split in for starters - set i 1 - set lines [split $overblock \n] - foreach ln $lines { - if {$i < [llength $lines]} { - append ln \n - } - set sequence_split [punk::ansi::ta::split_codes_single $ln] ;#use split_codes Not split_codes_single? - set lastpt [lindex $inputchunks end] - lset inputchunks end [tcl::string::cat $lastpt [lindex $sequence_split 0]] - lappend inputchunks {*}[lrange $sequence_split 1 end] - incr i - } - } - 3 { - #it turns out line based chunks are faster than the above.. probably because some of those end up doing the regex splitting twice - set lflines [list] - set inputchunks [split $overblock \n] - foreach ln $inputchunks { - append ln \n - lappend lflines $ln - } - if {[llength $lflines]} { - lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] - } - set inputchunks $lflines[unset lflines] - - } - } - } - - - #overblock height/width isn't useful in the presence of an ansi input overlay with movements. The number of lines may bear little relationship to the output height - #lassign [blocksize $overblock] _w overblock_width _h overblock_height - - - set replay_codes_underlay [tcl::dict::create 1 ""] - #lappend replay_codes_overlay "" - set replay_codes_overlay "" - set unapplied "" - set cursor_saved_position [tcl::dict::create] - set cursor_saved_attributes "" - - - set outputlines $underlines - set overidx 0 - - #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext - set row 1 - if {$data_mode} { - set col [_get_row_append_column $row] - } else { - set col $opt_startcolumn - } - - set instruction_stats [tcl::dict::create] - - set loop 0 - #while {$overidx < [llength $inputchunks]} { } - - while {[llength $inputchunks]} { - #set overtext [lindex $inputchunks $overidx]; lset inputchunks $overidx "" - set overtext [lpop inputchunks 0] - if {![tcl::string::length $overtext]} { - incr loop - continue - } - #puts "----->[ansistring VIEW -lf 1 -vt 1 -nul 1 $overtext]<----" - set undertext [lindex $outputlines [expr {$row -1}]] - set renderedrow $row - - #renderline pads each underaly line to width with spaces and should track where end of data is - - - #set overtext [tcl::string::cat [lindex $replay_codes_overlay $overidx] $overtext] - set overtext [tcl::string::cat $replay_codes_overlay $overtext] - if {[tcl::dict::exists $replay_codes_underlay $row]} { - set undertext [tcl::string::cat [tcl::dict::get $replay_codes_underlay $row] $undertext] - } - #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set LASTCALL [list -info 1 -insert_mode $insert_mode -autowrap_mode $autowrap_mode -transparent $opt_transparent -width $colwidth -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -cursor_column $col -cursor_row $row $undertext $overtext] - set rinfo [renderline -experimental $opt_experimental\ - -info 1\ - -insert_mode $insert_mode\ - -cursor_restore_attributes $cursor_saved_attributes\ - -autowrap_mode $autowrap_mode\ - -transparent $opt_transparent\ - -width $colwidth\ - -exposed1 $opt_exposed1\ - -exposed2 $opt_exposed2\ - -overflow $opt_overflow\ - -cursor_column $col\ - -cursor_row $row\ - $undertext\ - $overtext\ - ] - set instruction [tcl::dict::get $rinfo instruction] - set insert_mode [tcl::dict::get $rinfo insert_mode] - set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# - #set reverse_mode [tcl::dict::get $rinfo reverse_mode];#how to support in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] - set unapplied [tcl::dict::get $rinfo unapplied] - set unapplied_list [tcl::dict::get $rinfo unapplied_list] - set post_render_col [tcl::dict::get $rinfo cursor_column] - set post_render_row [tcl::dict::get $rinfo cursor_row] - set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] - set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] - set visualwidth [tcl::dict::get $rinfo visualwidth] - set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] - set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] - tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] - #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] - set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - - - - #-- todo - detect looping properly - if {$row > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == $row && $instruction eq ""} { - puts stderr "overtype::renderspace loop?" - puts [ansistring VIEW $rinfo] - break - } - #-- - - if {[tcl::dict::size $c_saved_pos] >= 1} { - set cursor_saved_position $c_saved_pos - set cursor_saved_attributes $c_saved_attributes - } - - - set overflow_handled 0 - - - - set nextprefix "" - - - #todo - handle potential insertion mode as above for cursor restore? - #keeping separate branches for debugging - review and merge as appropriate when stable - tcl::dict::incr instruction_stats $instruction - switch -- $instruction { - {} { - if {$test_mode == 0} { - incr row - if {$data_mode} { - set col [_get_row_append_column $row] - if {$col > $colwidth} { - - } - } else { - set col 1 - } - } else { - #lf included in data - set row $post_render_row - set col $post_render_col - - #set col 1 - #if {$post_render_row != $renderedrow} { - # set col 1 - #} else { - # set col $post_render_col - #} - } - } - up { - - #renderline knows it's own line number, and knows not to go above row l - #it knows that a move whilst 1-beyond the width conflicts with the linefeed and reduces the move by one accordingly. - #row returned should be correct. - #column may be the overflow column - as it likes to report that to the caller. - - #Note that an ansi up sequence after last column going up to a previous line and also beyond the last column, will result in the next grapheme going onto the following line. - #this seems correct - as the column remains beyond the right margin so subsequent chars wrap (?) review - #puts stderr "up $post_render_row" - #puts stderr "$rinfo" - - #puts stdout "1 row:$row col $col" - set row $post_render_row - #data_mode (naming?) determines if we move to end of existing data or not. - #data_mode 0 means ignore existing line length and go to exact column - #set by -experimental flag - if {$data_mode == 0} { - set col $post_render_col - } else { - #This doesn't really work if columns are pre-filled with spaces..we can't distinguish them from data - #we need renderline to return the number of the maximum column filled (or min if we ever do r-to-l) - set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - set lastdatacol [punk::ansi::printing_length $existingdata] - if {$lastdatacol < $colwidth} { - set col [expr {$lastdatacol+1}] - } else { - set col $colwidth - } - } - - #puts stdout "2 row:$row col $col" - #puts stdout "-----------------------" - #puts stdout $rinfo - #flush stdout - } - down { - if {$data_mode == 0} { - #renderline doesn't know how far down we can go.. - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines "" - } - } - set row $post_render_row - set col $post_render_col - } else { - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines "" - } - } - set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - set lastdatacol [punk::ansi::printing_length $existingdata] - if {$lastdatacol < $colwidth} { - set col [expr {$lastdatacol+1}] - } else { - set col $colwidth - } - - } - } - restore_cursor { - #testfile belinda.ans uses this - - #puts stdout "[a+ blue bold]CURSOR_RESTORE[a]" - if {[tcl::dict::exists $cursor_saved_position row]} { - set row [tcl::dict::get $cursor_saved_position row] - set col [tcl::dict::get $cursor_saved_position column] - #puts stdout "restoring: row $row col $col [ansistring VIEW $cursor_saved_attributes] [a] unapplied [ansistring VIEWCODES $unapplied]" - #set nextprefix $cursor_saved_attributes - #lset replay_codes_overlay [expr $overidx+1] $cursor_saved_attributes - set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay]$cursor_saved_attributes - #set replay_codes_overlay $cursor_saved_attributes - set cursor_saved_position [tcl::dict::create] - set cursor_saved_attributes "" - } else { - #TODO - #?restore without save? - #should move to home position and reset ansi SGR? - #puts stderr "overtype::renderspace cursor_restore without save data available" - } - #If we were inserting prior to hitting the cursor_restore - there could be overflow_right data - generally the overtype functions aren't for inserting - but ansi can enable it - #if we were already in overflow when cursor_restore was hit - it shouldn't have been processed as an action - just stored. - if {!$overflow_handled && $overflow_right ne ""} { - #wrap before restore? - possible effect on saved cursor position - #this overflow data has previously been rendered so has no cursor movements or further save/restore operations etc - #we can just insert another call to renderline to solve this.. ? - #It would perhaps be more properly handled as a queue of instructions from our initial renderline call - #we don't need to worry about overflow next call (?)- but we should carry forward our gx and ansi stacks - - puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" - - set sub_info [overtype::renderline -info 1 -width $colwidth -insert_mode $insert_mode -autowrap_mode $autowrap_mode -overflow [tcl::dict::get $opts -overflow] "" $overflow_right] - set foldline [tcl::dict::get $sub_info result] - set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed.. - set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. - linsert outputlines $renderedrow $foldline - #review - row & col set by restore - but not if there was no save.. - } - set overflow_handled 1 - - } - move { - ######## - if {$post_render_row > [llength $outputlines]} { - #Ansi moves need to create new lines ? - #if {$opt_appendlines} { - # set diff [expr {$post_render_row - [llength $outputlines]}] - # if {$diff > 0} { - # lappend outputlines {*}[lrepeat $diff ""] - # } - # set row $post_render_row - #} else { - set row [llength $outputlines] - #} - } else { - set row $post_render_row - } - ####### - set col $post_render_col - #overflow + unapplied? - } - lf_start { - #raw newlines - must be test_mode - # ---------------------- - #test with fruit.ans - #test - treating as newline below... - #append rendered $overflow_right - #set overflow_right "" - set row $renderedrow - incr row - if {$row > [llength $outputlines]} { - lappend outputlines "" - } - set col $opt_startcolumn - # ---------------------- - } - lf_mid { - - if 0 { - #set rhswidth [punk::ansi::printing_length $overflow_right] - #only show debug when we have overflow? - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - - set rhs "" - if {$overflow_right ne ""} { - set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $overflow_right]] - set rhs [textblock::frame -title overflow_right $rhs] - } - puts [textblock::join $lhs " $post_render_col " $rhs] - } - - if {!$test_mode} { - #rendered - append rendered $overflow_right - #set replay_codes_overlay "" - set overflow_right "" - - - set row $renderedrow - - set col $opt_startcolumn - incr row - #only add newline if we're at the bottom - if {$row > [llength $outputlines]} { - lappend outputlines {*}[lrepeat 1 ""] - } - } else { - set edit_mode 0 - if {$edit_mode} { - set inputchunks [linsert $inputchunks 0 $overflow_right$unapplied] - set overflow_right "" - set unapplied "" - set row $post_render_row - #set col $post_render_col - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - lappend outputlines {*}[lrepeat 1 ""] - } - } else { - append rendered $overflow_right - set overflow_right "" - set row $post_render_row - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - lappend outputlines {*}[lrepeat 1 ""] - } - } - } - } - lf_overflow { - #linefeed after colwidth e.g at column 81 for an 80 col width - #we may also have other control sequences that came after col 80 e.g cursor save - - if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" - - #assertion - there should be no overflow.. - puts $lhs - } - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right - - set row $post_render_row - #set row $renderedrow - #incr row - #only add newline if we're at the bottom - if {$row > [llength $outputlines]} { - lappend outputlines {*}[lrepeat 1 ""] - } - set col $opt_startcolumn - - } - newlines_above { - #we get a newlines_above instruction when received at column 1 - #In some cases we want to treat that as request to insert a new blank line above, and move our row 1 down (staying with the data) - #in other cases - we want to treat at column 1 the same as any other - - puts "--->newlines_above" - puts "rinfo: $rinfo" - #renderline doesn't advance the row for us - the caller has the choice to implement or not - set row $post_render_row - set col $post_render_col - if {$insert_lines_above > 0} { - set row $renderedrow - set outputlines [linsert $outputlines $renderedrow-1 {*}[lrepeat $insert_lines_above ""]] - incr row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above - #? set row $post_render_row #can renderline tell us? - } - } - newlines_below { - #obsolete? - use for ANSI insert lines sequence - if {$data_mode == 0} { - puts --->nl_below - set row $post_render_row - set col $post_render_col - if {$insert_lines_below == 1} { - if {$test_mode == 0} { - set row $renderedrow - set outputlines [linsert $outputlines [expr {$renderedrow }] {*}[lrepeat $insert_lines_below ""]] ;#note - linsert can add to end too - incr row $insert_lines_below - set col $opt_startcolumn - } else { - #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] - #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] - #set rhs "" - #if {$overflow_right ne ""} { - # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] - # set rhs [textblock::frame -title overflow_right $rhs] - #} - #puts [textblock::join $lhs $rhs] - - #rendered - append rendered $overflow_right - # - - - set overflow_right "" - set row $renderedrow - #only add newline if we're at the bottom - if {$row > [llength $outputlines]} { - lappend outputlines {*}[lrepeat $insert_lines_below ""] - } - incr row $insert_lines_below - set col $opt_startcolumn - - - - } - } - } else { - set row $post_render_row - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines "" - } - } else { - set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - set lastdatacol [punk::ansi::printing_length $existingdata] - if {$lastdatacol < $colwidth} { - set col [expr {$lastdatacol+1}] - } else { - set col $colwidth - } - } - } - } - wrapmoveforward { - #doesn't seem to be used by fruit.ans testfile - #used by dzds.ans - #note that cursor_forward may move deep into the next line - or even span multiple lines !TODO - set c $colwidth - set r $post_render_row - if {$post_render_col > $colwidth} { - set i $c - while {$i <= $post_render_col} { - if {$c == $colwidth+1} { - incr r - if {$opt_appendlines} { - if {$r < [llength $outputlines]} { - lappend outputlines "" - } - } - set c $opt_startcolumn - } else { - incr c - } - incr i - } - set col $c - } else { - #why are we getting this instruction then? - puts stderr "wrapmoveforward - test" - set r [expr {$post_render_row +1}] - set c $post_render_col - } - set row $r - set col $c - } - wrapmovebackward { - set c $colwidth - set r $post_render_row - if {$post_render_col < 1} { - set c 1 - set i $c - while {$i >= $post_render_col} { - if {$c == 0} { - if {$r > 1} { - incr r -1 - set c $colwidth - } else { - #leave r at 1 set c 1 - #testfile besthpav.ans first line top left border alignment - set c 1 - break - } - } else { - incr c -1 - } - incr i -1 - } - set col $c - } else { - puts stderr "Wrapmovebackward - but postrendercol >= 1???" - } - set row $r - set col $c - } - overflow { - #normal single-width grapheme overflow - #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" - set row $post_render_row ;#renderline will not advance row when reporting overflow char - if {$autowrap_mode} { - incr row - set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? - } else { - #this works for test_mode (which should become the default) - but could give a bad result otherwise - review - add tests fix. - set col $post_render_col - #set unapplied "" ;#this seems wrong? - #set unapplied [tcl::string::range $unapplied 1 end] - #The overflow can only be triggered by a grapheme (todo cluster?) - but our unapplied could contain SGR codes prior to the grapheme that triggered overflow - so we need to skip beyond any SGRs - #There may be more than one, because although the stack leading up to overflow may have been merged - codes between the last column and the overflowing grapheme will remain separate - #We don't expect any movement or other ANSI codes - as if they came before the grapheme, they would have triggered a different instruction to 'overflow' - set idx 0 - set next_grapheme_index -1 - foreach u $unapplied_list { - if {![punk::ansi::ta::detect $u]} { - set next_grapheme_index $idx - break - } - incr idx - } - assert {$next_grapheme_index >= 0} - #drop the overflow grapheme - keeping all codes in place. - set unapplied [join [lreplace $unapplied_list $next_grapheme_index $next_grapheme_index] ""] - #we need to run the reduced unapplied on the same line - further graphemes will just overflow again, but codes or control chars could trigger jumps to other lines - - set overflow_handled 1 - #handled by dropping overflow if any - } - } - overflow_splitchar { - set row $post_render_row ;#renderline will not advance row when reporting overflow char - - #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts - #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc - if {$autowrap_mode} { - if {$colwidth < 2} { - #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character - set idx 0 - set triggering_grapheme_index -1 - foreach u $unapplied_list { - if {![punk::ansi::ta::detect $u]} { - set triggering_grapheme_index $idx - break - } - incr idx - } - set unapplied [join [lreplace $unapplied_list $triggering_grapheme_index $triggering_grapheme_index $opt_exposed1] ""] - } else { - set col $opt_startcolumn - incr row - } - } else { - set overflow_handled 1 - #handled by dropping entire overflow if any - if {$colwidth < 2} { - set idx 0 - set triggering_grapheme_index -1 - foreach u $unapplied_list { - if {![punk::ansi::ta::detect $u]} { - set triggering_grapheme_index $idx - break - } - incr idx - } - set unapplied [join [lreplace $unapplied_list $triggering_grapheme_index $triggering_grapheme_index $opt_exposed1] ""] - } - } - - } - vt { - - #can vt add a line like a linefeed can? - set row $post_render_row - set col $post_render_col - } - default { - puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" - } - - } - - - if {!$opt_overflow && !$autowrap_mode} { - #not allowed to overflow column or wrap therefore we get overflow data to truncate - if {[tcl::dict::get $opts -ellipsis]} { - set show_ellipsis 1 - if {!$opt_ellipsiswhitespace} { - #we don't want ellipsis if only whitespace was lost - set lostdata "" - if {$overflow_right ne ""} { - append lostdata $overflow_right - } - if {$unapplied ne ""} { - append lostdata $unapplied - } - if {[tcl::string::trim $lostdata] eq ""} { - set show_ellipsis 0 - } - #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] - if {[tcl::string::trim [punk::ansi::stripansi $lostdata]] eq ""} { - set show_ellipsis 0 - } - } - if {$show_ellipsis} { - set rendered [overtype::right $rendered $opt_ellipsistext] - } - set overflow_handled 1 - } else { - #no wrap - no ellipsis - silently truncate - set overflow_handled 1 - } - } - - - - if {$renderedrow <= [llength $outputlines]} { - lset outputlines [expr {$renderedrow-1}] $rendered - } else { - if {$opt_appendlines} { - lappend outputlines $rendered - } else { - #? - lset outputlines [expr {$renderedrow-1}] $rendered - } - } - - if {!$overflow_handled} { - append nextprefix $overflow_right - } - - append nextprefix $unapplied - - if 0 { - if {$nextprefix ne ""} { - set nextoveridx [expr {$overidx+1}] - if {$nextoveridx >= [llength $inputchunks]} { - lappend inputchunks $nextprefix - } else { - #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] - set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] - } - } - } - - if {$nextprefix ne ""} { - set inputchunks [linsert $inputchunks 0 $nextprefix] - } - - - incr overidx - incr loop - if {$loop >= $looplimit} { - puts stderr "overtype::renderspace looplimit reached ($looplimit)" - lappend outputlines "[a+ red bold] - looplimit $looplimit reached[a]" - set Y [a+ yellow bold] - set RST [a] - set sep_header ----DEBUG----- - set debugmsg "" - append debugmsg "${Y}${sep_header}${RST}" \n - append debugmsg "looplimit $looplimit reached\n" - append debugmsg "test_mode:$test_mode\n" - append debugmsg "data_mode:$data_mode\n" - append debugmsg "opt_appendlines:$opt_appendlines\n" - append debugmsg "prev_row :[tcl::dict::get $LASTCALL -cursor_row]\n" - append debugmsg "prev_col :[tcl::dict::get $LASTCALL -cursor_column]\n" - tcl::dict::for {k v} $rinfo { - append debugmsg "${Y}$k [ansistring VIEW -lf 1 -vt 1 $v]$RST" \n - } - append debugmsg "${Y}[string repeat - [string length $sep_header]]$RST" \n - - puts stdout $debugmsg - #todo - config regarding error dumps rather than just dumping in working dir - set fd [open [pwd]/error_overtype.txt w] - puts $fd $debugmsg - close $fd - error $debugmsg - break - } - } - - set result [join $outputlines \n] - if {$info_mode} { - #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? - #append result \n$instruction_stats\n - } - return $result - } - - #todo - left-right ellipsis ? - proc centre {args} { - variable default_ellipsis_horizontal - if {[llength $args] < 2} { - error {usage: ?-transparent [0|1]? ?-bias [left|right]? ?-overflow [1|0]? undertext overtext} - } - - foreach {underblock overblock} [lrange $args end-1 end] break - - #todo - vertical vs horizontal overflow for blocks - set opts [tcl::dict::create\ - -bias left\ - -ellipsis 0\ - -ellipsistext $default_ellipsis_horizontal\ - -ellipsiswhitespace 0\ - -overflow 0\ - -transparent 0\ - -exposed1 \uFFFD\ - -exposed2 \uFFFD\ - ] - set argsflags [lrange $args 0 end-2] - foreach {k v} $argsflags { - switch -- $k { - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 { - tcl::dict::set opts $k $v - } - default { - set known_opts [tcl::dict::keys $opts] - error "overtype::centre unknown option '$k'. Known options: $known_opts" - } - } - } - #set opts [tcl::dict::merge $defaults $argsflags] - # -- --- --- --- --- --- - set opt_transparent [tcl::dict::get $opts -transparent] - set opt_ellipsis [tcl::dict::get $opts -ellipsis] - set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] - set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] - set opt_exposed1 [tcl::dict::get $opts -exposed1] - set opt_exposed2 [tcl::dict::get $opts -exposed2] - # -- --- --- --- --- --- - - - set underblock [tcl::string::map {\r\n \n} $underblock] - set overblock [tcl::string::map {\r\n \n} $overblock] - - set underlines [split $underblock \n] - #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] - lassign [blocksize $underblock] _w colwidth _h colheight - set overlines [split $overblock \n] - #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] - lassign [blocksize $overblock] _w overblock_width _h overblock_height - set under_exposed_max [expr {$colwidth - $overblock_width}] - if {$under_exposed_max > 0} { - #background block is wider - if {$under_exposed_max % 2 == 0} { - #even left/right exposure - set left_exposed [expr {$under_exposed_max / 2}] - } else { - set beforehalf [expr {$under_exposed_max / 2}] ;#1 less than half due to integer division - if {[tcl::string::tolower [tcl::dict::get $opts -bias]] eq "left"} { - set left_exposed $beforehalf - } else { - #bias to the right - set left_exposed [expr {$beforehalf + 1}] - } - } - } else { - set left_exposed 0 - } - - set outputlines [list] - if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { - set replay_codes "[punk::ansi::a]" - } else { - set replay_codes "" - } - set replay_codes_underlay "" - set replay_codes_overlay "" - foreach undertext $underlines overtext $overlines { - set overtext_datalen [punk::ansi::printing_length $overtext] - set ulen [punk::ansi::printing_length $undertext] - if {$ulen < $colwidth} { - set udiff [expr {$colwidth - $ulen}] - set undertext "$undertext[string repeat { } $udiff]" - } - set undertext [tcl::string::cat $replay_codes_underlay $undertext] - set overtext [tcl::string::cat $replay_codes_overlay $overtext] - - set overflowlength [expr {$overtext_datalen - $colwidth}] - #review - right-to-left langs should elide on left! - extra option required - - if {$overflowlength > 0} { - #overlay line wider or equal - set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow [tcl::dict::get $opts -overflow] -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set unapplied [tcl::dict::get $rinfo unapplied] - #todo - get replay_codes from overflow_right instead of wherever it was truncated? - - #overlay line data is wider - trim if overflow not specified in opts - and overtype an ellipsis at right if it was specified - if {![tcl::dict::get $opts -overflow]} { - #lappend outputlines [tcl::string::range $overtext 0 [expr {$colwidth - 1}]] - #set overtext [tcl::string::range $overtext 0 $colwidth-1 ] - if {$opt_ellipsis} { - set show_ellipsis 1 - if {!$opt_ellipsiswhitespace} { - #we don't want ellipsis if only whitespace was lost - #don't use tcl::string::range on ANSI data - #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] - set lostdata "" - if {$overflow_right ne ""} { - append lostdata $overflow_right - } - if {$unapplied ne ""} { - append lostdata $unapplied - } - if {[tcl::string::trim $lostdata] eq ""} { - set show_ellipsis 0 - } - } - if {$show_ellipsis} { - set rendered [overtype::right $rendered $opt_ellipsistext] - } - } - } - lappend outputlines $rendered - #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent $undertext $overtext] - } else { - #background block is wider than or equal to data for this line - #lappend outputlines [renderline -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] - set rinfo [renderline -info 1 -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] - lappend outputlines [tcl::dict::get $rinfo result] - } - set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] - set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - } - return [join $outputlines \n] - } - - #overtype::right is for a rendered ragged underblock and a rendered ragged overblock - #ie we can determine the block width for bost by examining the lines and picking the longest. - # - proc right {args} { - #NOT the same as align-right - which should be done to the overblock first if required - variable default_ellipsis_horizontal - # @d !todo - implement overflow, length checks etc - - if {[llength $args] < 2} { - error {usage: ?-overflow [1|0]? ?-transparent 0|? undertext overtext} - } - foreach {underblock overblock} [lrange $args end-1 end] break - - set opts [tcl::dict::create\ - -bias ignored\ - -ellipsis 0\ - -ellipsistext $default_ellipsis_horizontal\ - -ellipsiswhitespace 0\ - -overflow 0\ - -transparent 0\ - -exposed1 \uFFFD\ - -exposed2 \uFFFD\ - -align "left"\ - ] - set argsflags [lrange $args 0 end-2] - tcl::dict::for {k v} $argsflags { - switch -- $k { - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 - -align { - tcl::dict::set opts $k $v - } - default { - set known_opts [tcl::dict::keys $opts] - error "overtype::centre unknown option '$k'. Known options: $known_opts" - } - } - } - #set opts [tcl::dict::merge $defaults $argsflags] - # -- --- --- --- --- --- - set opt_transparent [tcl::dict::get $opts -transparent] - set opt_ellipsis [tcl::dict::get $opts -ellipsis] - set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] - set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] - set opt_overflow [tcl::dict::get $opts -overflow] - set opt_exposed1 [tcl::dict::get $opts -exposed1] - set opt_exposed2 [tcl::dict::get $opts -exposed2] - set opt_align [tcl::dict::get $opts -align] - # -- --- --- --- --- --- - - set underblock [tcl::string::map {\r\n \n} $underblock] - set overblock [tcl::string::map {\r\n \n} $overblock] - - set underlines [split $underblock \n] - #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] - lassign [blocksize $underblock] _w colwidth _h colheight - set overlines [split $overblock \n] - #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] - lassign [blocksize $overblock] _w overblock_width _h overblock_height - set under_exposed_max [expr {max(0,$colwidth - $overblock_width)}] - set left_exposed $under_exposed_max - - - - set outputlines [list] - if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { - set replay_codes "[punk::ansi::a]" - } else { - set replay_codes "" - } - set replay_codes_underlay "" - set replay_codes_overlay "" - foreach undertext $underlines overtext $overlines { - set overtext_datalen [punk::ansi::printing_length $overtext] - set ulen [punk::ansi::printing_length $undertext] - if {$ulen < $colwidth} { - set udiff [expr {$colwidth - $ulen}] - #puts xxx - append undertext [string repeat { } $udiff] - } - if {$overtext_datalen < $overblock_width} { - set odiff [expr {$overblock_width - $overtext_datalen}] - switch -- $opt_align { - left { - set startoffset 0 - } - right { - set startoffset $odiff - } - default { - set half [expr {$odiff / 2}] - #set lhs [string repeat { } $half] - #set righthalf [expr {$odiff - $half}] ;#remainder - may be one more - so we are biased left - #set rhs [string repeat { } $righthalf] - set startoffset $half - } - } - } else { - set startoffset 0 ;#negative? - } - - set undertext [tcl::string::cat $replay_codes_underlay $undertext] - set overtext [tcl::string::cat $replay_codes_overlay $overtext] - - set overflowlength [expr {$overtext_datalen - $colwidth}] - if {$overflowlength > 0} { - #raw overtext wider than undertext column - set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -startcolumn [expr {1 + $startoffset}] $undertext $overtext] - set replay_codes [tcl::dict::get $rinfo replay_codes] - set rendered [tcl::dict::get $rinfo result] - if {!$opt_overflow} { - if {$opt_ellipsis} { - set show_ellipsis 1 - if {!$opt_ellipsiswhitespace} { - #we don't want ellipsis if only whitespace was lost - set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] - if {[tcl::string::trim $lostdata] eq ""} { - set show_ellipsis 0 - } - } - if {$show_ellipsis} { - set ellipsis [tcl::string::cat $replay_codes $opt_ellipsistext] - #todo - overflow on left if allign = right?? - set rendered [overtype::right $rendered $ellipsis] - } - } - } - lappend outputlines $rendered - } else { - #padded overtext - #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] - #Note - we still need overflow here - as although the overtext is short - it may oveflow due to the startoffset - set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] - lappend outputlines [tcl::dict::get $rinfo result] - } - set replay_codes [tcl::dict::get $rinfo replay_codes] - set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] - set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - } - - return [join $outputlines \n] - } - - proc left {args} { - overtype::block -blockalign left {*}$args - } - #overtype a (possibly ragged) underblock with a (possibly ragged) overblock - proc block {args} { - variable default_ellipsis_horizontal - # @d !todo - implement overflow, length checks etc - - if {[llength $args] < 2} { - error {usage: ?-blockalign left|centre|right? ?-textalign left|centre|right? ?-overflow [1|0]? ?-transparent 0|? undertext overtext} - } - #foreach {underblock overblock} [lrange $args end-1 end] break - lassign [lrange $args end-1 end] underblock overblock - - set opts [tcl::dict::create\ - -ellipsis 0\ - -ellipsistext $default_ellipsis_horizontal\ - -ellipsiswhitespace 0\ - -overflow 0\ - -transparent 0\ - -exposed1 \uFFFD\ - -exposed2 \uFFFD\ - -textalign "left"\ - -textvertical "top"\ - -blockalign "left"\ - -blockalignbias left\ - -blockvertical "top"\ - ] - set argsflags [lrange $args 0 end-2] - tcl::dict::for {k v} $argsflags { - switch -- $k { - -blockalignbias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 - -textalign - -blockalign - -blockvertical { - tcl::dict::set opts $k $v - } - default { - error "overtype::block unknown option '$k'. Known options: [tcl::dict::keys $opts]" - } - } - } - # -- --- --- --- --- --- - set opt_transparent [tcl::dict::get $opts -transparent] - set opt_ellipsis [tcl::dict::get $opts -ellipsis] - set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] - set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] - set opt_overflow [tcl::dict::get $opts -overflow] - set opt_exposed1 [tcl::dict::get $opts -exposed1] - set opt_exposed2 [tcl::dict::get $opts -exposed2] - set opt_textalign [tcl::dict::get $opts -textalign] - set opt_blockalign [tcl::dict::get $opts -blockalign] - if {$opt_blockalign eq "center"} { - set opt_blockalign "centre" - } - # -- --- --- --- --- --- - - set underblock [tcl::string::map {\r\n \n} $underblock] - set overblock [tcl::string::map {\r\n \n} $overblock] - - set underlines [split $underblock \n] - #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] - lassign [blocksize $underblock] _w colwidth _h colheight - set overlines [split $overblock \n] - #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] - lassign [blocksize $overblock] _w overblock_width _h overblock_height - set under_exposed_max [expr {max(0,$colwidth - $overblock_width)}] - - switch -- $opt_blockalign { - left { - set left_exposed 0 - } - right { - set left_exposed $under_exposed_max - } - centre { - if {$under_exposed_max > 0} { - #background block is wider - if {$under_exposed_max % 2 == 0} { - #even left/right exposure - set left_exposed [expr {$under_exposed_max / 2}] - } else { - set beforehalf [expr {$under_exposed_max / 2}] ;#1 less than half due to integer division - if {[tcl::string::tolower [tcl::dict::get $opts -blockalignbias]] eq "left"} { - set left_exposed $beforehalf - } else { - #bias to the right - set left_exposed [expr {$beforehalf + 1}] - } - } - } else { - set left_exposed 0 - } - } - default { - set left_exposed 0 - } - } - - - - set outputlines [list] - if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { - set replay_codes "[punk::ansi::a]" - } else { - set replay_codes "" - } - set replay_codes_underlay "" - set replay_codes_overlay "" - foreach undertext $underlines overtext $overlines { - set overtext_datalen [punk::ansi::printing_length $overtext] - set ulen [punk::ansi::printing_length $undertext] - if {$ulen < $colwidth} { - set udiff [expr {$colwidth - $ulen}] - #puts xxx - append undertext [string repeat { } $udiff] - } - if {$overtext_datalen < $overblock_width} { - set odiff [expr {$overblock_width - $overtext_datalen}] - switch -- $opt_textalign { - left { - set startoffset 0 - } - right { - set startoffset $odiff - } - default { - set half [expr {$odiff / 2}] - #set lhs [string repeat { } $half] - #set righthalf [expr {$odiff - $half}] ;#remainder - may be one more - so we are biased left - #set rhs [string repeat { } $righthalf] - set startoffset $half - } - } - } else { - set startoffset 0 ;#negative? - } - - set undertext [tcl::string::cat $replay_codes_underlay $undertext] - set overtext [tcl::string::cat $replay_codes_overlay $overtext] - - set overflowlength [expr {$overtext_datalen - $colwidth}] - if {$overflowlength > 0} { - #raw overtext wider than undertext column - set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -startcolumn [expr {1 + $startoffset}] $undertext $overtext] - set replay_codes [tcl::dict::get $rinfo replay_codes] - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set unapplied [tcl::dict::get $rinfo unapplied] - if {!$opt_overflow} { - if {$opt_ellipsis} { - set show_ellipsis 1 - if {!$opt_ellipsiswhitespace} { - #we don't want ellipsis if only whitespace was lost - #don't use tcl::string::range on ANSI data - #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] - set lostdata "" - if {$overflow_right ne ""} { - append lostdata $overflow_right - } - if {$unapplied ne ""} { - append lostdata $unapplied - } - if {[tcl::string::trim $lostdata] eq ""} { - set show_ellipsis 0 - } - } - if {$show_ellipsis} { - set rendered [overtype::block -blockalign right $rendered $opt_ellipsistext] - } - } - - #if {$opt_ellipsis} { - # set show_ellipsis 1 - # if {!$opt_ellipsiswhitespace} { - # #we don't want ellipsis if only whitespace was lost - # set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] - # if {[tcl::string::trim $lostdata] eq ""} { - # set show_ellipsis 0 - # } - # } - # if {$show_ellipsis} { - # set ellipsis [tcl::string::cat $replay_codes $opt_ellipsistext] - # #todo - overflow on left if allign = right?? - # set rendered [overtype::right $rendered $ellipsis] - # } - #} - } - lappend outputlines $rendered - } else { - #padded overtext - #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] - #Note - we still need overflow here - as although the overtext is short - it may oveflow due to the startoffset - set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set unapplied [tcl::dict::get $rinfo unapplied] - lappend outputlines [tcl::dict::get $rinfo result] - } - set replay_codes [tcl::dict::get $rinfo replay_codes] - set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] - set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - } - - return [join $outputlines \n] - } - # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - # renderline written from a left-right line orientation perspective as a first-shot at getting something useful. - # ultimately right-to-left, top-to-bottom and bottom-to-top are probably needed. - # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - # - # - #-returnextra enables returning of overflow and length - #review - use punk::ansi::ta::detect to short-circuit processing and do simpler string calcs as an optimisation? - #review - DECSWL/DECDWL double width line codes - very difficult/impossible to align and compose with other elements - #(could render it by faking it with sixels and a lot of work - find/make a sixel font and ensure it's exactly 2 cols per char) - #todo - review transparency issues with single/double width characters - #bidi - need a base direction and concept of directional runs for RTL vs LTR - may be best handled at another layer? - proc renderline {args} { - #*** !doctools - #[call [fun overtype::renderline] [arg args] ] - #[para] renderline is the core engine for overtype string processing (frames & textblocks), and the raw mode commandline repl for the Tcl Punk Shell - #[para] It is also a central part of an ansi (micro) virtual terminal-emulator of sorts - #[para] This system does a half decent job at rendering 90's ANSI art to manipulable colour text blocks that can be joined & framed for layout display within a unix or windows terminal - #[para] Renderline helps maintain ANSI text styling reset/replay codes so that the styling of one block doesn't affect another. - #[para] Calling on the punk::ansi library - it can coalesce codes to keep the size down. - #[para] It is a giant mess of doing exactly what common wisdom says not to do... lots at once. - #[para] renderline is part of the Unicode and ANSI aware Overtype system which 'renders' a block of text onto a static underlay - #[para] The underlay is generally expected to be an ordered set of lines or a rectangular text block analogous to a terminal screen - but it can also be ragged in line length, or just blank. - #[para] The overlay couuld be similar - in which case it may often be used to overwrite a column or section of the underlay. - #[para] The overlay could however be a sequence of ANSI-laden text that jumps all over the place. - # - #[para] renderline itself only deals with a single line - or sometimes a single character. It is generally called from a loop that does further terminal-like or textblock processing. - #[para] By suppyling the -info 1 option - it can return various fields indicating the state of the render. - #[para] The main 3 are the result, overflow_right, and unapplied. - #[para] Renderline handles cursor movements from either keystrokes or ANSI sequences but for a full system the aforementioned loop will need to be in place to manage the set of lines under manipulation. - - if {[llength $args] < 2} { - error {usage: ?-info 0|1? ?-startcolumn ? ?-cursor_column ? ?-cursor_row |""? ?-transparent [0|1|]? ?-overflow [1|0]? undertext overtext} - } - lassign [lrange $args end-1 end] under over - if {[string first \n $under] >= 0} { - error "overtype::renderline not allowed to contain newlines in undertext" - } - #if {[string first \n $over] >=0 || [string first \n $under] >= 0} { - # error "overtype::renderline not allowed to contain newlines" - #} - - #generally faster to create a new dict in the proc than to use a namespace variable to create dict once and link to variable (2024 8.6/8.7) - set opts [tcl::dict::create\ - -etabs 0\ - -width \uFFEF\ - -overflow 0\ - -transparent 0\ - -startcolumn 1\ - -cursor_column 1\ - -cursor_row ""\ - -insert_mode 1\ - -autowrap_mode 1\ - -reverse_mode 0\ - -info 0\ - -exposed1 \uFFFD\ - -exposed2 \uFFFD\ - -cursor_restore_attributes ""\ - -cp437 0\ - -experimental {}\ - ] - #-cursor_restore_attributes only - for replay stack - position and actual setting/restoring handled by throwback to caller - - #cursor_row, when numeric will allow detection of certain row moves that are still within our row - allowing us to avoid an early return - #An empty string for cursor_row tells us we have no info about our own row context, and to return with an unapplied string if any row move occurs - - #exposed1 and exposed2 for first and second col of underying 2wide char which is truncated by transparency or overflow - #todo - return info about such grapheme 'cuts' in -info structure and/or create option to raise an error - - set argsflags [lrange $args 0 end-2] - tcl::dict::for {k v} $argsflags { - switch -- $k { - -experimental - -cp437 - -width - -overflow - -transparent - -startcolumn - -cursor_column - -cursor_row - -insert_mode - -autowrap_mode - -reverse_mode - -info - -exposed1 - -exposed2 - -cursor_restore_attributes { - tcl::dict::set opts $k $v - } - default { - error "overtype::renderline unknown option '$k'. Known options: [tcl::dict::keys $opts]" - } - } - } - # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_width [tcl::dict::get $opts -width] - set opt_etabs [tcl::dict::get $opts -etabs] - set opt_overflow [tcl::dict::get $opts -overflow] - set opt_colstart [tcl::dict::get $opts -startcolumn] ;#lhs limit for overlay - an offset to cursor_column - first visible column is 1. 0 or < 0 are before the start of the underlay - set opt_colcursor [tcl::dict::get $opts -cursor_column];#start cursor column relative to overlay - set opt_row_context [tcl::dict::get $opts -cursor_row] - if {[string length $opt_row_context]} { - if {![tcl::string::is integer -strict $opt_row_context] || $opt_row_context <1 } { - error "overtype::renderline -cursor_row must be empty for unspecified/unknown or a non-zero positive integer. received: '$opt_row_context'" - } - } - # -- --- --- --- --- --- --- --- --- --- --- --- - #The _mode flags correspond to terminal modes that can be set/reset via escape sequences (e.g DECAWM wraparound mode) - set opt_insert_mode [tcl::dict::get $opts -insert_mode];#should usually be 1 for each new line in editor mode but must be initialised to 1 externally (review) - #default is for overtype - # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_autowrap_mode [tcl::dict::get $opts -autowrap_mode] ;#DECAWM - char or movement can go beyond leftmost/rightmost col to prev/next line - set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] ;#DECSNM - # -- --- --- --- --- --- --- --- --- --- --- --- - set temp_cursor_saved [tcl::dict::get $opts -cursor_restore_attributes] - - set test_mode 0 - set cp437_glyphs [tcl::dict::get $opts -cp437] - foreach e [tcl::dict::get $opts -experimental] { - switch -- $e { - test_mode { - set test_mode 1 - set cp437_glyphs 1 - } - } - } - set test_mode 1 ;#try to elminate - set cp437_map [tcl::dict::create] - if {$cp437_glyphs} { - set cp437_map [set ::punk::ansi::cp437_map] - #for cp437 images we need to map these *after* splitting ansi - #some old files might use newline for its glyph.. but we can't easily support that. - #Not sure how old files did it.. maybe cr lf in sequence was newline and any lone cr or lf were displayed as glyphs? - tcl::dict::unset cp437_map \n - } - - set opt_transparent [tcl::dict::get $opts -transparent] - if {$opt_transparent eq "0"} { - set do_transparency 0 - } else { - set do_transparency 1 - if {$opt_transparent eq "1"} { - set opt_transparent {[\s]} - } - } - # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_returnextra [tcl::dict::get $opts -info] - # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_exposed1 [tcl::dict::get $opts -exposed1] - set opt_exposed2 [tcl::dict::get $opts -exposed2] - # -- --- --- --- --- --- --- --- --- --- --- --- - - if {$opt_row_context eq ""} { - set cursor_row 1 - } else { - set cursor_row $opt_row_context - } - - - #----- - # - if {[info exists punk::console::tabwidth]} { - #punk console is updated if punk::console::set_tabstop_width is used or rep is started/restarted - #It is way too slow to test the current width by querying the terminal here - so it could conceivably get out of sync - #todo - we also need to handle the new threaded repl where console config is in a different thread. - # - also - concept of sub-regions being mini-consoles with their own settings - 'id' for console, or use in/out channels as id? - set tw $::punk::console::tabwidth - } else { - set tw 8 - } - - set overdata $over - if {!$cp437_glyphs} { - #REVIEW! tabify will give different answers for an ANSI colourised string vs plain text - if {!$opt_etabs} { - if {[string first \t $under] >= 0} { - #set under [textutil::tabify::untabify2 $under] - set under [textutil::tabify::untabifyLine $under $tw] - } - if {[string first \t $over] >= 0} { - #set overdata [textutil::tabify::untabify2 $over] - set overdata [textutil::tabify::untabifyLine $over $tw] - } - } - } - #------- - - #ta_detect ansi and do simpler processing? - - #we repeat tests for grapheme width in different loops - rather than create another datastructure to store widths based on column, - #we'll use the grapheme_width_cached function as a lookup table of all graphemes encountered - as there will often be repeats in different positions anyway. - - # -- --- --- --- --- --- --- --- - if {$under ne ""} { - if {[punk::ansi::ta::detect $under]} { - set undermap [punk::ansi::ta::split_codes_single $under] - } else { - #single plaintext part - set undermap [list $under] - } - } else { - set undermap [list] - } - set understacks [list] - set understacks_gx [list] - - set i_u -1 ;#underlay may legitimately be empty - set undercols [list] - set u_codestack [list] - #u_gx_stack probably isn't really a stack - I don't know if g0 g1 can stack or not - for now we support only g0 anyway - set u_gx_stack [list] ;#separate stack for g0 (g1 g2 g3?) graphics on and off (DEC special graphics) - #set pt_underchars "" ;#for string_columns length calculation for overflow 0 truncation - set remainder [list] ;#for returnextra - foreach {pt code} $undermap { - #pt = plain text - #append pt_underchars $pt - if {$cp437_glyphs} { - set pt [tcl::string::map $cp437_map $pt] - } - foreach grapheme [punk::char::grapheme_split $pt] { - #an ugly hack to serve *some* common case ascii quickly with byte-compiled literal switch - feels dirty. - #.. but even 0.5uS per char (grapheme_width_cached) adds up quickly when stitching lots of lines together. - switch -- $grapheme { - " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - - a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - - z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { - set width 1 - } - default { - if {$grapheme eq "\u0000"} { - #use null as empty cell representation - review - #use of this will probably collide with some application at some point - #consider an option to set the empty cell character - set width 1 - } else { - set width [grapheme_width_cached $grapheme] - #we still want most controls and other zero-length codepoints such as \u200d (zero width joiner) to stay zero-length - #we substitute lone ESC that weren't captured within ANSI context as a debugging aid to see malformed ANSI - #todo - default to off and add a flag (?) to enable this substitution - set sub_stray_escapes 0 - if {$sub_stray_escapes && $width == 0} { - if {$grapheme eq "\x1b"} { - set gvis [ansistring VIEW $grapheme] - set grapheme $gvis - set width 1 - } - } - } - } - } - #set width [grapheme_width_cached $grapheme] - incr i_u - lappend understacks $u_codestack - lappend understacks_gx $u_gx_stack - - lappend undercols $grapheme - if {$width > 1} { - #presumably there are no triple-column or wider unicode chars.. until the aliens arrive.(?) - #but what about emoji combinations etc - can they be wider than 2? - #todo - if -etabs enabled - then we treat \t as the width determined by our elastic tabstop - incr i_u - lappend understacks $u_codestack - lappend understacks_gx $u_gx_stack - lappend undercols "" - } - } - - #underlay should already have been rendered and not have non-sgr codes - but let's retain the check for them and not stack them if other codes are here - - #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc - if {$code ne ""} { - set c1c2 [tcl::string::range $code 0 1] - - set leadernorm [tcl::string::range [tcl::string::map [list\ - \x1b\[ 7CSI\ - \x9b 8CSI\ - \x1b\( 7GFX\ - ] $c1c2] 0 3];# leadernorm is 1st 2 chars mapped to normalised indicator - or is original 2 chars - - switch -- $leadernorm { - 7CSI - 8CSI { - #need to exclude certain leaders after the lb e.g < for SGR 1006 mouse - #REVIEW - what else could end in m but be mistaken as a normal SGR code here? - set maybemouse "" - if {[tcl::string::index $c1c2 0] eq "\x1b"} { - set maybemouse [tcl::string::index $code 2] - } - - if {$maybemouse ne "<" && [tcl::string::index $code end] eq "m"} { - if {[punk::ansi::codetype::is_sgr_reset $code]} { - set u_codestack [list "\x1b\[m"] - } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { - set u_codestack [list $code] - } else { - #basic simplification first.. straight dups - set dup_posns [lsearch -all -exact $u_codestack $code] ;#-exact because of square-bracket glob chars - set u_codestack [lremove $u_codestack {*}$dup_posns] - lappend u_codestack $code - } - } - } - 7GFX { - switch -- [tcl::string::index $code 2] { - "0" { - set u_gx_stack [list gx0_on] ;#we'd better use a placeholder - or debugging will probably get into a big mess - } - B { - set u_gx_stack [list] - } - } - } - default { - - } - - } - - #if {[punk::ansi::codetype::is_sgr_reset $code]} { - # #set u_codestack [list] - #} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { - #} elseif {[punk::ansi::codetype::is_sgr $code]} { - #} else { - # #leave SGR stack as is - # if {[punk::ansi::codetype::is_gx_open $code]} { - # } elseif {[punk::ansi::codetype::is_gx_close $code]} { - # } - #} - } - #consider also if there are other codes that should be stacked..? - } - - if {!$test_mode} { - #fill columns to width with spaces, and carry over stacks - we will have to keep track of where the underlying data ends manually - TODO - #Specifying a width is suitable for terminal-like applications and text-blocks - if {$opt_width ne "\uFFEF"} { - if {[llength $understacks]} { - set cs $u_codestack - set gs $u_gx_stack - } else { - set cs [list] - set gs [list] - } - if {[llength $undercols]< $opt_width} { - set diff [expr {$opt_width- [llength $undercols]}] - if {$diff > 0} { - lappend undercols {*}[lrepeat $diff " "] - lappend understacks {*}[lrepeat $diff $cs] - lappend understacks_gx {*}[lrepeat $diff $gs] - } - } - } - } else { - #NULL empty cell indicator - if {$opt_width ne "\uFFEF"} { - if {[llength $understacks]} { - set cs $u_codestack - set gs $u_gx_stack - } else { - set cs [list] - set gs [list] - } - if {[llength $undercols]< $opt_width} { - set diff [expr {$opt_width- [llength $undercols]}] - if {$diff > 0} { - lappend undercols {*}[lrepeat $diff "\u0000"] - lappend understacks {*}[lrepeat $diff $cs] - lappend understacks_gx {*}[lrepeat $diff $gs] - } - } - } - - } - if {$opt_width ne "\uFFEF"} { - set colwidth $opt_width - } else { - set colwidth [llength $undercols] - } - - - if 0 { - # ----------------- - # if we aren't extending understacks & understacks_gx each time we incr idx above the undercols length.. this doesn't really serve a purpose - # Review. - # ----------------- - #replay code for last overlay position in input line - # whether or not we get that far - we need to return it for possible replay on next line - if {[llength $understacks]} { - lappend understacks $u_codestack - lappend understacks_gx $u_gx_stack - } else { - #in case overlay onto emptystring as underlay - lappend understacks [list] - lappend understacks_gx [list] - } - # ----------------- - } - - #trailing codes in effect for underlay - if {[llength $u_codestack]} { - #set replay_codes_underlay [join $u_codestack ""] - set replay_codes_underlay [punk::ansi::codetype::sgr_merge_list {*}$u_codestack] - } else { - set replay_codes_underlay "" - } - - - # -- --- --- --- --- --- --- --- - #### - #if opt_colstart - we need to build a space (or any singlewidth char ?) padding on the left of the right number of columns. - #this will be processed as transparent - and handle doublewidth underlay characters appropriately - set startpad_overlay [string repeat " " [expr {$opt_colstart -1}]] - append startpad_overlay $overdata ;#overdata with left padding spaces based on col-start under will show through for left-padding portion regardless of -transparency - if {$startpad_overlay ne ""} { - if {[punk::ansi::ta::detect $startpad_overlay]} { - set overmap [punk::ansi::ta::split_codes_single $startpad_overlay] - } else { - #single plaintext part - set overmap [list $startpad_overlay] - } - } else { - set overmap [list] - } - #set overmap [punk::ansi::ta::split_codes_single $startpad_overlay] - #### - - #??? - set colcursor $opt_colstart - #TODO - make a little virtual column object - #we need to refer to column1 or columnmin? or columnmax without calculating offsets due to to startcolumn - #need to lock-down what start column means from perspective of ANSI codes moving around - the offset perspective is unclear and a mess. - - - #set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} - #as at 2024-02 punk::char::grapheme_split uses these - not aware of more complex graphemes - - set overstacks [list] - set overstacks_gx [list] - - set o_codestack [list]; #SGR codestack (not other codes such as movement,insert key etc) - set o_gxstack [list] - set pt_overchars "" - set i_o 0 - set overlay_grapheme_control_list [list] ;#tag each with g, sgr or other. 'other' are things like cursor-movement or insert-mode or codes we don't recognise/use - #experiment - set overlay_grapheme_control_stacks [list] - foreach {pt code} $overmap { - - #todo - wrap in test for empty pt (we used split_codes_single - and it may be common for sgr sequences to be unmerged and so have empty pts between) - if {$cp437_glyphs} { - set pt [tcl::string::map $cp437_map $pt] - } - append pt_overchars $pt - #will get empty pt between adjacent codes - foreach grapheme [punk::char::grapheme_split $pt] { - lappend overstacks $o_codestack - lappend overstacks_gx $o_gxstack - incr i_o - lappend overlay_grapheme_control_list [list g $grapheme] - lappend overlay_grapheme_control_stacks $o_codestack - } - - #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc - #order of if-else based on assumptions: - # that pure resets are fairly common - more so than leading resets with other info - # that non-sgr codes are not that common, so ok to check for resets before verifying it is actually SGR at all. - if {$code ne ""} { - lappend overlay_grapheme_control_stacks $o_codestack - #there will always be an empty code at end due to foreach on 2 vars with odd-sized list ending with pt (overmap coming from perlish split) - if {[punk::ansi::codetype::is_sgr_reset $code]} { - set o_codestack [list "\x1b\[m"] ;#reset better than empty list - fixes some ansi art issues - lappend overlay_grapheme_control_list [list sgr $code] - } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { - set o_codestack [list $code] - lappend overlay_grapheme_control_list [list sgr $code] - } elseif {[priv::is_sgr $code]} { - #basic simplification first - remove straight dupes - set dup_posns [lsearch -all -exact $o_codestack $code] ;#must be -exact because of square-bracket glob chars - set o_codestack [lremove $o_codestack {*}$dup_posns] - lappend o_codestack $code - lappend overlay_grapheme_control_list [list sgr $code] - } elseif {[regexp {\x1b7|\x1b\[s} $code]} { - #experiment - #cursor_save - for the replays review. - #jmn - #set temp_cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] - lappend overlay_grapheme_control_list [list other $code] - } elseif {[regexp {\x1b8|\x1b\[u} $code]} { - #experiment - #cursor_restore - for the replays - set o_codestack [list $temp_cursor_saved] - lappend overlay_grapheme_control_list [list other $code] - } else { - if {[punk::ansi::codetype::is_gx_open $code]} { - set o_gxstack [list "gx0_on"] - lappend overlay_grapheme_control_list [list gx0 gx0_on] ;#don't store code - will complicate debugging if we spit it out and jump character sets - } elseif {[punk::ansi::codetype::is_gx_close $code]} { - set o_gxstack [list] - lappend overlay_grapheme_control_list [list gx0 gx0_off] ;#don't store code - will complicate debugging if we spit it out and jump character sets - } else { - lappend overlay_grapheme_control_list [list other $code] - } - } - } - - } - #replay code for last overlay position in input line - should take account of possible trailing sgr code after last grapheme - set max_overlay_grapheme_index [expr {$i_o -1}] - lappend overstacks $o_codestack - lappend overstacks_gx $o_gxstack - - #set replay_codes_overlay [join $o_codestack ""] - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] - - #if {[tcl::dict::exists $overstacks $max_overlay_grapheme_index]} { - # set replay_codes_overlay [join [tcl::dict::get $overstacks $max_overlay_grapheme_index] ""] - #} else { - # set replay_codes_overlay "" - #} - # -- --- --- --- --- --- --- --- - - - #potential problem - combinining diacritics directly following control chars like \r \b - - # -- --- --- - #we need to initialise overflow_idx before any potential row-movements - as they need to perform a loop break and force in_excess to 1 - if {$opt_overflow} { - #somewhat counterintuitively - overflow true means we can have lines as long as we want, but either way there can be excess data that needs to be thrown back to the calling loop. - set overflow_idx -1 - } else { - #overflow zero - we can't grow beyond our column width - so we get ellipsis or truncation - if {$opt_width ne "\uFFEF"} { - set overflow_idx [expr {$opt_width}] - } else { - #review - this is also the cursor position when adding a char at end of line? - set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it - } - } - # -- --- --- - - set outcols $undercols ;#leave undercols as is, outcols can potentially be appended to. - - set unapplied "" ;#if we break for move row (but not for /v ?) - set unapplied_list [list] - - set insert_lines_above 0 ;#return key - set insert_lines_below 0 - set instruction "" - - # -- --- --- - #cursor_save_dec, cursor_restore_dec etc - set cursor_restore_required 0 - set cursor_saved_attributes "" - set cursor_saved_position "" - # -- --- --- - - #set idx 0 ;# line index (cursor - 1) - #set idx [expr {$opt_colstart + $opt_colcursor} -1] - - #idx is the per column output index - set idx [expr {$opt_colcursor -1}] ;#don't use opt_colstart here - we have padded and won't start emitting until idx reaches opt_colstart-1 - #cursor_column is usually one above idx - but we have opt_colstart which is like a margin - todo: remove cursor_column from the following loop and calculate it's offset when breaking or at end. - #(for now we are incrementing/decrementing both in sync - which is a bit silly) - set cursor_column $opt_colcursor - - #idx_over is the per grapheme overlay index - set idx_over -1 - - - #movements only occur within the overlay range. - #an underlay is however not necessary.. e.g - #renderline -overflow 1 "" data - #foreach {pt code} $overmap {} - set insert_mode $opt_insert_mode ;#default 1 - set autowrap_mode $opt_autowrap_mode ;#default 1 - - #set re_mode {\x1b\[\?([0-9]*)(h|l)} ;#e.g DECAWM - #set re_col_move {\x1b\[([0-9]*)(C|D|G)$} - #set re_row_move {\x1b\[([0-9]*)(A|B)$} - #set re_both_move {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)H$} ;# or "f" ? - #set re_vt_sequence {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)~$} - #set re_cursor_save {\x1b\[s$} ;#note probable incompatibility with DECSLRM (set left right margin)! - #set re_cursor_restore {\x1b\[u$} - #set re_cursor_save_dec {\x1b7$} - #set re_cursor_restore_dec {\x1b8$} - #set re_other_single {\x1b(D|M|E)$} - #set re_decstbm {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)r$} ;#DECSTBM set top and bottom margins - - #puts "-->$overlay_grapheme_control_list<--" - #puts "-->overflow_idx: $overflow_idx" - for {set gci 0} {$gci < [llength $overlay_grapheme_control_list]} {incr gci} { - set gc [lindex $overlay_grapheme_control_list $gci] - lassign $gc type item - - #emit plaintext chars first using existing SGR codes from under/over stack as appropriate - #then check if the following code is a cursor movement within the line and adjust index if so - #foreach ch $overlay_graphemes {} - switch -- $type { - g { - set ch $item - incr idx_over; #idx_over (until unapplied reached anyway) is per *grapheme* in the overlay - not per col. - if {($idx < ($opt_colstart -1))} { - incr idx [grapheme_width_cached $ch] - continue - } - #set within_undercols [expr {$idx <= [llength $undercols]-1}] ;#within our active data width - set within_undercols [expr {$idx <= $colwidth-1}] - - #https://www.enigma.com/resources/blog/the-secret-world-of-newline-characters - #\x85 NEL in the c1 control set is treated by some terminal emulators (e.g Hyper) as a newline, - #on some it's invisble but doesn't change the line, on some it's a visible glyph of width 1. - #This is hard to process in any standard manner - but I think the Hyper behaviour of doing what it was intended is perhaps most reasonable - #We will map it to the same behaviour as lf here for now... but we need also to consider the equivalent ANSI sequence: \x1bE - - set chtest [tcl::string::map [list \n \x85 \b \r \v \x7f ] $ch] - #puts --->chtest:$chtest - #specials - each shoud have it's own test of what to do if it happens after overflow_idx reached - switch -- $chtest { - "" { - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - if {$idx == 0} { - #puts "---a at col 1" - #linefeed at column 1 - #leave the overflow_idx ;#? review - set instruction lf_start ;#specific instruction for newline at column 1 - priv::render_unapplied $overlay_grapheme_control_list $gci - break - } elseif {$overflow_idx != -1 && $idx == $overflow_idx} { - #linefeed after final column - #puts "---c at overflow_idx=$overflow_idx" - incr cursor_row - set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 - set instruction lf_overflow ;#only special treatment is to give it it's own instruction in case caller needs to handle differently - priv::render_unapplied $overlay_grapheme_control_list $gci - break - } else { - #linefeed occurred in middle or at end of text - #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" - incr cursor_row - set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 - set instruction lf_mid - priv::render_unapplied $overlay_grapheme_control_list $gci - break - } - - } - "" { - #will we want/need to use raw for keypresses in terminal? (terminal with LNM in standard reset mode means enter= this is the usual config for terminals) - #So far we are assuming the caller has translated to and handle above.. REVIEW. - - #consider also the old space-carriagereturn softwrap convention used in some terminals. - #In the context of rendering to a block of text - this works similarly in that the space gets eaten so programs emitting space-cr at the terminal width col will pretty much get what they expect. - set idx [expr {$opt_colstart -1}] - set cursor_column $opt_colstart ;#? - } - "" { - #literal backspace char - not necessarily from keyboard - #review - backspace effect on double-width chars - we are taking a column-editing perspective in overtype - #(important for -transparent option - hence replacement chars for half-exposed etc) - #review - overstrike support as per nroff/less (generally considered an old technology replaced by unicode mechanisms and/or ansi SGR) - if {$idx > ($opt_colstart -1)} { - incr idx -1 - incr cursor_column -1 - } else { - set flag 0 - if $flag { - #review - conflicting requirements? Need a different sequence for destructive interactive backspace? - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction backspace_at_start - break - } - } - } - "" { - #literal del character - some terminals send just this for what is generally expected to be a destructive backspace - #We instead treat this as a pure delete at current cursor position - it is up to the repl or terminal to remap backspace key to a sequence that has the desired effect. - priv::render_delchar $idx - } - "" { - #end processing this overline. rest of line is remainder. cursor for column as is. - #REVIEW - this theoretically depends on terminal's vertical tabulation setting (name?) - #e.g it could be configured to jump down 6 rows. - #On the other hand I've seen indications that some modern terminal emulators treat it pretty much as a linefeed. - #todo? - incr cursor_row - set overflow_idx $idx - #idx_over has already been incremented as this is both a movement-control and in some sense a grapheme - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction vt - break - } - default { - if {$overflow_idx != -1} { - #review - how to check arbitrary length item such as tab is going to overflow .. before we get to overflow_idx? - #call grapheme_width_cached on each ch, or look for tab specifically as it's currently the only known reason to have a grapheme width > 2? - #we need to decide what a tab spanning the overflow_idx means and how it affects wrap etc etc - if {$idx == $overflow_idx-1} { - set owidth [grapheme_width_cached $ch] - if {$owidth == 2} { - #review split 2w overflow? - #we don't want to make the decision here to split a 2w into replacement characters at end of line and beginning of next line - #better to consider the overlay char as unable to be applied to the line - #render empty column(?) - and reduce overlay grapheme index by one so that the current ch goes into unapplied - #throwing back to caller with instruction complicates its job - but is necessary to avoid making decsions for it here. - priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - #change the overflow_idx - set overflow_idx $idx - incr idx - incr idx_over -1 ;#set overlay grapheme index back one so that sgr stack from previous overlay grapheme used - priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#note $gci-1 instead of just gci - #throw back to caller's loop - add instruction to caller as this is not the usual case - #caller may for example choose to render a single replacement char to this line and omit the grapheme, or wrap it to the next line - set instruction overflow_splitchar - break - } elseif {$owidth > 2} { - #? tab? - #TODO! - puts stderr "overtype::renderline long overtext grapheme '[ansistring VIEW -lf 1 -vt 1 $ch]' not handled" - #tab of some length dependent on tabstops/elastic tabstop settings? - } - } elseif {$idx >= $overflow_idx} { - #jmn? - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci-1]] - #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - #don't incr idx beyond the overflow_idx - #idx_over already incremented - decrement so current overlay grapheme stacks go to unapplied - incr idx_over -1 - #priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#back one index here too - priv::render_this_unapplied $overlay_grapheme_control_list $gci ;# - set instruction overflow - break - } - } else { - #review. - #This corresponds to opt_overflow being true (at least until overflow_idx is in some cases forced to a value when throwing back to calling loop) - } - - if {($do_transparency && [regexp $opt_transparent $ch])} { - #pre opt_colstart is effectively transparent (we have applied padding of required number of columns to left of overlay) - if {$idx > [llength $outcols]-1} { - lappend outcols " " - #tcl::dict::set understacks $idx [list] ;#review - use idx-1 codestack? - lset understacks $idx [list] - incr idx - incr cursor_column - } else { - #todo - punk::char::char_width - set g [lindex $outcols $idx] - set uwidth [grapheme_width_cached $g] - if {[lindex $outcols $idx] eq ""} { - #2nd col of 2-wide char in underlay - incr idx - incr cursor_column - } elseif {$uwidth == 0} { - #e.g control char ? combining diacritic ? - incr idx - incr cursor_column - } elseif {$uwidth == 1} { - set owidth [grapheme_width_cached $ch] - incr idx - incr cursor_column - if {$owidth > 1} { - incr idx - incr cursor_column - } - } elseif {$uwidth > 1} { - if {[grapheme_width_cached $ch] == 1} { - if {!$insert_mode} { - #normal singlewide transparent overlay onto double-wide underlay - set next_pt_overchar [tcl::string::index $pt_overchars $idx_over+1] ;#lookahead of next plain-text char in overlay - if {$next_pt_overchar eq ""} { - #special-case trailing transparent - no next_pt_overchar - incr idx - incr cursor_column - } else { - if {[regexp $opt_transparent $next_pt_overchar]} { - incr idx - incr cursor_column - } else { - #next overlay char is not transparent.. first-half of underlying 2wide char is exposed - #priv::render_addchar $idx $opt_exposed1 [tcl::dict::get $overstacks $idx_over] [tcl::dict::get $overstacks_gx $idx_over] $insert_mode - priv::render_addchar $idx $opt_exposed1 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - incr cursor_column - } - } - } else { - #? todo - decide what transparency even means for insert mode - incr idx - incr cursor_column - } - } else { - #2wide transparency over 2wide in underlay - review - incr idx - incr cursor_column - } - } - } - } else { - - set idxchar [lindex $outcols $idx] - #non-transparent char in overlay or empty cell - if {$idxchar eq "\u0000"} { - #empty/erased cell indicator - set uwidth 1 - } else { - set uwidth [grapheme_width_cached $idxchar] - } - if {$within_undercols} { - if {$idxchar eq ""} { - #2nd col of 2wide char in underlay - if {!$insert_mode} { - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 - #JMN - this has to expose if our startposn chopped an underlay - but not if we already overwrote the first half of the widechar underlay grapheme - #e.g renderline \uFF21\uFF21--- a\uFF23\uFF23 - #vs - # renderline -startcolumn 2 \uFF21---- \uFF23 - if {[lindex $outcols $idx-1] != ""} { - #verified it's an empty following a filled - so it's a legit underlay remnant (REVIEW - when would it not be??) - #reset previous to an exposed 1st-half - but leave understacks code as is - priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 - } - incr idx - } else { - set prevcolinfo [lindex $outcols $idx-1] - #for insert mode - first replace the empty 2ndhalf char with exposed2 before shifting it right - #REVIEW - this leaves a replacement character permanently in our columns.. but it is consistent regarding length (?) - #The alternative is to disallow insertion at a column cursor that is at 2nd half of 2wide char - #perhaps by inserting after the char - this may be worthwhile - but may cause other surprises - #It is perhaps best avoided at another level and try to make renderline do exactly as it's told - #the advantage of this 2w splitting method is that the inserted character ends up in exactly the column we expect. - priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 ;#replace not insert - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 1 ;#insert - same index - if {$prevcolinfo ne ""} { - #we've split the 2wide - it may already have been rendered as an exposed1 - but not for example if our startcolumn was current idx - priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 ;#replace not insert - } ;# else?? - incr idx - } - if {$cursor_column < [llength $outcols] || $overflow_idx == -1} { - incr cursor_column - } - } elseif {$uwidth == 0} { - #what if this is some other c0/c1 control we haven't handled specifically? - - #by emitting a preceding empty-string column - we associate whatever this char is with the preceeding non-zero-length character and any existing zero-lengths that follow it - #e.g combining diacritic - increment before over char REVIEW - #arguably the previous overchar should have done this - ie lookahead for combiners? - #if we can get a proper grapheme_split function - this should be easier to tidy up. - priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - incr cursor_column 2 - - if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { - set cursor_column [llength $outcols] - } - } elseif {$uwidth == 1} { - #includes null empty cells - set owidth [grapheme_width_cached $ch] - if {$owidth == 1} { - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - } else { - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - #if next column in underlay empty - we've overwritten first half of underlying 2wide grapheme - #replace with rhs exposure in case there are no more overlay graphemes coming - use underlay's stack - if {([llength $outcols] >= $idx +2) && [lindex $outcols $idx+1] eq ""} { - priv::render_addchar [expr {$idx+1}] $opt_exposed2 [lindex $understacks $idx+1] [lindex $understacks_gx $idx+1] $insert_mode - } - incr idx - } - if {($cursor_column < [llength $outcols]) || $overflow_idx == -1 || $test_mode} { - incr cursor_column - } - } elseif {$uwidth > 1} { - set owidth [grapheme_width_cached $ch] - if {$owidth == 1} { - #1wide over 2wide in underlay - if {!$insert_mode} { - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - incr cursor_column - priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - #don't incr idx - we are just putting a broken-indication in the underlay - which may get overwritten by next overlay char - } else { - #insert mode just pushes all to right - no exposition char here - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - incr cursor_column - } - } else { - #2wide over 2wide - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx 2 - incr cursor_column 2 - } - - if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { - set cursor_column [llength $outcols] - } - } - } else { - priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode - incr idx - incr cursor_column - if {$overflow_idx !=-1 && !$test_mode} { - #overflow - if {$cursor_column > [llength $outcols]} { - set cursor_column [llength $outcols] - } - } - } - } - } - } ;# end switch - - - } - other { - #todo - consider CSI s DECSLRM vs ansi.sys \x1b\[s - we need \x1b\[s for oldschool ansi art - but may have to enable only for that. - #we should probably therefore reverse this mapping so that x1b7 x1b8 are the primary codes for save/restore - set code [tcl::string::map [list \x1b7 \x1b\[s \x1b8 \x1b\[u ] $item] - #since this element isn't a grapheme - advance idx_over to next grapheme overlay when about to fill 'unapplied' - - set matchinfo [list] - - #remap of DEC cursor_save/cursor_restore from ESC sequence to equivalent CSI - #probably not ideal - consider putting cursor_save/cursor_restore in functions so they can be called from the appropriate switch branch instead of using this mapping - #review - cost/benefit of function calls within these switch-arms instead of inline code? - - set c1 [tcl::string::index $code 0] - set c1c2c3 [tcl::string::range $code 0 2] - #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} - #tcl 8.7 - faster to use inline list than to store it in a local var outside of loop. - #(surprising - but presumably ) - set leadernorm [tcl::string::range [tcl::string::map [list\ - \x1b\[< 1006\ - \x1b\[ 7CSI\ - \x9b 8CSI\ - \x1b\] 7OSC\ - \x9d 8OSC\ - \x1b 7ESC\ - ] $c1c2c3] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars - - #we leave the tail of the code unmapped for now - switch -- $leadernorm { - 1006 { - #https://invisible-island.net/xterm/ctlseqs/ctlseqs.html - #SGR (1006) CSI < followed by colon separated encoded-button-value,px,py ordinates and final M for button press m for button release - set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 3 end]] - } - 7CSI - 7OSC { - set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 2 end]] - } - 7ESC { - set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] - } - 8CSI - 8OSC { - set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] - } - default { - #we haven't made a mapping for this - set codenorm $code - } - } - - #we've mapped 7 and 8bit escapes to values we can handle as literals in switch statements to take advantange of jump tables. - switch -- $leadernorm { - 1006 { - #TODO - # - switch -- [tcl::string::index $codenorm end] { - M { - puts stderr "mousedown $codenorm" - } - m { - puts stderr "mouseup $codenorm" - } - } - - } - {7CSI} - {8CSI} { - set param [tcl::string::range $codenorm 4 end-1] - #puts stdout "--> CSI [tcl::string::index $leadernorm 0] bit param:$param" - switch -- [tcl::string::index $codenorm end] { - D { - #Col move - #puts stdout "<-back" - #cursor back - #left-arrow/move-back when ltr mode - set num $param - if {$num eq ""} {set num 1} - - set version 2 - if {$version eq "2"} { - #todo - startcolumn offset! - if {$cursor_column - $num >= 1} { - incr idx -$num - incr cursor_column -$num - } else { - if {!$autowrap_mode} { - set cursor_column 1 - set idx 0 - } else { - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - incr cursor_column -$num - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction wrapmovebackward - break - } - } - } else { - incr idx -$num - incr cursor_column -$num - if {$idx < $opt_colstart-1} { - #wrap to previous line and position cursor at end of data - set idx [expr {$opt_colstart-1}] - set cursor_column $opt_colstart - } - } - } - C { - #Col move - #puts stdout "->forward" - #todo - consider right-to-left cursor mode (e.g Hebrew).. some day. - #cursor forward - #right-arrow/move forward - set num $param - if {$num eq ""} {set num 1} - - #todo - retrict to moving 1 position past datalen? restrict to column width? - #should ideally wrap to next line when interactive and not on last row - #(some ansi art seems to expect this behaviour) - #This presumably depends on the terminal's wrap mode - #e.g DECAWM autowrap mode - # CSI ? 7 h - set: autowrap (also tput smam) - # CSI ? 7 l - reset: no autowrap (also tput rmam) - set version 2 - if {$version eq "2"} { - set max [llength $outcols] - if {$overflow_idx == -1} { - incr max - } - if {$test_mode && $cursor_column == $max+1} { - #move_forward while in overflow - incr cursor_column -1 - } - - if {($cursor_column + $num) <= $max} { - incr idx $num - incr cursor_column $num - } else { - if {$autowrap_mode} { - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - #jmn - if {$idx == $overflow_idx} { - incr num - } - - #horizontal movement beyond line extent needs to wrap - throw back to caller - #we may have both overflow_rightand unapplied data - #(can have overflow_right if we were in insert_mode and processed chars prior to this movement) - #leave row as is - caller will need to determine how many rows the column-movement has consumed - incr cursor_column $num ;#give our caller the necessary info as columns from start of row - #incr idx_over - #should be gci following last one applied - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction wrapmoveforward - break - } else { - set cursor_column $max - set idx [expr {$cursor_column -1}] - } - } - } else { - if {!$opt_overflow || ($cursor_column + $num) <= [llength $outcols+1]} { - incr idx $num - incr cursor_column $num - } else { - if {!$insert_mode} { - #block editing style with arrow keys - #overtype mode - set idxstart $idx - set idxend [llength $outcols] - set moveend [expr {$idxend - $idxstart}] - if {$moveend < 0} {set moveend 0} ;#sanity? - #puts "idxstart:$idxstart idxend:$idxend outcols[llength $outcols] undercols:[llength $undercols]" - incr idx $moveend - incr cursor_column $moveend - #if {[tcl::dict::exists $understacks $idx]} { - # set stackinfo [tcl::dict::get $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext - #} else { - # set stackinfo [list] - #} - if {$idx < [llength $understacks]} { - set stackinfo [lindex $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext - } else { - set stackinfo [list] - } - if {$idx < [llength $understacks_gx]} { - #set gxstackinfo [tcl::dict::get $understacks_gx $idx] - set gxstackinfo [lindex $understacks_gx $idx] - } else { - set gxstackinfo [list] - } - #pad outcols - set movemore [expr {$num - $moveend}] - #assert movemore always at least 1 or we wouldn't be in this branch - for {set m 1} {$m <= $movemore} {incr m} { - incr idx - incr cursor_column - priv::render_addchar $idx " " $stackinfo $gxstackinfo $insert_mode - } - } else { - #normal - insert - incr idx $num - incr cursor_column $num - if {$idx > [llength $outcols]} { - set idx [llength $outcols];#allow one beyond - for adding character at end of line - set cursor_column [expr {[llength $outcols]+1}] - } - } - } - } - } - G { - #Col move - #move absolute column - #adjust to colstart - as column 1 is within overlay - #??? - set idx [expr {$param + $opt_colstart -1}] - set cursor_column $param - error "renderline absolute col move ESC G unimplemented" - } - A { - #Row move - up - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - set num $param - if {$num eq ""} {set num 1} - incr cursor_row -$num - - if {$cursor_row < 1} { - set cursor_row 1 - } - - #ensure rest of *overlay* is emitted to remainder - incr idx_over - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction up - #retain cursor_column - break - } - B { - #Row move - down - set num $param - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - #move down - if {$num eq ""} {set num 1} - incr cursor_row $num - - - incr idx_over ;#idx_over hasn't encountered a grapheme and hasn't advanced yet - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction down - #retain cursor_column - break - } - H - f { - #$re_both_move - lassign [split $param {;}] row col - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - #lassign $matchinfo _match row col - - if {$col eq ""} {set col 1} - set max [llength $outcols] - if {$overflow_idx == -1} { - incr max - } - if {$col > $max} { - set cursor_column $max - } else { - set cursor_column $col - } - set idx [expr {$cursor_column -1}] - - if {$row eq ""} {set row 1} - set cursor_row $row - if {$cursor_row < 1} { - set cursor_row 1 - } - - incr idx_over - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction move - break - - } - X { - puts stderr "X - $param" - #ECH - erase character - if {$param eq "" || $param eq "0"} {set param 1}; #param=count of chars to erase - priv::render_erasechar $idx $param - #cursor position doesn't change. - } - r { - #$re_decstbm - #https://www.vt100.net/docs/vt510-rm/DECSTBM.html - #This control function sets the top and bottom margins for the current page. You cannot perform scrolling outside the margins - lassign [split $param {;}] margin_top margin_bottom - - #todo - return these for the caller to process.. - puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" - #Also moves the cursor to col 1 line 1 of the page - set cursor_column 1 - set cursor_row 1 - - incr idx_over - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction move ;#own instruction? decstbm? - break - } - s { - # - todo - make ansi.sys CSI s cursor save only apply for certain cases? - may need to support DECSLRM instead which uses same code - - #$re_cursor_save - #cursor save could come after last column - if {$overflow_idx != -1 && $idx == $overflow_idx} { - #bartman2.ans test file - fixes misalignment at bottom of dialog bubble - #incr cursor_row - #set cursor_column 1 - #bwings1.ans test file - breaks if we actually incr cursor (has repeated saves) - set cursor_saved_position [list row [expr {$cursor_row+1}] column 1] - } else { - set cursor_saved_position [list row $cursor_row column $cursor_column] - } - #there may be overlay stackable codes emitted that aren't in the understacks because they come between the last emmited character and the cursor_save control. - #we need the SGR and gx overlay codes prior to the cursor_save - - #a real terminal would not be able to know the state of the underlay.. so we should probably ignore it. - #set sgr_stack [lindex $understacks $idx] - #set gx_stack [lindex $understacks_gx $idx] ;#not actually a stack - just a boolean state (for now?) - - set sgr_stack [list] - set gx_stack [list] - - #we shouldn't need to scan for intermediate cursor save/restores - as restores would throw-back to the calling loop - so our overlay 'line' is since those. - #The overlay_grapheme_control_list had leading resets from previous lines - so we go back to the beginning not just the first grapheme. - - foreach gc [lrange $overlay_grapheme_control_list 0 $gci-1] { - lassign $gc type code - #types g other sgr gx0 - switch -- $type { - gx0 { - #code is actually a stand-in for the graphics on/off code - not the raw code - #It is either gx0_on or gx0_off - set gx_stack [list $code] - } - sgr { - #code is the raw code - if {[punk::ansi::codetype::is_sgr_reset $code]} { - #jmn - set sgr_stack [list "\x1b\[m"] - } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { - set sgr_stack [list $code] - lappend overlay_grapheme_control_list [list sgr $code] - } elseif {[priv::is_sgr $code]} { - #often we don't get resets - and codes just pile up. - #as a first step to simplifying - at least remove earlier straight up dupes - set dup_posns [lsearch -all -exact $sgr_stack $code] ;#needs -exact - codes have square-brackets (glob chars) - set sgr_stack [lremove $sgr_stack {*}$dup_posns] - lappend sgr_stack $code - } - } - } - } - set cursor_saved_attributes "" - switch -- [lindex $gx_stack 0] { - gx0_on { - append cursor_saved_attributes "\x1b(0" - } - gx0_off { - append cursor_saved_attributes "\x1b(B" - } - } - #append cursor_saved_attributes [join $sgr_stack ""] - append cursor_saved_attributes [punk::ansi::codetype::sgr_merge_list {*}$sgr_stack] - - #as there is apparently only one cursor storage element we don't need to throw back to the calling loop for a save. - - #don't incr index - or the save will cause cursor to move to the right - #carry on - - } - u { - #$re_cursor_restore - #we are going to jump somewhere.. for now we will assume another line, and process accordingly. - #The caller has the cursor_saved_position/cursor_saved_attributes if any (?review - if we always pass it back it, we could save some calls for moves in same line) - #don't set overflow at this point. The existing underlay to the right must be preserved. - #we only want to jump and render the unapplied at the new location. - - #lset overstacks $idx_over [list] - #set replay_codes_overlay "" - - #if {$cursor_saved_attributes ne ""} { - # set replay_codes_overlay $cursor_saved_attributes ;#empty - or last save if it happend in this input chunk - #} else { - #jj - #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - set replay_codes_overlay "" - #} - - #like priv::render_unapplied - but without the overlay's ansi reset or gx stacks from before the restore code - incr idx_over - - set unapplied "" - set unapplied_list [list] - foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { - lassign $gc type item - if {$type eq "gx0"} { - if {$item eq "gx0_on"} { - lappend unapplied_list "\x1b(0" - } elseif {$item eq "gx0_off"} { - lappend unapplied_list "\x1b(B" - } - } else { - lappend unapplied_list $item - } - #incr idx_over - } - set unapplied [join $unapplied_list ""] - #if the save occured within this line - that's ok - it's in the return value list and caller can prepend for the next loop. - set instruction restore_cursor - break - } - ~ { - #$re_vt_sequence - #lassign $matchinfo _match key mod - lassign [split $param {;}] key mod - - #Note that f1 to f4 show as ESCOP|Q|R|S (VT220?) but f5+ show as ESC\[15~ - # - #e.g esc \[2~ insert esc \[2;2~ shift-insert - #mod - subtract 1, and then use bitmask - #shift = 1, (left)Alt = 2, control=4, meta=8 (meta seems to do nothing on many terminals on windows? Intercepted by windows?) - #puts stderr "vt key:$key mod:$mod code:[ansistring VIEW $code]" - if {$key eq "1"} { - #home - } elseif {$key eq "2"} { - #Insert - if {$mod eq ""} { - #no modifier key - set insert_mode [expr {!$insert_mode}] - #rather than set the cursor - we return the insert mode state so the caller can decide - } - } elseif {$key eq "3"} { - #Delete - presumably this shifts other chars in the line, with empty cells coming in from the end - switch -- $mod { - "" { - priv::render_delchar $idx - } - "5" { - #ctrl-del - delete to end of word (pwsh) - possibly word on next line if current line empty(?) - } - } - } elseif {$key eq "4"} { - #End - } elseif {$key eq "5"} { - #pgup - } elseif {$key eq "6"} { - #pgDn - } elseif {$key eq "7"} { - #Home - #?? - set idx [expr {$opt_colstart -1}] - set cursor_column 1 - } elseif {$key eq "8"} { - #End - } elseif {$key eq "11"} { - #F1 - or ESCOP or e.g shift F1 ESC\[1;2P - } elseif {$key eq "12"} { - #F2 - or ESCOQ - } elseif {$key eq "13"} { - #F3 - or ESCOR - } elseif {$key eq "14"} { - #F4 - or ESCOS - } elseif {$key eq "15"} { - #F5 or shift F5 ESC\[15;2~ - } elseif {$key eq "17"} { - #F6 - } elseif {$key eq "18"} { - #F7 - } elseif {$key eq "19"} { - #F8 - } elseif {$key eq "20"} { - #F9 - } elseif {$key eq "21"} { - #F10 - } elseif {$key eq "23"} { - #F11 - } elseif {$key eq "24"} { - #F12 - } - - } - h - l { - #we are matching only last char to get to this arm - but are there other sequences ending in h|l we need to handle? - - #$re_mode if first after CSI is "?" - #some docs mention ESC=h|l - not seen on windows terminals.. review - #e.g https://www2.math.upenn.edu/~kazdan/210/computer/ansi.html - if {[tcl::string::index $codenorm 4] eq "?"} { - set num [tcl::string::range $codenorm 5 end-1] ;#param between ? and h|l - #lassign $matchinfo _match num type - switch -- $num { - 5 { - #DECSNM - reverse video - #How we simulate this to render within a block of text is an open question. - #track all SGR stacks and constantly flip based on the current SGR reverse state? - #It is the job of the calling loop to do this - so at this stage we'll just set the states - #DECAWM autowrap - if {$type eq "h"} { - #set (enable) - set reverse_mode 1 - } else { - #reset (disable) - set reverse_mode 0 - } - - } - 7 { - #DECAWM autowrap - if {$type eq "h"} { - #set (enable) - set autowrap_mode 1 - if {$opt_width ne "\uFFEF"} { - set overflow_idx $opt_width - } else { - #review - this is also the cursor position when adding a char at end of line? - set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it - } - #review - can idx ever be beyond overflow_idx limit when we change e.g with a width setting and cursor movements? presume not - but sanity check for now. - if {$idx >= $overflow_idx} { - puts stderr "renderline error - idx '$idx' >= overflow_idx '$overflow_idx' - unexpected" - } - } else { - #reset (disable) - set autowrap_mode 0 - set overflow_idx -1 - } - } - 25 { - if {$type eq "h"} { - #visible cursor - - } else { - #invisible cursor - - } - } - } - - } else { - puts stderr "overtype::renderline CSI...h|l code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" - } - } - default { - puts stderr "overtype::renderline CSI code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" - } - } - } - 7ESC { - #$re_other_single - switch -- [tcl::string::index $codenorm end] { - D { - #\x84 - #index (IND) - #vt102-docs: "Moves cursor down one line in same column. If cursor is at bottom margin, screen performs a scroll-up" - puts stderr "ESC D not fully implemented" - incr cursor_row - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction down - #retain cursor_column - break - } - M { - #\x8D - #Reverse Index (RI) - #vt102-docs: "Moves cursor up one line in same column. If cursor is at top margin, screen performs a scroll-down" - puts stderr "ESC M not fully implemented" - - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] - #move up - incr cursor_row -1 - if {$cursor_row < 1} { - set cursor_row 1 - } - #ensure rest of *overlay* is emitted to remainder - priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction up ;#need instruction for scroll-down? - #retain cursor_column - break - } - E { - #\x85 - #review - is behaviour different to lf? - #todo - possibly(?) same logic as handling above. i.e return instruction depends on where column_cursor is at the time we get NEL - #leave implementation until logic for is set in stone... still under review - #It's arguable NEL is a pure cursor movement as opposed to the semantic meaning of crlf or lf in a file. - # - #Next Line (NEL) "Move the cursor to the left margin on the next line. If the cursor is at the bottom margin, scroll the page up" - puts stderr "ESC E unimplemented" - - } - default { - puts stderr "overtype::renderline ESC code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" - } - } - - } - } - - #switch -regexp -matchvar matchinfo -- $code\ - #$re_mode { - #}\ - #default { - # puts stderr "overtype::renderline code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" - #} - - } - default { - #don't need to handle sgr or gx0 types - #we have our sgr gx0 codes already in stacks for each overlay grapheme - } - } - } - - #-------- - if {$opt_overflow == 0} { - #need to truncate to the width of the original undertext - #review - string_width vs printing_length here. undertext requirement to be already rendered therefore punk::char::string_width ok? - #set num_under_columns [punk::char::string_width $pt_underchars] ;#plaintext underchars - } - if {$overflow_idx == -1} { - #overflow was initially unlimited and hasn't been overridden - } else { - - } - #-------- - - - #coalesce and replay codestacks for outcols grapheme list - set outstring "" ;#output prior to overflow - set overflow_right "" ;#remainder after overflow point reached - set i 0 - set cstack [list] - set prevstack [list] - set prev_g0 [list] - #note overflow_idx may already have been set lower if we had a row move above due to \v or ANSI moves - set in_overflow 0 ;#used to stop char-width scanning once in overflow - if {$overflow_idx == 0} { - #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW - set in_overflow 1 - } - foreach ch $outcols { - #puts "---- [ansistring VIEW $ch]" - - set gxleader "" - if {$i < [llength $understacks_gx]} { - #set g0 [tcl::dict::get $understacks_gx $i] - set g0 [lindex $understacks_gx $i] - if {$g0 ne $prev_g0} { - if {$g0 eq [list "gx0_on"]} { - set gxleader "\x1b(0" - } else { - set gxleader "\x1b(B" - } - } - set prev_g0 $g0 - } else { - set prev_g0 [list] - } - - set sgrleader "" - if {$i < [llength $understacks]} { - #set cstack [tcl::dict::get $understacks $i] - set cstack [lindex $understacks $i] - if {$cstack ne $prevstack} { - if {[llength $prevstack] && ![llength $cstack]} { - #This reset is important e.g testfile fruit.ans - we get overhang on rhs without it. But why is cstack empty? - append sgrleader \033\[m - } else { - append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] - } - } - set prevstack $cstack - } else { - set prevstack [list] - } - - - - if {$in_overflow} { - if {$i == $overflow_idx} { - set 0 [lindex $understacks_gx $i] - set gxleader "" - if {$g0 eq [list "gx0_on"]} { - set gxleader "\x1b(0" - } elseif {$g0 eq [list "gx0_off"]} { - set gxleader "\x1b(B" - } - append overflow_right $gxleader - set cstack [lindex $understacks $i] - set sgrleader "" - #whether cstack is same or differs from previous char's stack - we must have an output at the start of the overflow_right - #if {[llength $prevstack] && ![llength $cstack]} { - # append sgrleader \033\[m - #} - append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] - append overflow_right $sgrleader - append overflow_right $ch - } else { - append overflow_right $gxleader - append overflow_right $sgrleader - append overflow_right $ch - } - } else { - if {$overflow_idx != -1 && $i+1 == $overflow_idx} { - #one before overflow - #will be in overflow in next iteration - set in_overflow 1 - if {[grapheme_width_cached $ch]> 1} { - #we overflowed with second-half of a double-width char - replace first-half with user-supplied exposition char (should be 1 wide) - set ch $opt_exposed1 - } - } - append outstring $gxleader - append outstring $sgrleader - if {$idx+1 < $cursor_column} { - append outstring [tcl::string::map {\u0000 " "} $ch] - } else { - append outstring $ch - } - } - incr i - } - #flower.ans good test for null handling - reverse line building - if {![ansistring length $overflow_right]} { - set outstring [tcl::string::trimright $outstring "\u0000"] - } - set outstring [tcl::string::map {\u0000 " "} $outstring] - set overflow_right [tcl::string::trimright $overflow_right "\u0000"] - set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] - - set replay_codes "" - if {[llength $understacks] > 0} { - if {$overflow_idx == -1} { - #set tail_idx [tcl::dict::size $understacks] - set tail_idx [llength $understacks] - } else { - set tail_idx [llength $undercols] - } - if {$tail_idx-1 < [llength $understacks]} { - #set replay_codes [join [lindex $understacks $tail_idx-1] ""] ;#tail replay codes - set replay_codes [punk::ansi::codetype::sgr_merge_list {*}[lindex $understacks $tail_idx-1]] ;#tail replay codes - } - if {$tail_idx-1 < [llength $understacks_gx]} { - set gx0 [lindex $understacks_gx $tail_idx-1] - if {$gx0 eq [list "gx0_on"]} { - #if it was on, turn gx0 off at the point we stop processing overlay - append outstring "\x1b(B" - } - } - } - if {[string length $overflow_right]} { - #puts stderr "remainder:$overflow_right" - } - #pdict $understacks - - if {[punk::ansi::ta::detect_sgr $outstring]} { - append outstring [punk::ansi::a] ;#without this - we would get for example, trailing backgrounds after rightmost column - - #close off any open gx? - #probably should - and overflow_right reopen? - } - - if {$opt_returnextra} { - #replay_codes is the codestack at the boundary - used for ellipsis colouring to match elided text - review - #replay_codes_underlay is the set of codes in effect at the very end of the original underlay - - #review - #replay_codes_overlay is the set of codes in effect at the very end of the original overlay (even if not all overlay was applied) - #todo - replay_codes for gx0 mode - - #overflow_idx may change during ansi & character processing - if {$overflow_idx == -1} { - set overflow_right_column "" - } else { - set overflow_right_column [expr {$overflow_idx+1}] - } - set result [tcl::dict::create\ - result $outstring\ - visualwidth [punk::ansi::printing_length $outstring]\ - instruction $instruction\ - stringlen [string length $outstring]\ - overflow_right_column $overflow_right_column\ - overflow_right $overflow_right\ - unapplied $unapplied\ - unapplied_list $unapplied_list\ - insert_mode $insert_mode\ - autowrap_mode $autowrap_mode\ - insert_lines_above $insert_lines_above\ - insert_lines_below $insert_lines_below\ - cursor_saved_position $cursor_saved_position\ - cursor_saved_attributes $cursor_saved_attributes\ - cursor_column $cursor_column\ - cursor_row $cursor_row\ - opt_overflow $opt_overflow\ - replay_codes $replay_codes\ - replay_codes_underlay $replay_codes_underlay\ - replay_codes_overlay $replay_codes_overlay\ - ] - if {$opt_returnextra == 1} { - return $result - } else { - #human/debug - map special chars to visual glyphs - set viewop VIEW - switch -- $opt_returnextra { - 2 { - #codes and character data - set viewop VIEWCODES ;#ansi colorisation of codes - green for SGR, blue/blue reverse for cursor_save/cursor_restore, cyan for movements, orange for others - } - 3 { - set viewop VIEWSTYLE ;#ansi colorise the characters within the output with preceding codes, stacking codes only within each dict value - may not be same SGR effect as the effect in-situ. - } - } - tcl::dict::set result result [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result result]] - tcl::dict::set result overflow_right [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result overflow_right]] - tcl::dict::set result unapplied [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result unapplied]] - tcl::dict::set result unapplied_list [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result unapplied_list]] - tcl::dict::set result replay_codes [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes]] - tcl::dict::set result replay_codes_underlay [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes_underlay]] - tcl::dict::set result replay_codes_overlay [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes_overlay]] - tcl::dict::set result cursor_saved_attributes [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result cursor_saved_attributes]] - return $result - } - } else { - return $outstring - } - #return [join $out ""] - } - - #*** !doctools - #[list_end] [comment {--- end definitions namespace overtype ---}] -} - -tcl::namespace::eval overtype::piper { - proc overcentre {args} { - if {[llength $args] < 2} { - error {usage: ?-bias left|right? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} - } - lassign [lrange $args end-1 end] over under - set argsflags [lrange $args 0 end-2] - tailcall overtype::centre {*}$argsflags $under $over - } - proc overleft {args} { - if {[llength $args] < 2} { - error {usage: ?-startcolumn ? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} - } - lassign [lrange $args end-1 end] over under - set argsflags [lrange $args 0 end-2] - tailcall overtype::left {*}$argsflags $under $over - } -} - - -# -- --- --- --- --- --- --- --- --- --- --- -proc overtype::transparentline {args} { - foreach {under over} [lrange $args end-1 end] break - set argsflags [lrange $args 0 end-2] - set defaults [tcl::dict::create\ - -transparent 1\ - -exposed 1 " "\ - -exposed 2 " "\ - ] - set newargs [tcl::dict::merge $defaults $argsflags] - tailcall overtype::renderline {*}$newargs $under $over -} -#renderline may not make sense as it is in the long run for blocks of text - but is handy in the single-line-handling form anyway. -# We are trying to handle ansi codes in a block of text which is acting like a mini-terminal in some sense. -#We can process standard cursor moves such as \b \r - but no way to respond to other cursor movements e.g moving to other lines. -# -tcl::namespace::eval overtype::piper { - proc renderline {args} { - if {[llength $args] < 2} { - error {usage: ?-start ? ?-transparent [0|1|]? ?-overflow [1|0]? overtext pipelinedata} - } - foreach {over under} [lrange $args end-1 end] break - set argsflags [lrange $args 0 end-2] - tailcall overtype::renderline {*}$argsflags $under $over - } -} -interp alias "" piper_renderline "" overtype::piper::renderline - -#intended for single grapheme - but will work for multiple -#cannot contain ansi or newlines -#(a cache of ansifreestring_width calls - as these are quite regex heavy) -proc overtype::grapheme_width_cached {ch} { - variable grapheme_widths - if {[tcl::dict::exists $grapheme_widths $ch]} { - return [tcl::dict::get $grapheme_widths $ch] - } - set width [punk::char::ansifreestring_width $ch] - tcl::dict::set grapheme_widths $ch $width - return $width -} - - - -proc overtype::test_renderline {} { - set t \uFF5E ;#2-wide tilde - set u \uFF3F ;#2-wide underscore - set missing \uFFFD - return [list $t $u A${t}B] -} - -#maintenance warning -#same as textblock::size - but we don't want that circular dependency -#block width and height can be tricky. e.g \v handled differently on different terminal emulators and can affect both -proc overtype::blocksize {textblock} { - if {$textblock eq ""} { - return [tcl::dict::create width 0 height 1] ;#no such thing as zero-height block - for consistency with non-empty strings having no line-endings - } - if {[tcl::string::first \t $textblock] >= 0} { - if {[info exists punk::console::tabwidth]} { - set tw $::punk::console::tabwidth - } else { - set tw 8 - } - set textblock [textutil::tabify::untabify2 $textblock $tw] - } - #stripansi on entire block in one go rather than line by line - result should be the same - review - make tests - if {[punk::ansi::ta::detect $textblock]} { - set textblock [punk::ansi::stripansi $textblock] - } - if {[tcl::string::last \n $textblock] >= 0} { - set num_le [expr {[tcl::string::length $textblock]-[tcl::string::length [tcl::string::map {\n {}} $textblock]]}] ;#faster than splitting into single-char list - set width [tcl::mathfunc::max {*}[lmap v [split $textblock \n] {::punk::char::ansifreestring_width $v}]] - } else { - set num_le 0 - set width [punk::char::ansifreestring_width $textblock] - } - #our concept of block-height is likely to be different to other line-counting mechanisms - set height [expr {$num_le + 1}] ;# one line if no le - 2 if there is one trailing le even if no data follows le - - return [tcl::dict::create width $width height $height] ;#maintain order in 'image processing' standard width then height - caller may use lassign [dict values [blocksize ]] width height -} - -tcl::namespace::eval overtype::priv { - variable cache_is_sgr [tcl::dict::create] - - #we are likely to be asking the same question of the same ansi codes repeatedly - #caching the answer saves some regex expense - possibly a few uS to lookup vs under 1uS - #todo - test if still worthwhile after a large cache is built up. (limit cache size?) - proc is_sgr {code} { - variable cache_is_sgr - if {[tcl::dict::exists $cache_is_sgr $code]} { - return [tcl::dict::get $cache_is_sgr $code] - } - set answer [punk::ansi::codetype::is_sgr $code] - tcl::dict::set cache_is_sgr $code $answer - return $answer - } - proc render_unapplied {overlay_grapheme_control_list gci} { - upvar idx_over idx_over - upvar unapplied unapplied - upvar unapplied_list unapplied_list ;#maintaining as a list allows caller to utilize it without having to re-split - upvar overstacks overstacks - upvar overstacks_gx overstacks_gx - upvar overlay_grapheme_control_stacks og_stacks - - #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] - set unapplied "" - set unapplied_list [list] - #append unapplied [join [lindex $overstacks $idx_over] ""] - #append unapplied [punk::ansi::codetype::sgr_merge_list {*}[lindex $overstacks $idx_over]] - set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] - if {$sgr_merged ne ""} { - lappend unapplied_list $sgr_merged - } - switch -- [lindex $overstacks_gx $idx_over] { - "gx0_on" { - lappend unapplied_list "\x1b(0" - } - "gx0_off" { - lappend unapplied_list "\x1b(B" - } - } - - foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { - lassign $gc type item - #types g other sgr gx0 - if {$type eq "gx0"} { - if {$item eq "gx0_on"} { - lappend unapplied_list "\x1b(0" - } elseif {$item eq "gx0_off"} { - lappend unapplied_list "\x1b(B" - } - } else { - lappend unapplied_list $item - } - } - set unapplied [join $unapplied_list ""] - } - - #clearer - renders the specific gci forward as unapplied - prefixed with it's merged sgr stack - proc render_this_unapplied {overlay_grapheme_control_list gci} { - upvar idx_over idx_over - upvar unapplied unapplied - upvar unapplied_list unapplied_list - upvar overstacks overstacks - upvar overstacks_gx overstacks_gx - upvar overlay_grapheme_control_stacks og_stacks - - #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] - set unapplied "" - set unapplied_list [list] - - set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] - if {$sgr_merged ne ""} { - lappend unapplied_list $sgr_merged - } - switch -- [lindex $overstacks_gx $idx_over] { - "gx0_on" { - lappend unapplied_list "\x1b(0" - } - "gx0_off" { - lappend unapplied_list "\x1b(B" - } - } - - foreach gc [lrange $overlay_grapheme_control_list $gci end] { - lassign $gc type item - #types g other sgr gx0 - if {$type eq "gx0"} { - if {$item eq "gx0_on"} { - lappend unapplied_list "\x1b(0" - } elseif {$item eq "gx0_off"} { - lappend unapplied_list "\x1b(B" - } - } else { - lappend unapplied_list $item - } - } - set unapplied [join $unapplied_list ""] - } - proc render_delchar {i} { - upvar outcols o - upvar understacks ustacks - upvar understacks_gx gxstacks - set nxt [llength $o] - if {$i < $nxt} { - set o [lreplace $o $i $i] - set ustacks [lreplace $ustacks $i $i] - set gxstacks [lreplace $gxstacks $i $i] - } else { - - } - } - proc render_erasechar {i count} { - upvar outcols o - upvar understacks ustacks - upvar understacks_gx gxstacks - #ECH clears character attributes from erased character positions - #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. - if {![tcl::string::is integer -strict $count] || $count < 1} { - error "render_erasechar count must be integer >= 1" - } - set start $i - set end [expr {$i + $count -1}] - #we restrict ECH to current line - as some terminals do - review - is that the only way it's implemented? - if {$i > [llength $o]-1} { - return - } - if {$end > [llength $o]-1} { - set end [expr {[llength $o]-1}] - } - set num [expr {$end - $start + 1}] - set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? - set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] - set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] - return - } - proc render_setchar {i c } { - upvar outcols o - lset o $i $c - } - #is actually addgrapheme? - proc render_addchar {i c sgrstack gx0stack {insert_mode 0}} { - upvar outcols o - upvar understacks ustacks - upvar understacks_gx gxstacks - - if 0 { - if {$c eq "c"} { - puts "i:$i c:$c sgrstack:[ansistring VIEW $sgrstack]" - puts "understacks:[ansistring VIEW $ustacks]" - upvar overstacks overstacks - puts "overstacks:[ansistring VIEW $overstacks]" - puts "info level 0:[info level 0]" - } - } - - set nxt [llength $o] - if {!$insert_mode} { - if {$i < $nxt} { - #These lists must always be in sync - lset o $i $c - } else { - lappend o $c - } - if {$i < [llength $ustacks]} { - lset ustacks $i $sgrstack - lset gxstacks $i $gx0stack - } else { - lappend ustacks $sgrstack - lappend gxstacks $gx0stack - } - } else { - #insert of single-width vs double-width when underlying is double-width? - if {$i < $nxt} { - set o [linsert $o $i $c] - } else { - lappend o $c - } - if {$i < [llength $ustacks]} { - set ustacks [linsert $ustacks $i $sgrstack] - set gxstacks [linsert $gxstacks $i $gx0stack] - } else { - lappend ustacks $sgrstack - lappend gxstacks $gx0stack - } - } - } - -} - - - -# -- --- --- --- --- --- --- --- --- --- --- -tcl::namespace::eval overtype { - interp alias {} ::overtype::center {} ::overtype::centre -} - -# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -## Ready -package provide overtype [tcl::namespace::eval overtype { - variable version - set version 1.6.4 -}] -return - -#*** !doctools -#[manpage_end] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm index 492341d6..3c200d26 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm @@ -439,7 +439,8 @@ tcl::namespace::eval overtype { if {[llength $lflines]} { lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] } - set inputchunks $lflines[unset lflines] + #set inputchunks $lflines[unset lflines] + set inputchunks [lindex [list $lflines [unset lflines]] 0] } } @@ -2115,6 +2116,7 @@ tcl::namespace::eval overtype { if {[llength $undercols]< $opt_width} { set diff [expr {$opt_width- [llength $undercols]}] if {$diff > 0} { + #set undercols [list {*}$undercols {*}[lrepeat $diff "\u0000"]] ;#2024 - much slower lappend undercols {*}[lrepeat $diff "\u0000"] lappend understacks {*}[lrepeat $diff $cs] lappend understacks_gx {*}[lrepeat $diff $gs] @@ -3889,7 +3891,19 @@ tcl::namespace::eval overtype { #OSC 4 - set colour palette #can take multiple params #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ - set params [tcl::string::range $code_content 1 end] + set params [tcl::string::range $code_content 2 end] ;#strip 4 and first semicolon + set cmap [dict create] + foreach {cnum spec} [split $params {;}] { + if {$cnum >= 0 and $cnum <= 255} { + #todo - parse spec from names like 'red' to RGB + #todo - accept rgb:ab/cd/ef as well as rgb:/a/b/c (as alias for aa/bb/cc) + #also - what about rgb:abcd/defg/hijk and 12-bit abc/def/ghi ? + dict set cmap $cnum $spec + } else { + #todo - log + puts stderr "overtype::renderline OSC 4 set colour palette - bad color number: $cnum must be from 0 to 255. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm index e367ce9e..887888e8 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm @@ -183,7 +183,9 @@ namespace eval punk::console { variable previous_stty_state_$channel set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] eq ""} { - set previous_stty_state_$channel [exec {*}$sttycmd -g <@$channel] + if {[catch {{*}$sttycmd -g <@$channel} previous_stty_state_$channel]} { + set previous_stty_state_$channel "" + } } exec {*}$sttycmd raw -echo <@$channel @@ -253,13 +255,21 @@ namespace eval punk::console { return "line" } } elseif {$raw_or_line eq "raw"} { - punk::console::enableRaw + if {[catch { + punk::console::enableRaw + } errM]} { + puts stderr "Warning punk::console::enableRaw failed - $errM" + } if {[can_ansi]} { punk::console::enableVirtualTerminal both } } elseif {$raw_or_line eq "line"} { #review -order. disableRaw has memory from enableRaw.. but but for line mode we want vt disabled - so call it after disableRaw (?) - punk::console::disableRaw + if {[catch { + punk::console::disableRaw + } errM]} { + puts stderr "Warning punk::console::disableRaw failed - $errM" + } if {[can_ansi]} { punk::console::disableVirtualTerminal input ;#default readline arrow behaviour etc punk::console::enableVirtualTerminal output ;#display/use ansi codes @@ -290,12 +300,15 @@ namespace eval punk::console { set loadstate [zzzload::pkg_require twapi] #loadstate could also be stuck on loading? - review - zzzload not very ripe - #Twapi is relatively slow to load - can be 1s plus in normal cases - and much longer if there are disk performance issues. - + #Twapi can be relatively slow to load (on some systems) - can be 1s plus in some cases - and much longer if there are disk performance issues. if {$loadstate ni [list failed]} { + #possibly still 'loading' #review zzzload usage #puts stdout "=========== console loading twapi =============" - zzzload::pkg_wait twapi + set loadstate [zzzload::pkg_wait twapi] ;#can return 'failed' will return version if already loaded or loaded during wait + } + + if {$loadstate ni [list failed]} { package require twapi ;#should be fast once twapi dll loaded in zzzload thread set ::punk::console::has_twapi 1 @@ -523,6 +536,9 @@ namespace eval punk::console { set is_raw 0 return [list stdin [list from $oldmode to $newmode]] } elseif {[set sttycmd [auto_execok stty]] ne ""} { + #stty can return info on windows - but doesn't seem to be able to set anything. + #review - is returned info even valid? + set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] ne ""} { exec {*}$sttycmd [set previous_stty_state_$channel] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm index 63f32dee..872e4807 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -339,6 +339,92 @@ namespace eval punk::lib { set has_twapi [expr {![catch {package require twapi}]}] } + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + proc lswap {lvar a z} { + upvar $lvar l + if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { + #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + set a_index [lindex_resolve $l $a] + set a_msg "" + switch -- $a_index { + -2 { + "$a is greater th + } + -3 { + } + } + error "lswap cannot indices $a and $z $a is out of range" + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} #*** !doctools #[subsection {Namespace punk::lib}] @@ -347,6 +433,7 @@ namespace eval punk::lib { if {[info commands lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation #support minimal set from to proc range {from to} { lseq $from $to @@ -1009,24 +1096,28 @@ namespace eval punk::lib { } set lower_resolve [punk::lib::lindex_resolve $dval $a] ;#-2 for too low, -1 for too high #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds - if {${lower_resolve} == -1} { + if {${lower_resolve} == -2} { + ##x #lower bound is above upper list range #match with decreasing indices is still possible set lower [expr {[llength $dval]-1}] ;#set to max - } elseif {$lower_resolve == -2} { + } elseif {$lower_resolve == -3} { + ##x set lower 0 } else { set lower $lower_resolve } set upper [punk::lib::lindex_resolve $dval $b] - if {$upper == -2} { + if {$upper == -3} { + ##x #upper bound is below list range - - if {$lower_resolve >=-1} { + if {$lower_resolve >=-2} { + ##x set upper 0 } else { continue } - } elseif {$upper == -1} { + } elseif {$upper == -2} { #use max set upper [expr {[llength $dval]-1}] #assert - upper >=0 because we have ruled out empty lists @@ -1669,7 +1760,8 @@ namespace eval punk::lib { error "bad expression '$expression': must be integer?\[+-\]integer?" } } - + + # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bunds on upper vs lower side proc lindex_resolve {list index} { #*** !doctools #[call [fun lindex_resolve] [arg list] [arg index]] @@ -1679,11 +1771,13 @@ namespace eval punk::lib { #[para]Sometimes the actual integer index is desired. #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. #[para]lindex_resolve will parse the index expression and return: - #[para] a) -2 if the supplied index expression is below the lower bound for the supplied list. (< 0) - #[para] b) -1 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] a) -3 if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) -2 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list #[para]Otherwise it will return an integer corresponding to the position in the list. #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out or bounds indices, or in the case of lrange, return results anyway. #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which we will resolve (hopefully safely) with expr #if {![llength $list]} { @@ -1694,9 +1788,9 @@ namespace eval punk::lib { if {[string is integer -strict $index]} { #can match +i -i if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } else { #integer may still have + sign - normalize with expr return [expr {$index}] @@ -1708,14 +1802,14 @@ namespace eval punk::lib { set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { - return -1 + return -2 } } else { - #end + #index is 'end' set index [expr {[llength $list]-1}] if {$index < 0} { - #special case - end with empty list - treat end like a positive number out of bounds - return -1 + #special case - 'end' with empty list - treat end like a positive number out of bounds + return -2 } else { return $index } @@ -1723,7 +1817,7 @@ namespace eval punk::lib { if {$offset == 0} { set index [expr {[llength $list]-1}] if {$index < 0} { - return -1 ;#special case + return -2 ;#special case as above } else { return $index } @@ -1732,7 +1826,7 @@ namespace eval punk::lib { set index [expr {([llength $list]-1) - $offset}] } if {$index < 0} { - return -2 + return -3 } else { return $index } @@ -1753,26 +1847,50 @@ namespace eval punk::lib { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } return $index } } } - proc lindex_resolve2 {list index} { - #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ but that's likely unavailable here. + proc lindex_resolve_basic {list index} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg list] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -1 for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ + # - which #for {set i 0} {$i < [llength $list]} {incr i} { # lappend indices $i #} + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < 0 || ($index >= [llength $list])} { + #even though in this case we could return -2 or -3 like lindex_resolve; for consistency we don't, as it's not always determinable for compound indices using the lseq method. + return -1 + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } if {[llength $list]} { set indices [punk::lib::range 0 [expr {[llength $list]-1}]] ;# uses lseq if available, has fallback. + #if lseq was available - $indices is an 'arithseries' - theoretically not taking up ram(?) } else { set indices [list] } set idx [lindex $indices $index] if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end return -1 } else { return $idx @@ -2334,13 +2452,6 @@ namespace eval punk::lib { } return $prefix } - #test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var - proc swapnumvars {namea nameb} { - upvar $namea a $nameb b - set a [expr {$a ^ $b}] - set b [expr {$a ^ $b}] - set a [expr {$a ^ $b}] - } #e.g linesort -decreasing $data proc linesort {args} { @@ -2956,7 +3067,7 @@ namespace eval punk::lib { # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { - set number [punk::objclone $unformattednumber] + set number [objclone $unformattednumber] set number [string map {_ ""} $number] #normalize using expr - e.g 2e4 -> 20000.0 set number [expr {$number}] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm index 932c1db6..806b172e 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm @@ -4,6 +4,7 @@ package provide punk::mix::base [namespace eval punk::mix::base { }] package require punk::path +package require punk::lib ;#format_number etc #base internal plumbing functions namespace eval punk::mix::base { @@ -657,16 +658,38 @@ namespace eval punk::mix::base { #temp emission to stdout.. todo - repl telemetry channel puts stdout "cksum_path: creating temporary tar archive for $path" - puts stdout " at: $archivename .." - tar::create $archivename $target + puts -nonewline stdout " at: $archivename ..." + set tsstart [clock millis] + if {[set tarpath [auto_execok tar]] ne ""} { + #using an external binary is *significantly* faster than tar::create - but comes with some risks + #review - need to check behaviour/flag variances across platforms + #don't use -z flag. On at least some tar versions the zipped file will contain a timestamped subfolder of filename.tar - which ruins the checksum + #also - tar is generally faster without the compression (although this may vary depending on file size and disk speed?) + exec {*}$tarpath -cf $archivename $target ;#{*} needed in case spaces in tarpath + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar -cf done ($ms ms)" + } else { + set tsstart [clock millis] ;#don't include auto_exec search time for tar::create + tar::create $archivename $target + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar::create done ($ms ms)" + puts stdout " NOTE: install tar executable for potentially *much* faster directory checksum processing" + } + if {$ftype eq "file"} { - set sizeinfo "(size [file size $target])" + set sizeinfo "(size [punk::lib::format_number [file size $target]] bytes)" } else { - set sizeinfo "(file type $ftype - size unknown)" + set sizeinfo "(file type $ftype - tarred size [punk::lib::format_number [file size $archivename]] bytes)" } - puts stdout "cksum_path: calculating cksum for $target $sizeinfo..." + set tsstart [clock millis] + puts -nonewline stdout "cksum_path: calculating cksum for $target $sizeinfo ... " set cksum [{*}$cksum_command $archivename] - #puts stdout "cksum_path: cleaning up.. " + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " cksum done ($ms ms)" + puts stdout " cksum: $cksum" file delete -force $archivename cd $startdir diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm index 9afc685c..80cab2a7 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm @@ -157,6 +157,9 @@ namespace eval punk::mix::commandset::project { set opt_force [dict get $opts -force] set opt_confirm [string tolower [dict get $opts -confirm]] # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_layout [dict get $opts -layout] + set opt_update [dict get $opts -update] + # -- --- --- --- --- --- --- --- --- --- --- --- --- set opt_modules [dict get $opts -modules] if {[llength $opt_modules] == 1 && [lindex $opt_modules 0] eq "\uFFFF"} { #if not specified - add a single module matching project name @@ -169,9 +172,6 @@ namespace eval punk::mix::commandset::project { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_layout [dict get $opts -layout] - set opt_update [dict get $opts -update] - # -- --- --- --- --- --- --- --- --- --- --- --- --- #todo - install support binaries on a per-project basis in a way that doesn't impact machine (e.g not added to path) - cache in user config dir if possible, supply mechanism to clear cache diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm index cf0bf70c..10250a9b 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -1707,6 +1707,7 @@ tcl::namespace::eval punk::ns { lassign [internal::get_run_opts {-vars -nowarnings} {} $args] _r runopts _c cmdargs set use_vars [expr {"-vars" in $runopts}] set no_warnings [expr {"-nowarnings" in $runopts}] + set ver "" #todo support leading solo flags such as -capture to control whether we do a static capture of local vars in the ns @@ -1717,15 +1718,68 @@ tcl::namespace::eval punk::ns { } default { if {[string match ::* $pkg_or_existing_ns]} { + set pkg_unqualified [string range $pkg_or_existing_ns 2 end] if {![tcl::namespace::exists $pkg_or_existing_ns]} { - set ver [package require [string range $pkg_or_existing_ns 2 end]] + set ver [package require $pkg_unqualified] } else { set ver "" } set ns $pkg_or_existing_ns } else { - set ver [package require $pkg_or_existing_ns] - set ns ::$pkg_or_existing_ns + set pkg_unqualified $pkg_or_existing_ns + set ver [package require $pkg_unqualified] + set ns ::$pkg_unqualified + } + #some packages don't create their namespace immediately and/or don't populate it with commands and instead put entries in ::auto_index + set previous_command_count 0 + if {[namespace exists $ns]} { + set previous_command_count [llength [info commands ${ns}::*]] + } + + + #also if a sub package was loaded first - then the namespace for the base or lower package may exist but have no commands + #for the purposes of pkguse - which most commonly interactive - we want the namespace populated + #It may still not be *fully* populated because we stop at first source that adds commands - REVIEW + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + + if {!$ns_populated} { + #we will catch-run an auto_index entry if any + #auto_index entry may or may not be prefixed with :: + set keys [list] + #first look for exact pkg_unqualified and ::pkg_unqualified + #leave these at beginning of keys list + if {[array exists ::auto_index($pkg_unqualified)]} { + lappend keys $pkg_unqualified + } + if {[array exists ::auto_index(::$pkg_unqualified)]} { + lappend keys ::$pkg_unqualified + } + #as auto_index is an array - we could get keys in arbitrary order + set matches [lsort [array names ::auto_index ${pkg_unqualified}::*]] + lappend keys {*}$matches + set matches [lsort [array names ::auto_index ::${pkg_unqualified}::*]] + lappend keys {*}$matches + set ns_populated 0 + set i 0 + set already_sourced [list] ;#often multiple triggers for the same source - don't waste time re-sourcing + set ns_depth [llength [punk::ns::nsparts [string trimleft $ns :]]] + while {!$ns_populated && $i < [llength $keys]} { + #todo - skip sourcing deeper entries from a subpkg which may have been loaded earlier than the base + #e.g if we are loading ::x::y + #only source for keys the same depth, or one deeper ie ::x::y, x::y, ::x::y::z not ::x or ::x::y::z::etc + set k [lindex $keys $i] + set k_depth [llength [punk::ns::nsparts [string trimleft $k :]]] + if {$k_depth == $ns_depth || $k_depth == $ns_depth + 1} { + set auto_source [set ::auto_index($k)] + if {$auto_source ni $already_sourced} { + uplevel 1 $auto_source + lappend already_sourced $auto_source + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + } + } + incr i + } + } } } @@ -1799,7 +1853,13 @@ tcl::namespace::eval punk::ns { return $out } } else { - error "Namespace $ns not found." + if {$ver eq ""} { + error "Namespace $ns not found. No package version found." + } else { + set out "(no package namespace found) remaining in [uplevel 1 {namespace current}]" + append out \n $ver + return $out + } } return $out } diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm index 2cb5fd1d..e056b14a 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm @@ -468,7 +468,7 @@ namespace eval punk::repo { set path [string trim [string range $ln [string length "MISSING "] end]] dict set pathdict $path "missing" } - "EXTRA * " { + "EXTRA *" { #fossil will explicitly list files in a new folder - as opposed to git which shows just the folder set path [string trim [string range $ln [string length "EXTRA "] end]] dict set pathdict $path "extra" diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm index 4ea2ce3d..8405fae7 100644 Binary files a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm and b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm differ diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm index d85d4416..3e13e75d 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm @@ -19,12 +19,20 @@ #*** !doctools #[manpage_begin tomlish_module_tomlish 0 1.1.1] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] -#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] #[require tomlish] -#[keywords module] +#[keywords module parsing toml configuration] #[description] -#[para] - +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ @@ -71,17 +79,41 @@ package require logger namespace eval tomlish { namespace export {[a-z]*}; # Convention: export all lowercase variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + #ARRAY is analogous to a Tcl list #TABLE is analogous to a Tcl dict #WS = inline whitespace - #KEYVAL = bare key and value - #QKEYVAL = quoted key and value + #KEY = bare key and value + #QKEY = double quoted key and value + #SQKEY = single quoted key and value #ITABLE = inline table (*can* be anonymous table) # inline table values immediately create a table with the opening brace # inline tables are fully defined between their braces, as are dotted-key subtables defined within # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained - set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL STRING MULTISTRING LITSTRING MULTILITSTRING INT FLOAT BOOL DATETIME] + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) set min_int -9223372036854775808 ;#-2^63 @@ -114,10 +146,13 @@ namespace eval tomlish { #find the value # 3 is the earliest index at which the value could occur (depending on whitespace) set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error ">>>_get_keyval_value doesn't seem to be a properly structured { = } list" + } foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { - STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { set type [lindex $sub 0] set value [lindex $sub 1] set found_sub $sub @@ -127,10 +162,10 @@ namespace eval tomlish { } } if {!$found_value} { - error "Failed to find value element in KEYVAL. '$keyval_element'" + error "Failed to find value element in KEY. '$keyval_element'" } if {$found_value > 1} { - error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + error "Found multiple value elements in KEY, expected exactly one. '$keyval_element'" } switch -exact -- $type { @@ -141,16 +176,28 @@ namespace eval tomlish { STRING - STRINGPART { set result [list type $type value [::tomlish::utils::unescape_string $value]] } - LITSTRING { + LITERAL - LITERALPART { #REVIEW set result [list type $type value $value] } - TABLE - ITABLE - ARRAY - MULTISTRING { - #jmn2024 - added ITABLE - review + TABLE { + #invalid? + error "_get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::get_dict [list $found_sub]] + } + ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! set result [list type $type value [::tomlish::get_dict [list $found_sub]]] } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } default { error "Unexpected value type '$type' found in keyval '$keyval_element'" } @@ -158,6 +205,48 @@ namespace eval tomlish { return $result } + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "_get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } #get_dict is a *basic* programmatic datastructure for accessing the data. # produce a dictionary of keys and values from a tomlish tagged list. # get_dict is primarily for reading toml data. @@ -193,9 +282,12 @@ namespace eval tomlish { set tag [lindex $item 0] #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { - KEYVAL - QKEYVAL { + KEY - QKEY - SQKEY { log::debug "--> processing $tag: $item" set key [lindex $item 1] + if {$tag eq "QKEY"} { + set key [::tomlish::utils::unescape_string $key] + } #!todo - normalize key. (may be quoted/doublequoted) if {[dict exists $datastructure $key]} { @@ -206,6 +298,43 @@ namespace eval tomlish { set keyval_dict [_get_keyval_value $item] dict set datastructure $key $keyval_dict } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "get_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } TABLE { set tablename [lindex $item 1] set tablename [::tomlish::utils::tablename_trim $tablename] @@ -220,21 +349,20 @@ namespace eval tomlish { #toml spec rule - all segments mst be non-empty #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - set key_hierarchy [list] - set key_hierarchy_raw [list] + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] foreach rawseg $name_segments { set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes - set c1 [::string index $rawseg 0] - set c2 [::string index $rawseg end] + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes are processed within it. - set seg [::string range $rawseg 1 end-1] + set seg [tcl::string::range $rawseg 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. - set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] - #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] } else { set seg $rawseg } @@ -243,15 +371,16 @@ namespace eval tomlish { #if {$rawseg eq ""} { # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" #} - lappend key_hierarchy $seg - lappend key_hierarchy_raw $rawseg + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg - if {[dict exists $datastructure {*}$key_hierarchy]} { + if {[dict exists $datastructure {*}$table_key_hierarchy]} { #It's ok for this key to already exist *if* it was defined by a previous tablename, - # but not if it was defined as a keyval/qkeyval + # but not if it was defined as a key/qkey/skey ? - set testkey [join $key_hierarchy_raw .] - set testkey_length [llength $key_hierarchy_raw] + set testkey [join $table_key_hierarchy_raw .] + + set testkey_length [llength $table_key_hierarchy_raw] set found_testkey 0 if {$testkey in $tablenames_seen} { set found_testkey 1 @@ -267,11 +396,12 @@ namespace eval tomlish { # instead compare {a b.c d} with {a b c d} # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #dots within table segments might seem like an 'edge case' - # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" if {$testkey eq $seen_match} { set found_testkey 1 } @@ -279,35 +409,81 @@ namespace eval tomlish { } if {$found_testkey == 0} { - #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset - error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg } } } + #ensure empty tables are still represented in the datastructure + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" + } + } #We must do this after the key-collision test above! lappend tablenames_seen $tablename - - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? set keyval_key [lindex $element 1] + if {$type eq "QKEY"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] - dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -320,16 +496,36 @@ namespace eval tomlish { foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { - set keyval_key [lindex $element 1] + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] - dict set datastructure $keyval_key $keyval_dict + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -350,12 +546,16 @@ namespace eval tomlish { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] } - TABLE - ARRAY - MULTISTRING { + LITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } - WS - SEP { - #ignore whitespace and commas + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments } default { error "Unexpected value type '$type' found in array" @@ -363,6 +563,49 @@ namespace eval tomlish { } } } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } MULTISTRING { #triple dquoted string log::debug "--> processing multistring: $item" @@ -372,7 +615,14 @@ namespace eval tomlish { for {set idx 0} {$idx < [llength $parts]} {incr idx} { set element [lindex $parts $idx] set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "doulbe quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } STRINGPART { append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] } @@ -533,8 +783,8 @@ namespace eval tomlish::encode { proc float {f} { #convert any non-lower case variants of special values to lowercase for Toml - if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { - return [list FLOAT [string tolower $f]] + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] } if {[::tomlish::utils::is_float $f]} { return [list FLOAT $f] @@ -553,44 +803,56 @@ namespace eval tomlish::encode { proc boolean {b} { #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false - if {![string is boolean -strict $b]} { + if {![tcl::string::is boolean -strict $b]} { error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" } else { if {[expr {$b && 1}]} { - return [list BOOL true] + return [::list BOOL true] } else { - return [list BOOL false] + return [::list BOOL false] } } } + + #TODO #Take tablename followed by - # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types proc table {name args} { set pairs [list] foreach t $args { - if {[llength $t] == 3} { - if {[lindex $t 0] ne "KEYVAL"} { - error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" } - lappend pairs $t + lappend pairs [list KEY $keystr = $valuepart] } elseif {[llength $t] == 2} { #!todo - type heuristics lassign $t n v - lappend pairs [list KEYVAL $n [list STRING $v]] + lappend pairs [list KEY $n = [list STRING $v]] } else { - error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" foreach part [lrange $item 1 end] { - append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] } - append toml '''$litstring''' + append toml '''$literal''' } INT - BOOL - @@ -777,6 +1079,7 @@ namespace eval tomlish::decode { # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. # For this reason, we also do absolutely no line-ending transformations based on platform. # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { #*** !doctools #[call [fun toml] [arg s]] @@ -835,11 +1138,12 @@ namespace eval tomlish::decode { set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. - set state "key-space" - ::tomlish::parse::spacestack push {space key-space} + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) set linenum 1 - + + set ::tomlish::parse::state_list [list] try { while {$r} { set r [::tomlish::parse::tok $s] @@ -851,31 +1155,162 @@ namespace eval tomlish::decode { #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" #puts "-->tok: $tok tokenType='$tokenType'" set prevstate $state - ##### - set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below - set state $nextstate - if {$state eq "err"} { - error "State error - aborting parse. [tomlish::parse::report_line]" + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) } - - if {$last_space_action eq "pop"} { + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? + #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] + } + ''' { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } newline { incr linenum lappend v($nest) [list NEWLINE $tok] } tablename { #note: a tablename only 'pops' if we are greater than zero - error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" } tablearrayname { #!review - tablearrayname different to tablename regarding push/pop? #note: a tablename only 'pops' if we are greater than zero - error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" } endarray { #nothing to do here. @@ -885,29 +1320,74 @@ namespace eval tomlish::decode { lappend v($nest) "SEP" } endinlinetable { - puts stderr "endinlinetable" + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } endmultiquote { - puts stderr "endmultiquote for last_space_action 'pop'" + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" } default { - error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } - set parentlevel [expr {$nest -1}] - lappend v($parentlevel) [set v($nest)] + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + incr nest -1 } elseif {$last_space_action eq "push"} { + set prevnest $nest incr nest 1 set v($nest) [list] # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } barekey { - set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" } quotedkey - itablequotedkey { - set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + set v($nest) [list QKEY $tok] ;#$tok is the keyname + } + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! @@ -922,7 +1402,7 @@ namespace eval tomlish::decode { # tomlish list? set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) @@ -940,21 +1420,31 @@ namespace eval tomlish::decode { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } startmultiquote { - puts stderr "push trigger tokenType startmultiquote (todo)" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE - #JMN ??? - #set next_tokenType_known 1 - #::tomlish::parse::set_tokenType "multistring" - #set tok "" + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL } default { - error "push trigger tokenType '$tokenType' not yet implemented" + error "---- push trigger tokenType '$tokenType' not yet implemented" } } } else { #no space level change switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } starttablename { #$tok is triggered by the opening bracket and sends nothing to output } @@ -962,40 +1452,69 @@ namespace eval tomlish::decode { #$tok is triggered by the double opening brackets and sends nothing to output } tablename - tablenamearray { - error "did not expect 'tablename/tablearrayname' without space level change" + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" #set v($nest) [list TABLE $tok] } endtablename - endtablearrayname { #no output into the tomlish list for this token } startinlinetable { - puts stderr "decode::toml error. did not expect startlinetable without space level change" + puts stderr "---- decode::toml error. did not expect startinlinetable without space level change (no space level change)" } startquote { - switch -exact -- $nextstate { - string { + switch -exact -- $newstate { + string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quotedkey { + quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "quotedkey" set tok "" } - itablequotedkey { + itable-quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "itablequotedkey" set tok "" } default { - error "startquote switch case not implemented for nextstate: $nextstate" + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startsquote { + switch -exact -- $newstate { + literal-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "literal" + set tok "" + } + squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itable-squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } + default { + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" } } } startmultiquote { #review - puts stderr "no space level change - got startmultiquote" + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" set next_tokenType_known 1 ::tomlish::parse::set_tokenType "stringpart" set tok "" @@ -1004,27 +1523,53 @@ namespace eval tomlish::decode { #nothing to do? set tok "" } + endsquote { + set tok "" + } endmultiquote { #JMN!! set tok "" } string { - lappend v($nest) [list STRING $tok] + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes } - stringpart { - lappend v($nest) [list STRINGPART $tok] + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } } multistring { #review lappend v($nest) [list MULTISTRING $tok] } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } quotedkey { #lappend v($nest) [list QKEY $tok] ;#TEST } itablequotedkey { } - untyped-value { + untyped_value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. if {$tok in {true false}} { set tag BOOL @@ -1035,9 +1580,10 @@ namespace eval tomlish::decode { } elseif {[::tomlish::utils::is_datetime $tok]} { set tag DATETIME } else { - error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line] (no space level change)" } lappend v($nest) [list $tag $tok] + } comment { #puts stdout "----- comment token returned '$tok'------" @@ -1068,17 +1614,18 @@ namespace eval tomlish::decode { #!todo - check previous tokens are complete/valid? } default { - error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" } } } if {!$next_tokenType_known} { + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" ::tomlish::parse::set_tokenType "" set tok "" } - if {$state eq "end"} { + if {$state eq "end-state"} { break } @@ -1108,8 +1655,6 @@ namespace eval tomlish::decode { } finally { set is_parsing 0 } - - return $v(0) } @@ -1136,31 +1681,84 @@ namespace eval tomlish::utils { set segments [tablename_split $tablename false] set trimmed_segments [list] foreach seg $segments { - lappend trimmed_segments [::string trim $seg [list " " \t]] + lappend trimmed_segments [::string trim $seg " \t"] } return [join $trimmed_segments .] } + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [tcl::string::length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + #utils::tablename_split proc tablename_split {tablename {normalize false}} { #we can't just split on . because we have to handle quoted segments which may contain a dot. #eg {dog."tater.man"} - set i 0 - set sLen [::string length $tablename] + set sLen [tcl::string::length $tablename] set segments [list] set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" - for {} {$i < $sLen} {} { + for {set i 0} {$i < $sLen} {incr i} { if {$i > 0} { - set lastChar [::string index $tablename [expr {$i - 1}]] + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $tablename $i] - incr i + set c [tcl::string::index $tablename $i] if {$c eq "."} { switch -exact -- $mode { @@ -1188,7 +1786,7 @@ namespace eval tomlish::utils { } } elseif {($c eq "\"") && ($lastChar ne "\\")} { if {$mode eq "unknown"} { - if {[::string trim $seg] ne ""} { + if {[tcl::string::trim $seg] ne ""} { #we don't allow a quote in the middle of a bare key error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" } @@ -1240,7 +1838,7 @@ namespace eval tomlish::utils { } append seg $c } - if {$i == $sLen} { + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" switch -exact -- $mode { @@ -1251,13 +1849,13 @@ namespace eval tomlish::utils { if {$normalize} { lappend segments $seg } else { - lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong } } litquoted { - set trimmed_seg [::string trim $seg] - if {[::string index $trimmed_seg end] ne "\'"} { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" } lappend segments $seg @@ -1275,14 +1873,14 @@ namespace eval tomlish::utils { } } foreach seg $segments { - set trimmed [::string trim $seg [list " " \t]] + set trimmed [tcl::string::trim $seg " \t"] #note - we explicitly allow 'empty' quoted strings '' & "" # (these are 'discouraged' but valid toml keys) #if {$trimmed in [list "''" "\"\""]} { # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" #} if {$trimmed eq "" } { - error "tablename_split. Empty segment found. tablename: '$tablename'" + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" } } return $segments @@ -1294,7 +1892,7 @@ namespace eval tomlish::utils { # is a valid 'unicode scalar value' # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} - if {[::string match {\\u*} $slashu]} { + if {[tcl::string::match {\\u*} $slashu]} { set exp {^\\u([0-9a-fA-F]{4}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %4x dec] != 1} { @@ -1306,7 +1904,7 @@ namespace eval tomlish::utils { } else { return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] } - } elseif {[::string match {\\U*} $slashu]} { + } elseif {[tcl::string::match {\\U*} $slashu]} { set exp {^\\U([0-9a-fA-F]{8}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %8x dec] != 1} { @@ -1340,7 +1938,7 @@ namespace eval tomlish::utils { set buffer4 "" ;#buffer for 4 hex characters following a \u set buffer8 "" ;#buffer for 8 hex characters following a \u - set sLen [::string length $str] + set sLen [tcl::string::length $str] #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc set slash_active 0 @@ -1352,12 +1950,12 @@ namespace eval tomlish::utils { set i 0 for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [::string index $str [expr {$i - 1}]] + set lastChar [tcl::string::index $str [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $str $i] + set c [tcl::string::index $str $i] ::tomlish::log::debug "unescape_string. got char $c" scan $c %c n if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { @@ -1380,10 +1978,10 @@ namespace eval tomlish::utils { } } else { if {$unicode4_active} { - if {[::string length $buffer4] < 4} { + if {[tcl::string::length $buffer4] < 4} { append buffer4 $c } - if {[::string length $buffer4] == 4} { + if {[tcl::string::length $buffer4] == 4} { #we have a \uHHHH to test set unicode4_active 0 set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] @@ -1394,10 +1992,10 @@ namespace eval tomlish::utils { } } } elseif {$unicode8_active} { - if {[::string length $buffer8] < 8} { + if {[tcl::string::length $buffer8] < 8} { append buffer8 $c } - if {[::string length $buffer8] == 8} { + if {[tcl::string::length $buffer8] == 8} { #we have a \UHHHHHHHH to test set unicode8_active 0 set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] @@ -1409,7 +2007,7 @@ namespace eval tomlish::utils { } } elseif {$slash_active} { set slash_active 0 - set ctest [string map {{"} dq} $c] + set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { set e "\\\"" @@ -1453,15 +2051,15 @@ namespace eval tomlish::utils { } proc normalize_key {rawkey} { - set c1 [::string index $rawkey 0] - set c2 [::string index $rawkey end] + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes allowed within it. - set key [::string range $rawkey 1 end-1] + set key [tcl::string::range $rawkey 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. # - set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only set key [::tomlish::utils::unescape_string $keydata] #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. } else { @@ -1497,11 +2095,11 @@ namespace eval tomlish::utils { #check if str is valid for use as a toml bare key proc is_barekey {str} { - if {[::string length $str] == 0} { + if {[tcl::string::length $str] == 0} { return 0 } else { set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters match the regexp return 1 } else { @@ -1512,7 +2110,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'integer'. proc int_validchars1 {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { return 1 } else { @@ -1521,7 +2119,7 @@ namespace eval tomlish::utils { } #add support for hex,octal,binary 0x.. 0o.. 0b... proc int_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { return 1 } else { @@ -1538,22 +2136,22 @@ namespace eval tomlish::utils { # --------------------------------------- #check for leading zeroes in non 0x 0b 0o #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { return 0 } # --------------------------------------- #check +,- only occur in the first position. - if {[::string last - $str] > 0} { + if {[tcl::string::last - $str] > 0} { return 0 } - if {[::string last + $str] > 0} { + if {[tcl::string::last + $str] > 0} { return 0 } - set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) - if {![string is integer -strict $numeric_value]} { + if {![tcl::string::is integer -strict $numeric_value]} { return 0 } #!todo - check bounds only based on some config value @@ -1575,7 +2173,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'float'. proc float_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { return 1 } else { @@ -1595,7 +2193,7 @@ namespace eval tomlish::utils { return 1 } - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) #Toml spec also disallows leading zeros in the exponent part @@ -1603,12 +2201,12 @@ namespace eval tomlish::utils { #for now we will allow leading zeros in exponents #!todo - configure 'strict' option to disallow? #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] set r {([0-9])*} regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E set z {([0])*} regexp $z $intpart leadingzeros - if {[::string length $leadingzeros] > 1} { + if {[tcl::string::length $leadingzeros] > 1} { return 0 } #for floats, +,- may occur in multiple places @@ -1616,9 +2214,9 @@ namespace eval tomlish::utils { #!todo - check bounds ? #strip underscores for tcl double check - set check [::string map {_ ""} $str] + set check [tcl::string::map {_ ""} $str] #string is double accepts inf nan +NaN etc. - if {![::string is double $check]} { + if {![tcl::string::is double $check]} { return 0 } @@ -1631,7 +2229,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'datetime'. proc datetime_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { return 1 } else { @@ -1639,19 +2237,37 @@ namespace eval tomlish::utils { } } + #review - we proc is_datetime {str} { - #e.g 1979-05-27T00:32:00-07:00 + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #!todo - use full RFC 3339 parser? lassign [split $str T] datepart timepart #!todo - what if the value is 'time only'? - - if {[catch {clock scan $datepart} err]} { - puts stderr "tcl clock scan failed err:'$err'" - return 0 - } + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + #!todo - verify time part is reasonable } else { return 0 @@ -1670,174 +2286,434 @@ namespace eval tomlish::parse { #[para] #[list_begin definitions] + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? + #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? + + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text variable state - # states: - # key-space, curly-space, array-space - # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # states: + # table-space, itable-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string-state, literal-state, multistring... # # notes: - # key-space i # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack - # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax # #stateMatrix defines for each state, actions to take for each possible token. - #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. - #dual-element actions are a push command and the name of the space to push on the stack. - # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. + #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases - #test variable stateMatrix set stateMatrix [dict create] - dict set stateMatrix\ - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + #xxx-space vs xxx-syntax inadequately documented - TODO + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# dict set stateMatrix\ - curly-space {\ - whitespace "curly-space"\ - newline "curly-space"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ } - #REVIEW - #toml spec looks like heading towards allowing newlines within inline tables - #https://github.com/toml-lang/toml/issues/781 + #itable-space/ curly-syntax : itables dict set stateMatrix\ - curly-syntax {\ - whitespace "curly-syntax"\ - newline "curly-syntax"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-ok"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ } dict set stateMatrix\ - value-expected {\ - whitespace "value-expected"\ - newline "err"\ - eof "err"\ - untyped-value "samespace"\ - startquote "string"\ - startmultiquote {pushspace "multistring-space"}\ - startinlinetable {pushspace curly-space}\ - comment "err"\ - comma "err"\ - startarray {pushspace array-space}\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ } + + # ' = ' portion of keyval dict set stateMatrix\ - array-space {\ - whitespace "array-space"\ - newline "array-space"\ - eof "err"\ - untyped-value "samespace"\ - startarray {pushspace "array-space"}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "array-space"\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ } + #### dict set stateMatrix\ - array-syntax {\ - whitespace "array-syntax"\ - newline "array-syntax"\ - untyped-value "samespace"\ - startarray {pushspace array-space}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "err"\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - - + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} dict set stateMatrix\ - itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + leading-squote-space {\ + squote_seq "POPSPACE"\ + } #dict set stateMatrix\ - # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + dict set stateMatrix\ - itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ - itablekeyval-space {} + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } dict set stateMatrix\ - itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value + dict set stateMatrix\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ + } + #dottedkeyend "POPSPACE" + + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 dict set stateMatrix\ - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey "itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ + } + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? + + + #JMN REVIEW dict set stateMatrix\ - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ + comma "array-space"\ + comment "array-space"\ + eof "err-state-array-space-got-eof"\ + } dict set stateMatrix\ - keyval-space {} + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + comma "array-space"\ + comment "err-state"\ + } + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space + dict set stateMatrix\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ + } dict set stateMatrix\ - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ + } + # endsquote {PUSHSPACE "keyval-space"} + dict set stateMatrix\ - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + string-state {\ + whitespace "NA"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } dict set stateMatrix\ - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + literal-state {\ + whitespace "NA"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + + + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } dict set stateMatrix\ - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ + } + + + #only valid subparts are literalpart and newline. other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + + dict set stateMatrix\ - baretablename {whitespace "NA" newline "err" equal "value-expected"} + tablename-state {\ + whitespace "NA"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ + } dict set stateMatrix\ - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + tablearrayname-state {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ + } + dict set stateMatrix\ - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - end {} - - #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push - variable stateMatrix_orig { - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} - curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} - value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} - array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} - array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - keyval-space {} - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} - end {} + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } + } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- #build a list of 'push triggers' from the stateMatrix # ie tokens which can push a new space onto spacestack set push_trigger_tokens [list] tcl::dict::for {s transitions} $stateMatrix { tcl::dict::for {token transition_to} $transitions { - set action [lindex $transition_to 0] - switch -exact -- $action { - pushspace - zeropoppushspace { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { if {$token ni $push_trigger_tokens} { lappend push_trigger_tokens $token } @@ -1845,84 +2721,139 @@ namespace eval tomlish::parse { } } } - puts stdout "push_trigger_tokens: $push_trigger_tokens" - #!todo - hard code once stateMatrix finalised? + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + #mainly for the -space states: #redirect to another state $c based on a state transition from $whatever to $b # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. - #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' + #this is useful as we often don't know state $b. e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next variable spacePopTransitions { - array-space array-syntax - curly-space curly-syntax - keyval-space keytail - itablekeyval-space itablevaltail + array-space array-syntax } - variable spacePushTransitions { - keyval-space keyval-syntax - itablekeyval-space itablekeyval-syntax - array-space array-space - curly-space curly-space - key-space tablename + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail - variable state_list + variable state_list ;#reset every tomlish::decode::toml namespace export tomlish toml namespace ensemble create - proc getNextState {tokentype currentstate} { + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state variable nest variable v - + + set prevstate $currentstate + + variable spacePopTransitions variable spacePushTransitions - variable last_space_action "none" - variable last_space_type "none" + variable spaceSameTransitions + + variable last_space_action "none" + variable last_space_type "none" variable state_list set result "" + set starttok "" if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] - ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { - popspace { + POPSPACE { spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + set last_space_action "pop" set last_space_type $type - - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" } else { - set next $target + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. + # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } } set result $next } - samespace { - #note the same data as popspace (spacePopTransitions) is used here. - set parent [spacestack peek] - ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" - lassign $parent type target - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" } else { - set next $target + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } } set result $next } zeropoppushspace { if {$nest > 0} { - #pop back down to the root level (key-space) + #pop back down to the root level (table-space) spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + set last_space_action "pop" set last_space_type $type @@ -1935,36 +2866,72 @@ namespace eval tomlish::parse { } #re-entrancy - #set next [list pushspace [lindex $transition_to 1]] + #set next [list PUSHSPACE [lindex $transition_to 1]] set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 - ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" - set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] } - pushspace { - set target [lindex $transition_to 1] - spacestack push [list space $target] + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + set last_space_action "push" set last_space_type "space" - - #puts $::tomlish::parse::spacePushTransitions - if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { - set next [dict get $::tomlish::parse::spacePushTransitions $target] - ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" } else { - set next $target + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } } set result $next } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } default { - set result $transition_to + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word } } } else { - set result "nostate-err" - + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" } - lappend state_list $result - return $result + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] } proc report_line {{line ""}} { @@ -1988,7 +2955,7 @@ namespace eval tomlish::parse { foreach el $list { if { [lindex $el 0] eq "NEWLINE"} { append prettier "[list $el]\n" - } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { append prettier [nest_pretty1 $el] } else { append prettier "[list $el] " @@ -2023,12 +2990,13 @@ namespace eval tomlish::parse { proc _shortcircuit_startquotesequence {} { variable tok variable i - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { set_tokenType "startquote" incr i -1 return -level 2 1 } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" set_tokenType "startquote" set tok "\"" incr i -2 @@ -2036,8 +3004,81 @@ namespace eval tomlish::parse { } } - #return a list of 0 1 or 2 tokens + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "Unexpected. Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + + #returns 0 or 1 #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + proc tok {s} { variable nest variable v @@ -2046,14 +3087,12 @@ namespace eval tomlish::parse { variable type ;#character type variable state ;#FSM - set resultlist [list] variable tokenType variable tokenType_list variable endToken - set sLen [::string length $s] variable lastChar @@ -2063,400 +3102,601 @@ namespace eval tomlish::parse { #------------------------------ #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof variable token_waiting - if {[dict size $token_waiting]} { - set tokenType [dict get $token_waiting type] - set tok [dict get $token_waiting tok] - dict unset token_waiting type - dict unset token_waiting tok + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] return 1 } #------------------------------ + set resultlist [list] + set sLen [tcl::string::length $s] + set slash_active 0 set quote 0 set c "" set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [string index $s [expr {$i - 1}]] + set lastChar [tcl::string::index $s [expr {$i - 1}]] } else { set lastChar "" } - set c [string index $s $i] + set c [tcl::string::index $s $i] + set cindex $i + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" #puts "got char $c during tokenType '$tokenType'" - incr i ;#must incr here because we do'returns'inside the loop + incr i ;#must incr here because we do returns inside the loop - set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" } whitespace { # hash marks end of whitespace token #do a return for the whitespace, set token_waiting - #dict set token_waiting type comment - #dict set token_waiting tok "" + #set_token_waiting type comment value "" complete 1 incr i -1 ;#leave comment for next run return 1 } - untyped-value { + untyped_value { #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? - #we will accept a comment marker as an immediate terminator of the untyped-value. + #we will accept a comment marker as an immediate terminator of the untyped_value. incr i -1 return 1 } + starttablename - starttablearrayname { + #fix! + error "Character '#' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } default { - #quotedkey, string, multistring + #quotedkey, itablequotedkey, string,literal, multistring append tok $c } } } else { - #$slash_active not relevant when no tokenType - #start of token if we're not in a token - set_tokenType comment - set tok "" ;#The hash is not part of the comment data + switch -- $state { + multistring-space { + set_tokenType stringpart + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } } } lc { - set multi_dquote "" ;#!! - #test jmn2024 #left curly brace - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart { - if {$slash_active} {append tok "\\"} - append tok $c - } - starttablename { - error "unexpected tablename problem" - #$slash_active not relevant to this tokentype - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - comment { - if {$slash_active} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - switch -exact -- $state { - value-expected { - #switch last key to tablename?? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - multistring-space { - set_tokenType "stringpart" - if {$slash_active} { - set tok "\\\{" - } else { - set tok "\{" - } - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - array-space - array-syntax { - #nested anonymous inline table - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - default { - error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } rc { - set multi_dquote "" ;#!! #right curly brace - try { - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - tablename { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endinlinetable - dict set token_waiting tok "" - return 1 - } - tablearrayname { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" - return 1 - } - itablevaltail { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 - } - default { - #end any other token - incr i -1 - return 1 - } + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - error "unexpected tablename problem" - - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - curly-syntax - curly-space { - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - array-syntax - array-space { - #invalid - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - itablevaltail { - set_tokenType "endinlinetable" - set tok "" - #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 - incr i -1 - return 1 - } - itablekeyval-syntax { - error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" - } - default { - #JMN2024b keytail? - error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" - } + startquotesequence { + _shortcircuit_startquotesequence } - } - } on error {em} { - error $em - } finally { - set slash_active 0 - } - - } - lb { - set multi_dquote "" ;#!! - #left square bracket - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart { - if {$slash_active} {append tok "\\"} - append tok $c - } - starttablename { - #$slash_active not relevant to this tokentype - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - comment { - if {$slash_active} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - set_tokenType "startarray" - set tok "\[" - return 1 + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } - key-space { - #table name - #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray - #note that a starttablearrayname token may contain whitespace between the brackets - # e.g \[ \[ - set_tokenType "starttablename" - set tok "" ;#there is no output into the tomlish list for this token - } - array-space - array-syntax { - #nested array? - set_tokenType "startarray" - set tok "\[" - return 1 - #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + + } + lb { + #left square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } } - default { - error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } rb { - set multi_dquote "" ;#!! #right square bracket - try { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - tablename { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablename - dict set token_waiting tok "" - return 1 - } - tablearraynames { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 - } - default { + } else { incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 } } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endarray" - set tok "\]" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endarray" - set tok "\]" - return 1 + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + set_token_waiting type endtablename value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - error "unexpected tablename problem" + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" - set_tokenType "endtablename" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endtablearray" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - array-syntax - array-space { - set_tokenType "endarray" - set tok "\]" - return 1 - } - default { - error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\]" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } + default { + error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } bsl { set dquotes $multi_dquote set multi_dquote "" ;#!! #backslash - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - litstring - multilitstring - comment - tablename - tablearrayname { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -2474,13 +3714,15 @@ namespace eval tomlish::parse { set slash_active 1 } } - whitespace { - if {$state eq "multistring-space"} { - #end whitespace token - incr i -1 - return 1 + starttablename - starttablearrayname { + error "backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" } else { - error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + set slash_active 1 } } barekey { @@ -2491,206 +3733,448 @@ namespace eval tomlish::parse { } } } else { - if {$state eq "multistring-space"} { - set slash_active 1 - } else { - error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } } } } - dq { - #double quote - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { append tok $c - } elseif {$toklen == 2} { + if {$existingtoklen > 2} { + error "tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { append tok $c - set_tokenType "startmultiquote" - return 1 - } else { - error "unexpected token length in 'startquotesequence'" + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" } } - endquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + #temp token creatable only during value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { append tok $c - } elseif {$toklen == 2} { + } + 2 { + #switch? append tok $c - set_tokenType "endmultiquote" + set_tokenType triple_squote return 1 - } else { - error "unexpected token length in 'endquotesequence'" + } + default { + error "unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } - string { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" + } + literal { + #slash_active always false + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 + return 1 + } + starttablename - starttablearrayname { + #!!! + incr i -1 + return 1 + } + tablename - tablearrayname { + append tok $c + } + default { + append tok $c + } + } + } else { + switch -exact -- $state { + value-expected - array-space { + set_tokenType "_start_squote_sequence" + set tok "'" + } + itable-keyval-value-expected - keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname + append tok "'" + } + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal + incr -1 + return 1 + } + multistring-space { + error "unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey + } + default { + error "unhandled squote during state '$state'. [tomlish::parse::report_line]" + } + } + } + + } + dq { + #double quote + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + #switch vs set? + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length $toklen in 'startquotesequence'" + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 return 1 } + 2 { + set_tokenType "startsquote" + incr i -2 + return 1 + } + default { + error "unexpected _start_squote_sequence length $toklen" + } } - stringpart { - #sub element of multistring - if {$slash_active} { - append tok "\\" - append tok $c + } + literal - literalpart { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] + set multi_dquote "" + return 1 } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + append multi_dquote "\"" } } - whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } - } - value-expected { - if {$multi_dquote eq "\"\""} { - dict set token_waiting type startmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - #end whitespace token and reprocess - incr i -1 - return 1 - #append multi_dquote "\"" + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } } } - default { - dict set token_waiting type startquote - dict set token_waiting tok "\"" - return 1 - } } - } - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - quotedkey - itablequotedkey { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - dict set token_waiting type endquote - dict set token_waiting tok "\"" + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} + } + default { + set_token_waiting type startquote value "\"" complete 1 startindex $cindex + return 1 } } - tablename - tablearrayname { - if {$slash_active} {append tok "\\"} + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" append tok $c - } - starttablename - starttablearrayname { - incr i -1 ;## + } else { + set_token_waiting type endquote value "\"" complete 1 startindex $cindex return 1 } - default { - error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" - } } - } else { - #$slash_active not relevant when no tokenType - #token is string only if we're expecting a value at this point - switch -exact -- $state { - value-expected - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - multistring-space { - #REVIEW + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + keyval-value-expected - value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! + #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" + tomlish::log::debug "- tokloop char dq ---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" return 1 + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 + #set multi_dquote "" + #return 1 } else { append multi_dquote "\"" } } - key-space { - set tokenType startquote - set tok $c - return 1 - } - curly-space { - set tokenType startquote - set tok $c - return 1 - } - tablename - tablearrayname { - set_tokenType $state - set tok $c - } - default { - error "Unexpected quote during state '$state' [tomlish::parse::report_line]" - } + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { + set_tokenType "startquote" + set tok $c + return 1 + } + itable-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin + set tok $c + } + default { + error "Unexpected quote during state '$state' [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } = { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} append tok $c } stringpart { + if {$had_slash} {append tok "\\"} append tok $dquotes$c } whitespace { - dict set token_waiting type equal - dict set token_waiting tok = - return 1 + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type equal value = complete 1 startindex $cindex + return 1 + } } barekey { - dict set token_waiting type equal - dict set token_waiting tok = + #set_token_waiting type equal value = complete 1 + incr i -1 return 1 } + starttablename - starttablearrayname { + error "Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } default { error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" } @@ -2698,11 +4182,24 @@ namespace eval tomlish::parse { } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok ${dquotes}= + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= + } + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 } default { - set_tokenType equal + set_tokenType "equal" set tok = return 1 } @@ -2710,19 +4207,47 @@ namespace eval tomlish::parse { } } cr { + #REVIEW! set dquotes $multi_dquote set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + append tok $c + } + literalpart { + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warning "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } stringpart { append tok $dquotes$c } + starttablename - starttablearrayname { + error "Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } default { #!todo - error out if cr inappropriate for tokenType append tok $c @@ -2731,24 +4256,46 @@ namespace eval tomlish::parse { } else { #lf may be appended if next #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) - set_tokenType newline + set_tokenType "newline" set tok cr } } lf { + # \n newline set dquotes $multi_dquote set multi_dquote "" ;#!! - # \n newline - if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } newline { + #review #this lf is the trailing part of a crlf - append tok lf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } stringpart { @@ -2757,11 +4304,20 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - dict set token_waiting type newline - dict set token_waiting tok lf - return 1 + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] + incr i -1 + return 1 + } else { + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } } } + starttablename - tablename - tablearrayname - starttablearrayname { + error "Character is invalid in $tokenType. [tomlish::parse::report_line]" + } default { #newline ends all other tokens. #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) @@ -2770,60 +4326,130 @@ namespace eval tomlish::parse { # ie whitespace is split into separate whitespace tokens at each newline #puts "-------------- newline lf during tokenType $tokenType" - dict set token_waiting type newline - dict set token_waiting tok lf + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } } } else { - set had_slash $slash_active - set slash_active 0 - if {$had_slash} { - set_tokenType "continuation" - set tok "\\" - incr i -1 - return 1 - } else { - set_tokenType newline - set tok lf - return 1 + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} } } , { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey - tablename - tablearrayname { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} append tok $c } stringpart { + #stringpart can have up to 2 quotes too + if {$had_slash} {append tok "\\"} append tok $dquotes$c } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type comma value "," complete 1 startindex $cindex + return 1 + } + } default { - dict set token_waiting type comma - dict set token_waiting tok "," + set_token_waiting type comma value "," complete 1 startindex $cindex + if {$had_slash} {append tok "\\"} return 1 } } } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "," + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," } multiliteral-space { - set_tokenType literalpart + #assert had_slash 0, multi_dquote "" + set_tokenType "literalpart" set tok "," } default { - set_tokenType comma + set_tokenType "comma" set tok "," return 1 } @@ -2831,47 +4457,106 @@ namespace eval tomlish::parse { } } . { + set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - stringpart - comment - quotedkey - untyped-value { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped_value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" append tok $c } - baretablename - tablename - tablearrayname { + whitespace { + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { #subtable - split later - review append tok $c } barekey { + #e.g x.y = 1 #we need to transition the barekey to become a structured table name ??? review - switch_tokenType tablename - incr i -1 - - #error "barekey period unimplemented" + #x is the tablename y is the key + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 } default { - error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" - #dict set token_waiting type period - #dict set token_waiting tok "." + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 #return 1 } } } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "." + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." } multiliteral-space { - set_tokenType literalpart + set_tokenType "literalpart" + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" set tok "." + return 1 } default { - set_tokenType untyped-value + set_tokenType "untyped_value" set tok "." } } @@ -2881,24 +4566,38 @@ namespace eval tomlish::parse { " " { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { + #todo had_slash - emit token or error #whitespace is a terminator for bare keys - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - untyped-value { + untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } @@ -2906,37 +4605,45 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $c - } - quotedkey - string { - if {$had_slash} { - append tok "\\" - } - #if {$dquotes eq "\""} { - #} - append tok $c + append tok $dquotes$c } - whitespace { + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } append tok $c } stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) if {$had_slash} { #REVIEW + #emit the stringpart - go back to the slash incr i -2 return 1 } else { #split into STRINGPART aaa WS " " - #keeping WS separate allows easier processing of CONT stripping append tok $dquotes incr i -1 return 1 } } - starttablename { - incr i -1 - return 1 + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } } - starttablearrayname { + starttablename - starttablearrayname { incr i -1 return 1 } @@ -2951,19 +4658,21 @@ namespace eval tomlish::parse { } } else { set had_slash $slash_active - if {$slash_active} { - set slash_active 0 - } + set slash_active 0 switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - if {$had_slash} { - set tok "\\$c" - } else { - set tok $c - } + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c } multistring-space { if {$had_slash} { @@ -2976,12 +4685,16 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok $dquotes incr i -1 - return + return 1 } set_tokenType "whitespace" append tok $c } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { if {$had_slash} { error "unexpected backslash [tomlish::parse::report_line]" @@ -2996,35 +4709,58 @@ namespace eval tomlish::parse { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { #whitespace is a terminator for bare keys incr i -1 - #set token_waiting type whitespace - #set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 return 1 } untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - quotedkey { + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { append tok $c } string - comment - whitespace { append tok $c } stringpart { - append tok $dquotes$c + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c } starttablename - starttablearrayname { incr i -1 @@ -3045,10 +4781,14 @@ namespace eval tomlish::parse { set slash_active 0 } switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname set tok $c } multistring-space { @@ -3069,6 +4809,10 @@ namespace eval tomlish::parse { } } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { set_tokenType "whitespace" append tok $c @@ -3078,27 +4822,77 @@ namespace eval tomlish::parse { } bom { #BOM (Byte Order Mark) - ignored by token consumer - set_tokenType "bom" - set tok "\uFEFF" - return 1 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + _start_squote_sequence { + #assert - tok will be one or two squotes only + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart { + append tok $c + } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } } default { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - endquotesequence { - puts stderr "endquotesequence: $tok" + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 } whitespace { - incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. - return 1 + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } } barekey { if {[tomlish::utils::is_barekey $c]} { @@ -3116,7 +4910,7 @@ namespace eval tomlish::parse { append tok $dquotes$c } default { - #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } @@ -3124,7 +4918,7 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - key-space - curly-space - curly-syntax { + table-space - itable-space { #if no currently active token - assume another key value pair if {[tomlish::utils::is_barekey $c]} { set_tokenType "barekey" @@ -3133,6 +4927,15 @@ namespace eval tomlish::parse { error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } multistring-space { set_tokenType "stringpart" if {$had_slash} { @@ -3142,16 +4945,25 @@ namespace eval tomlish::parse { set tok $dquotes$c } } - tablename { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { set_tokenType "tablename" set tok $c } - tablearrayname { + tablearrayname-state { set_tokenType "tablearrayname" set tok $c } + dottedkey-space { + set_tokenType barekey + set tok $c + } default { - set_tokenType "untyped-value" + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" set tok $c } } @@ -3162,32 +4974,48 @@ namespace eval tomlish::parse { } #run out of characters (eof) - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { #check for invalid ending tokens - #if {$state eq "err"} { + #if {$state eq "err-state"} { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} - if {$tokenType eq "startquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - eror "eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #dict set token_waiting type "string" - #dict set token_waiting tok "" - return 1 + switch -exact -- $tokenType { + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #set_token_waiting type string value "" complete 1 + return 1 + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open literal + error "eof reached without closing single quote for string literal. [tomlish::parse::report_line]" + } + 2 { + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" + set tok "" + return 1 + } + } } } - dict set token_waiting type "eof" - dict set token_waiting tok "eof" + set_token_waiting type eof value eof complete 1 startindex $i ;#review return 1 } else { - ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" set tokenType "eof" set tok "eof" } @@ -3312,7 +5140,7 @@ if {$argc > 0} { puts stderr "argc: $argc args: $argv" if {($argc == 1)} { - if {[::string tolower $argv] in {help -help h -h}} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { puts stdout "Usage: -app where appname one of:[tomlish::appnames]" exit 0 } else { diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl b/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl index 9edd90b0..24206ba7 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl @@ -13,7 +13,7 @@ namespace eval ::punkmake { variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] variable non_help_flags [list -k] variable help_flags [list -help --help /?] - variable known_commands [list project get-project-info shell vendorupdate bootsupport vfscommonupdate] + variable known_commands [list project modules info check shell vendorupdate bootsupport vfscommonupdate] } if {"::try" ni [info commands ::try]} { puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" @@ -21,7 +21,7 @@ if {"::try" ni [info commands ::try]} { } #------------------------------------------------------------------------------ -#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#Module loading from src/bootsupport or [pwd]/modules if pwd is a 'src' folder #------------------------------------------------------------------------------ #If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files # - then it will attempt to preference these modules @@ -35,8 +35,10 @@ if {[file exists [file join $startdir src bootsupport]]} { set bootsupport_mod [file join $startdir bootsupport modules] set bootsupport_lib [file join $startdir bootsupport lib] } -if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + +set package_paths_modified 0 +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { set original_tm_list [tcl::tm::list] tcl::tm::remove {*}$original_tm_list set original_auto_path $::auto_path @@ -63,8 +65,18 @@ if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { } - if {[file exists [pwd]/modules]} { - tcl::tm::add [pwd]/modules + #we deliberately don't use [pwd]/modules because commonly the launch dir may be the project dir. + #The /modules are the very modules we are building - and may be in a broken state, which make.tcl then can't fix. + if {[file tail $startdir] eq "src"} { + if {[file exists $startdir/modules]} { + #launch from /modules /lib etc." \n \n + append h " $scriptname modules" \n + append h " - build modules from src/modules etc without scanning src/runtime and src/vfs folders to build kit/zipkit executables" \n \n append h " $scriptname bootsupport" \n - append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n + append h " - bootsupport modules are available to make.tcl" \n \n append h " $scriptname vendorupdate" \n - append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n + append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n append h " $scriptname vfscommonupdate" \n - append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib" \n \n - append h " $scriptname get-project-info" \n - append h " - show the name and base folder of the project to be built" \n + append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib etc" \n + append h " - before calling this (followed by make project) - you can test using '(.exe) dev'" \n + append h " this will load modules from your /module /lib paths instead of from the kit/zipkit" \n \n + append h " $scriptname info" \n + append h " - show the name and base folder of the project to be built" \n append h "" \n if {[llength $::punkmake::pkg_missing]} { append h "* ** NOTE ** ***" \n @@ -220,12 +232,68 @@ if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} } set sourcefolder $projectroot/src +if {$::punkmake::command eq "check"} { + puts stdout "- tcl::tm::list" + foreach fld [tcl::tm::list] { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + puts stdout "- auto_path" + foreach fld $::auto_path { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + set v [package require punk::mix::base] + puts stdout "punk::mix::base version $v\n[package ifneeded punk::mix::base $v]" + exit 0 +} + +if {$package_paths_modified} { + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] +} + + -if {$::punkmake::command eq "get-project-info"} { - puts stdout "- -- --- --- --- --- --- --- --- --- ---" - puts stdout "- -- get-project-info -- -" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" +if {$::punkmake::command eq "info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" + puts stdout "- -- info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" puts stdout "- projectroot : $projectroot" + set sourcefolder $projectroot/src + set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] + set vendormodulefolders [glob -nocomplain -dir $sourcefolder -type d -tails vendormodules_tcl*] + puts stdout "- vendorlib folders: ([llength $vendorlibfolders])" + foreach fld $vendorlibfolders { + puts stdout " src/$fld" + } + puts stdout "- vendormodule folders: ([llength $vendormodulefolders])" + foreach fld $vendormodulefolders { + puts stdout " src/$fld" + } + set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] + puts stdout "- source module paths: [llength $source_module_folderlist]" + foreach fld $source_module_folderlist { + puts stdout " $fld" + } + set projectlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails lib_tcl*] + lappend projectlibfolders lib + puts stdout "- source libary paths: [llength $projectlibfolders]" + foreach fld $projectlibfolders { + puts stdout " src/$fld" + } if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { set vc "fossil" set rev [punk::repo::fossil_revision $scriptfolder] @@ -241,8 +309,11 @@ if {$::punkmake::command eq "get-project-info"} { } puts stdout "- version control : $vc" puts stdout "- revision : $rev" - puts stdout "- remote : $rem" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- remote" + foreach ln [split $rem \n] { + puts stdout " $ln" + } + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" exit 0 } @@ -564,7 +635,7 @@ if {$::punkmake::command eq "bootsupport"} { -if {$::punkmake::command ne "project"} { +if {$::punkmake::command ni {project modules}} { puts stderr "Command $::punkmake::command not implemented - aborting." flush stderr after 100 @@ -803,6 +874,19 @@ if {[punk::repo::is_fossil_root $projectroot]} { $installer destroy } +if {$::punkmake::command ne "project"} { + #command = modules + puts stdout "vfs folders not checked" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into base vfs folder" + puts stdout " - use 'make.tcl project' to build executable kits/zipkits from vfs folders as well if you have runtimes installed" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" + puts stdout "-done-" + exit 0 +} + + set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] if {$buildfolder ne "$sourcefolder/_build"} { puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" @@ -832,10 +916,12 @@ if {![llength $runtimes]} { exit 0 } +set has_sdx 1 if {[catch {exec sdx help} errM]} { puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" puts stderr "err: $errM" - exit 1 + #exit 1 + set has_sdx 0 } # -- --- --- --- --- --- --- --- --- --- @@ -1025,6 +1111,8 @@ foreach runtimefile $runtimes { # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set failed_kits [list] set installed_kits [list] +set skipped_kits [list] +set skipped_kit_installs [list] proc ::make_file_traversal_error {args} { error "file_traverse error: $args" @@ -1304,30 +1392,39 @@ foreach vfstail $vfs_tails { } } kit { - if {[catch { - if {$rtname ne "-"} { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose - } else { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose - } - } result]} { - if {$rtname ne "-"} { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" - } else { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" - } - puts stderr "sdx wrap $targetkit failed" - lappend failed_kits [list kit $targetkit reason $sdxmsg] + if {!$has_sdx} { + puts stderr "no sdx available to wrap $targetkit" + lappend failed_kits [list kit $targetkit reason "sdx_executable_unavailable"] $vfs_event targetset_end FAILED $vfs_event destroy $vfs_installer destroy continue - } else { - puts stdout "ok - finished sdx" - set separator [string repeat = 40] - puts stdout $separator - puts stdout $result - puts stdout $separator + } else { + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose + } + } result]} { + if {$rtname ne "-"} { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" + } else { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" + } + puts stderr "sdx wrap $targetkit failed" + lappend failed_kits [list kit $targetkit reason $sdxmsg] + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } } } } @@ -1435,6 +1532,7 @@ foreach vfstail $vfs_tails { set skipped_vfs_build 1 puts stderr "." puts stdout "Skipping build for vfs $vfstail with runtime $rtname - no change detected" + lappend skipped_kits [list kit $targetkit reason "no change detected"] $vfs_event targetset_end SKIPPED } $vfs_event destroy @@ -1489,6 +1587,7 @@ foreach vfstail $vfs_tails { set skipped_kit_install 1 puts stderr "." puts stdout "Skipping kit install for $targetkit with vfs $vfstail runtime $rtname - no change detected" + lappend skipped_kit_installs [list kit $targetkit reason "no change detected"] $bin_event targetset_end SKIPPED } $bin_event destroy @@ -1510,8 +1609,21 @@ if {[llength $failed_kits]} { punk::lib::showdict -channel stderr -roottype list $failed_kits */@*.@* #puts stderr [join $failed_kits \n] } - -puts stdout "done" +set had_kits [expr {[llength $installed_kits] || [llength $failed_kits] || [llength $skipped_kits]}] +if {$had_kits} { + puts stdout " module builds and kit/zipkit builds processed (vfs config: src/runtime/mapvfs.config)" + puts stdout " - use 'make.tcl modules' to build modules without scanning/building the vfs folders into executable kits/zipkits" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into the base vfs folder" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" +} else { + puts stdout " module builds processed" + puts stdout "" + puts stdout " If kit/zipkit based executables required - create src/vfs/.vfs folders containing lib,modules,modules_tcl9 etc folders" + puts stdout " Also ensure appropriate executables exist in src/runtime along with src/runtime/mapvfs.config" +} +puts stdout "-done-" exit 0 diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config index fc436d8c..3993e0c9 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config @@ -1,67 +1,67 @@ - -#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project -#They must be already built, so generally shouldn't come directly from src/modules. - -#each entry - base module -set bootsupport_modules [list\ - src/vendormodules cksum\ - src/vendormodules modpod\ - src/vendormodules overtype\ - src/vendormodules oolib\ - src/vendormodules http\ - src/vendormodules dictutils\ - src/vendormodules fileutil\ - src/vendormodules textutil::adjust\ - src/vendormodules textutil::repeat\ - src/vendormodules textutil::split\ - src/vendormodules textutil::string\ - src/vendormodules textutil::tabify\ - src/vendormodules textutil::trim\ - src/vendormodules textutil::wcswidth\ - src/vendormodules uuid\ - src/vendormodules md5\ - src/vendormodules sha1\ - src/vendormodules tomlish\ - src/vendormodules test::tomlish\ - modules punkcheck\ - modules natsort\ - modules punk::ansi\ - modules punk::assertion\ - modules punk::args\ - modules punk::cap\ - modules punk::cap::handlers::caphandler\ - modules punk::cap::handlers::scriptlibs\ - modules punk::cap::handlers::templates\ - modules punk::char\ - modules punk::console\ - modules punk::du\ - modules punk::encmime\ - modules punk::fileline\ - modules punk::docgen\ - modules punk::lib\ - modules punk::mix\ - modules punk::mix::base\ - modules punk::mix::cli\ - modules punk::mix::util\ - modules punk::mix::templates\ - modules punk::mix::commandset::buildsuite\ - modules punk::mix::commandset::debug\ - modules punk::mix::commandset::doc\ - modules punk::mix::commandset::layout\ - modules punk::mix::commandset::loadedlib\ - modules punk::mix::commandset::module\ - modules punk::mix::commandset::project\ - modules punk::mix::commandset::repo\ - modules punk::mix::commandset::scriptwrap\ - modules punk::ns\ - modules punk::overlay\ - modules punk::path\ - modules punk::repo\ - modules punk::tdl\ - modules punk::zip\ - modules punk::winpath\ - modules textblock\ - modules natsort\ - modules oolib\ -] - + +#bootsupport modules can be pulled in from within other areas of src or from the built module folders of the project +#They must be already built, so generally shouldn't come directly from src/modules. + +#each entry - base module +set bootsupport_modules [list\ + src/vendormodules cksum\ + src/vendormodules modpod\ + src/vendormodules overtype\ + src/vendormodules oolib\ + src/vendormodules http\ + src/vendormodules dictutils\ + src/vendormodules fileutil\ + src/vendormodules textutil::adjust\ + src/vendormodules textutil::repeat\ + src/vendormodules textutil::split\ + src/vendormodules textutil::string\ + src/vendormodules textutil::tabify\ + src/vendormodules textutil::trim\ + src/vendormodules textutil::wcswidth\ + src/vendormodules uuid\ + src/vendormodules md5\ + src/vendormodules sha1\ + src/vendormodules tomlish\ + src/vendormodules test::tomlish\ + modules punkcheck\ + modules natsort\ + modules punk::ansi\ + modules punk::assertion\ + modules punk::args\ + modules punk::cap\ + modules punk::cap::handlers::caphandler\ + modules punk::cap::handlers::scriptlibs\ + modules punk::cap::handlers::templates\ + modules punk::char\ + modules punk::console\ + modules punk::du\ + modules punk::encmime\ + modules punk::fileline\ + modules punk::docgen\ + modules punk::lib\ + modules punk::mix\ + modules punk::mix::base\ + modules punk::mix::cli\ + modules punk::mix::util\ + modules punk::mix::templates\ + modules punk::mix::commandset::buildsuite\ + modules punk::mix::commandset::debug\ + modules punk::mix::commandset::doc\ + modules punk::mix::commandset::layout\ + modules punk::mix::commandset::loadedlib\ + modules punk::mix::commandset::module\ + modules punk::mix::commandset::project\ + modules punk::mix::commandset::repo\ + modules punk::mix::commandset::scriptwrap\ + modules punk::ns\ + modules punk::overlay\ + modules punk::path\ + modules punk::repo\ + modules punk::tdl\ + modules punk::zip\ + modules punk::winpath\ + modules textblock\ + modules natsort\ + modules oolib\ +] + diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm index 492341d6..3c200d26 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm @@ -439,7 +439,8 @@ tcl::namespace::eval overtype { if {[llength $lflines]} { lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] } - set inputchunks $lflines[unset lflines] + #set inputchunks $lflines[unset lflines] + set inputchunks [lindex [list $lflines [unset lflines]] 0] } } @@ -2115,6 +2116,7 @@ tcl::namespace::eval overtype { if {[llength $undercols]< $opt_width} { set diff [expr {$opt_width- [llength $undercols]}] if {$diff > 0} { + #set undercols [list {*}$undercols {*}[lrepeat $diff "\u0000"]] ;#2024 - much slower lappend undercols {*}[lrepeat $diff "\u0000"] lappend understacks {*}[lrepeat $diff $cs] lappend understacks_gx {*}[lrepeat $diff $gs] @@ -3889,7 +3891,19 @@ tcl::namespace::eval overtype { #OSC 4 - set colour palette #can take multiple params #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ - set params [tcl::string::range $code_content 1 end] + set params [tcl::string::range $code_content 2 end] ;#strip 4 and first semicolon + set cmap [dict create] + foreach {cnum spec} [split $params {;}] { + if {$cnum >= 0 and $cnum <= 255} { + #todo - parse spec from names like 'red' to RGB + #todo - accept rgb:ab/cd/ef as well as rgb:/a/b/c (as alias for aa/bb/cc) + #also - what about rgb:abcd/defg/hijk and 12-bit abc/def/ghi ? + dict set cmap $cnum $spec + } else { + #todo - log + puts stderr "overtype::renderline OSC 4 set colour palette - bad color number: $cnum must be from 0 to 255. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm index e367ce9e..887888e8 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm @@ -183,7 +183,9 @@ namespace eval punk::console { variable previous_stty_state_$channel set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] eq ""} { - set previous_stty_state_$channel [exec {*}$sttycmd -g <@$channel] + if {[catch {{*}$sttycmd -g <@$channel} previous_stty_state_$channel]} { + set previous_stty_state_$channel "" + } } exec {*}$sttycmd raw -echo <@$channel @@ -253,13 +255,21 @@ namespace eval punk::console { return "line" } } elseif {$raw_or_line eq "raw"} { - punk::console::enableRaw + if {[catch { + punk::console::enableRaw + } errM]} { + puts stderr "Warning punk::console::enableRaw failed - $errM" + } if {[can_ansi]} { punk::console::enableVirtualTerminal both } } elseif {$raw_or_line eq "line"} { #review -order. disableRaw has memory from enableRaw.. but but for line mode we want vt disabled - so call it after disableRaw (?) - punk::console::disableRaw + if {[catch { + punk::console::disableRaw + } errM]} { + puts stderr "Warning punk::console::disableRaw failed - $errM" + } if {[can_ansi]} { punk::console::disableVirtualTerminal input ;#default readline arrow behaviour etc punk::console::enableVirtualTerminal output ;#display/use ansi codes @@ -290,12 +300,15 @@ namespace eval punk::console { set loadstate [zzzload::pkg_require twapi] #loadstate could also be stuck on loading? - review - zzzload not very ripe - #Twapi is relatively slow to load - can be 1s plus in normal cases - and much longer if there are disk performance issues. - + #Twapi can be relatively slow to load (on some systems) - can be 1s plus in some cases - and much longer if there are disk performance issues. if {$loadstate ni [list failed]} { + #possibly still 'loading' #review zzzload usage #puts stdout "=========== console loading twapi =============" - zzzload::pkg_wait twapi + set loadstate [zzzload::pkg_wait twapi] ;#can return 'failed' will return version if already loaded or loaded during wait + } + + if {$loadstate ni [list failed]} { package require twapi ;#should be fast once twapi dll loaded in zzzload thread set ::punk::console::has_twapi 1 @@ -523,6 +536,9 @@ namespace eval punk::console { set is_raw 0 return [list stdin [list from $oldmode to $newmode]] } elseif {[set sttycmd [auto_execok stty]] ne ""} { + #stty can return info on windows - but doesn't seem to be able to set anything. + #review - is returned info even valid? + set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] ne ""} { exec {*}$sttycmd [set previous_stty_state_$channel] diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm index 63f32dee..872e4807 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -339,6 +339,92 @@ namespace eval punk::lib { set has_twapi [expr {![catch {package require twapi}]}] } + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + proc lswap {lvar a z} { + upvar $lvar l + if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { + #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + set a_index [lindex_resolve $l $a] + set a_msg "" + switch -- $a_index { + -2 { + "$a is greater th + } + -3 { + } + } + error "lswap cannot indices $a and $z $a is out of range" + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} #*** !doctools #[subsection {Namespace punk::lib}] @@ -347,6 +433,7 @@ namespace eval punk::lib { if {[info commands lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation #support minimal set from to proc range {from to} { lseq $from $to @@ -1009,24 +1096,28 @@ namespace eval punk::lib { } set lower_resolve [punk::lib::lindex_resolve $dval $a] ;#-2 for too low, -1 for too high #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds - if {${lower_resolve} == -1} { + if {${lower_resolve} == -2} { + ##x #lower bound is above upper list range #match with decreasing indices is still possible set lower [expr {[llength $dval]-1}] ;#set to max - } elseif {$lower_resolve == -2} { + } elseif {$lower_resolve == -3} { + ##x set lower 0 } else { set lower $lower_resolve } set upper [punk::lib::lindex_resolve $dval $b] - if {$upper == -2} { + if {$upper == -3} { + ##x #upper bound is below list range - - if {$lower_resolve >=-1} { + if {$lower_resolve >=-2} { + ##x set upper 0 } else { continue } - } elseif {$upper == -1} { + } elseif {$upper == -2} { #use max set upper [expr {[llength $dval]-1}] #assert - upper >=0 because we have ruled out empty lists @@ -1669,7 +1760,8 @@ namespace eval punk::lib { error "bad expression '$expression': must be integer?\[+-\]integer?" } } - + + # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bunds on upper vs lower side proc lindex_resolve {list index} { #*** !doctools #[call [fun lindex_resolve] [arg list] [arg index]] @@ -1679,11 +1771,13 @@ namespace eval punk::lib { #[para]Sometimes the actual integer index is desired. #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. #[para]lindex_resolve will parse the index expression and return: - #[para] a) -2 if the supplied index expression is below the lower bound for the supplied list. (< 0) - #[para] b) -1 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] a) -3 if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) -2 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list #[para]Otherwise it will return an integer corresponding to the position in the list. #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out or bounds indices, or in the case of lrange, return results anyway. #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which we will resolve (hopefully safely) with expr #if {![llength $list]} { @@ -1694,9 +1788,9 @@ namespace eval punk::lib { if {[string is integer -strict $index]} { #can match +i -i if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } else { #integer may still have + sign - normalize with expr return [expr {$index}] @@ -1708,14 +1802,14 @@ namespace eval punk::lib { set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { - return -1 + return -2 } } else { - #end + #index is 'end' set index [expr {[llength $list]-1}] if {$index < 0} { - #special case - end with empty list - treat end like a positive number out of bounds - return -1 + #special case - 'end' with empty list - treat end like a positive number out of bounds + return -2 } else { return $index } @@ -1723,7 +1817,7 @@ namespace eval punk::lib { if {$offset == 0} { set index [expr {[llength $list]-1}] if {$index < 0} { - return -1 ;#special case + return -2 ;#special case as above } else { return $index } @@ -1732,7 +1826,7 @@ namespace eval punk::lib { set index [expr {([llength $list]-1) - $offset}] } if {$index < 0} { - return -2 + return -3 } else { return $index } @@ -1753,26 +1847,50 @@ namespace eval punk::lib { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } return $index } } } - proc lindex_resolve2 {list index} { - #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ but that's likely unavailable here. + proc lindex_resolve_basic {list index} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg list] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -1 for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ + # - which #for {set i 0} {$i < [llength $list]} {incr i} { # lappend indices $i #} + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < 0 || ($index >= [llength $list])} { + #even though in this case we could return -2 or -3 like lindex_resolve; for consistency we don't, as it's not always determinable for compound indices using the lseq method. + return -1 + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } if {[llength $list]} { set indices [punk::lib::range 0 [expr {[llength $list]-1}]] ;# uses lseq if available, has fallback. + #if lseq was available - $indices is an 'arithseries' - theoretically not taking up ram(?) } else { set indices [list] } set idx [lindex $indices $index] if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end return -1 } else { return $idx @@ -2334,13 +2452,6 @@ namespace eval punk::lib { } return $prefix } - #test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var - proc swapnumvars {namea nameb} { - upvar $namea a $nameb b - set a [expr {$a ^ $b}] - set b [expr {$a ^ $b}] - set a [expr {$a ^ $b}] - } #e.g linesort -decreasing $data proc linesort {args} { @@ -2956,7 +3067,7 @@ namespace eval punk::lib { # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { - set number [punk::objclone $unformattednumber] + set number [objclone $unformattednumber] set number [string map {_ ""} $number] #normalize using expr - e.g 2e4 -> 20000.0 set number [expr {$number}] diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm index 932c1db6..806b172e 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm @@ -4,6 +4,7 @@ package provide punk::mix::base [namespace eval punk::mix::base { }] package require punk::path +package require punk::lib ;#format_number etc #base internal plumbing functions namespace eval punk::mix::base { @@ -657,16 +658,38 @@ namespace eval punk::mix::base { #temp emission to stdout.. todo - repl telemetry channel puts stdout "cksum_path: creating temporary tar archive for $path" - puts stdout " at: $archivename .." - tar::create $archivename $target + puts -nonewline stdout " at: $archivename ..." + set tsstart [clock millis] + if {[set tarpath [auto_execok tar]] ne ""} { + #using an external binary is *significantly* faster than tar::create - but comes with some risks + #review - need to check behaviour/flag variances across platforms + #don't use -z flag. On at least some tar versions the zipped file will contain a timestamped subfolder of filename.tar - which ruins the checksum + #also - tar is generally faster without the compression (although this may vary depending on file size and disk speed?) + exec {*}$tarpath -cf $archivename $target ;#{*} needed in case spaces in tarpath + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar -cf done ($ms ms)" + } else { + set tsstart [clock millis] ;#don't include auto_exec search time for tar::create + tar::create $archivename $target + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar::create done ($ms ms)" + puts stdout " NOTE: install tar executable for potentially *much* faster directory checksum processing" + } + if {$ftype eq "file"} { - set sizeinfo "(size [file size $target])" + set sizeinfo "(size [punk::lib::format_number [file size $target]] bytes)" } else { - set sizeinfo "(file type $ftype - size unknown)" + set sizeinfo "(file type $ftype - tarred size [punk::lib::format_number [file size $archivename]] bytes)" } - puts stdout "cksum_path: calculating cksum for $target $sizeinfo..." + set tsstart [clock millis] + puts -nonewline stdout "cksum_path: calculating cksum for $target $sizeinfo ... " set cksum [{*}$cksum_command $archivename] - #puts stdout "cksum_path: cleaning up.. " + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " cksum done ($ms ms)" + puts stdout " cksum: $cksum" file delete -force $archivename cd $startdir diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm index 9afc685c..80cab2a7 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm @@ -157,6 +157,9 @@ namespace eval punk::mix::commandset::project { set opt_force [dict get $opts -force] set opt_confirm [string tolower [dict get $opts -confirm]] # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_layout [dict get $opts -layout] + set opt_update [dict get $opts -update] + # -- --- --- --- --- --- --- --- --- --- --- --- --- set opt_modules [dict get $opts -modules] if {[llength $opt_modules] == 1 && [lindex $opt_modules 0] eq "\uFFFF"} { #if not specified - add a single module matching project name @@ -169,9 +172,6 @@ namespace eval punk::mix::commandset::project { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_layout [dict get $opts -layout] - set opt_update [dict get $opts -update] - # -- --- --- --- --- --- --- --- --- --- --- --- --- #todo - install support binaries on a per-project basis in a way that doesn't impact machine (e.g not added to path) - cache in user config dir if possible, supply mechanism to clear cache diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm index cf0bf70c..10250a9b 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -1707,6 +1707,7 @@ tcl::namespace::eval punk::ns { lassign [internal::get_run_opts {-vars -nowarnings} {} $args] _r runopts _c cmdargs set use_vars [expr {"-vars" in $runopts}] set no_warnings [expr {"-nowarnings" in $runopts}] + set ver "" #todo support leading solo flags such as -capture to control whether we do a static capture of local vars in the ns @@ -1717,15 +1718,68 @@ tcl::namespace::eval punk::ns { } default { if {[string match ::* $pkg_or_existing_ns]} { + set pkg_unqualified [string range $pkg_or_existing_ns 2 end] if {![tcl::namespace::exists $pkg_or_existing_ns]} { - set ver [package require [string range $pkg_or_existing_ns 2 end]] + set ver [package require $pkg_unqualified] } else { set ver "" } set ns $pkg_or_existing_ns } else { - set ver [package require $pkg_or_existing_ns] - set ns ::$pkg_or_existing_ns + set pkg_unqualified $pkg_or_existing_ns + set ver [package require $pkg_unqualified] + set ns ::$pkg_unqualified + } + #some packages don't create their namespace immediately and/or don't populate it with commands and instead put entries in ::auto_index + set previous_command_count 0 + if {[namespace exists $ns]} { + set previous_command_count [llength [info commands ${ns}::*]] + } + + + #also if a sub package was loaded first - then the namespace for the base or lower package may exist but have no commands + #for the purposes of pkguse - which most commonly interactive - we want the namespace populated + #It may still not be *fully* populated because we stop at first source that adds commands - REVIEW + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + + if {!$ns_populated} { + #we will catch-run an auto_index entry if any + #auto_index entry may or may not be prefixed with :: + set keys [list] + #first look for exact pkg_unqualified and ::pkg_unqualified + #leave these at beginning of keys list + if {[array exists ::auto_index($pkg_unqualified)]} { + lappend keys $pkg_unqualified + } + if {[array exists ::auto_index(::$pkg_unqualified)]} { + lappend keys ::$pkg_unqualified + } + #as auto_index is an array - we could get keys in arbitrary order + set matches [lsort [array names ::auto_index ${pkg_unqualified}::*]] + lappend keys {*}$matches + set matches [lsort [array names ::auto_index ::${pkg_unqualified}::*]] + lappend keys {*}$matches + set ns_populated 0 + set i 0 + set already_sourced [list] ;#often multiple triggers for the same source - don't waste time re-sourcing + set ns_depth [llength [punk::ns::nsparts [string trimleft $ns :]]] + while {!$ns_populated && $i < [llength $keys]} { + #todo - skip sourcing deeper entries from a subpkg which may have been loaded earlier than the base + #e.g if we are loading ::x::y + #only source for keys the same depth, or one deeper ie ::x::y, x::y, ::x::y::z not ::x or ::x::y::z::etc + set k [lindex $keys $i] + set k_depth [llength [punk::ns::nsparts [string trimleft $k :]]] + if {$k_depth == $ns_depth || $k_depth == $ns_depth + 1} { + set auto_source [set ::auto_index($k)] + if {$auto_source ni $already_sourced} { + uplevel 1 $auto_source + lappend already_sourced $auto_source + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + } + } + incr i + } + } } } @@ -1799,7 +1853,13 @@ tcl::namespace::eval punk::ns { return $out } } else { - error "Namespace $ns not found." + if {$ver eq ""} { + error "Namespace $ns not found. No package version found." + } else { + set out "(no package namespace found) remaining in [uplevel 1 {namespace current}]" + append out \n $ver + return $out + } } return $out } diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm index 2cb5fd1d..e056b14a 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/repo-0.1.1.tm @@ -468,7 +468,7 @@ namespace eval punk::repo { set path [string trim [string range $ln [string length "MISSING "] end]] dict set pathdict $path "missing" } - "EXTRA * " { + "EXTRA *" { #fossil will explicitly list files in a new folder - as opposed to git which shows just the folder set path [string trim [string range $ln [string length "EXTRA "] end]] dict set pathdict $path "extra" diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm index 4ea2ce3d..8405fae7 100644 Binary files a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm and b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm differ diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm index d85d4416..3e13e75d 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm @@ -19,12 +19,20 @@ #*** !doctools #[manpage_begin tomlish_module_tomlish 0 1.1.1] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] -#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] #[require tomlish] -#[keywords module] +#[keywords module parsing toml configuration] #[description] -#[para] - +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ @@ -71,17 +79,41 @@ package require logger namespace eval tomlish { namespace export {[a-z]*}; # Convention: export all lowercase variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + #ARRAY is analogous to a Tcl list #TABLE is analogous to a Tcl dict #WS = inline whitespace - #KEYVAL = bare key and value - #QKEYVAL = quoted key and value + #KEY = bare key and value + #QKEY = double quoted key and value + #SQKEY = single quoted key and value #ITABLE = inline table (*can* be anonymous table) # inline table values immediately create a table with the opening brace # inline tables are fully defined between their braces, as are dotted-key subtables defined within # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained - set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL STRING MULTISTRING LITSTRING MULTILITSTRING INT FLOAT BOOL DATETIME] + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) set min_int -9223372036854775808 ;#-2^63 @@ -114,10 +146,13 @@ namespace eval tomlish { #find the value # 3 is the earliest index at which the value could occur (depending on whitespace) set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error ">>>_get_keyval_value doesn't seem to be a properly structured { = } list" + } foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { - STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { set type [lindex $sub 0] set value [lindex $sub 1] set found_sub $sub @@ -127,10 +162,10 @@ namespace eval tomlish { } } if {!$found_value} { - error "Failed to find value element in KEYVAL. '$keyval_element'" + error "Failed to find value element in KEY. '$keyval_element'" } if {$found_value > 1} { - error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + error "Found multiple value elements in KEY, expected exactly one. '$keyval_element'" } switch -exact -- $type { @@ -141,16 +176,28 @@ namespace eval tomlish { STRING - STRINGPART { set result [list type $type value [::tomlish::utils::unescape_string $value]] } - LITSTRING { + LITERAL - LITERALPART { #REVIEW set result [list type $type value $value] } - TABLE - ITABLE - ARRAY - MULTISTRING { - #jmn2024 - added ITABLE - review + TABLE { + #invalid? + error "_get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::get_dict [list $found_sub]] + } + ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! set result [list type $type value [::tomlish::get_dict [list $found_sub]]] } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } default { error "Unexpected value type '$type' found in keyval '$keyval_element'" } @@ -158,6 +205,48 @@ namespace eval tomlish { return $result } + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "_get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } #get_dict is a *basic* programmatic datastructure for accessing the data. # produce a dictionary of keys and values from a tomlish tagged list. # get_dict is primarily for reading toml data. @@ -193,9 +282,12 @@ namespace eval tomlish { set tag [lindex $item 0] #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { - KEYVAL - QKEYVAL { + KEY - QKEY - SQKEY { log::debug "--> processing $tag: $item" set key [lindex $item 1] + if {$tag eq "QKEY"} { + set key [::tomlish::utils::unescape_string $key] + } #!todo - normalize key. (may be quoted/doublequoted) if {[dict exists $datastructure $key]} { @@ -206,6 +298,43 @@ namespace eval tomlish { set keyval_dict [_get_keyval_value $item] dict set datastructure $key $keyval_dict } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "get_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } TABLE { set tablename [lindex $item 1] set tablename [::tomlish::utils::tablename_trim $tablename] @@ -220,21 +349,20 @@ namespace eval tomlish { #toml spec rule - all segments mst be non-empty #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - set key_hierarchy [list] - set key_hierarchy_raw [list] + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] foreach rawseg $name_segments { set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes - set c1 [::string index $rawseg 0] - set c2 [::string index $rawseg end] + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes are processed within it. - set seg [::string range $rawseg 1 end-1] + set seg [tcl::string::range $rawseg 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. - set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] - #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] } else { set seg $rawseg } @@ -243,15 +371,16 @@ namespace eval tomlish { #if {$rawseg eq ""} { # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" #} - lappend key_hierarchy $seg - lappend key_hierarchy_raw $rawseg + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg - if {[dict exists $datastructure {*}$key_hierarchy]} { + if {[dict exists $datastructure {*}$table_key_hierarchy]} { #It's ok for this key to already exist *if* it was defined by a previous tablename, - # but not if it was defined as a keyval/qkeyval + # but not if it was defined as a key/qkey/skey ? - set testkey [join $key_hierarchy_raw .] - set testkey_length [llength $key_hierarchy_raw] + set testkey [join $table_key_hierarchy_raw .] + + set testkey_length [llength $table_key_hierarchy_raw] set found_testkey 0 if {$testkey in $tablenames_seen} { set found_testkey 1 @@ -267,11 +396,12 @@ namespace eval tomlish { # instead compare {a b.c d} with {a b c d} # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #dots within table segments might seem like an 'edge case' - # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" if {$testkey eq $seen_match} { set found_testkey 1 } @@ -279,35 +409,81 @@ namespace eval tomlish { } if {$found_testkey == 0} { - #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset - error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg } } } + #ensure empty tables are still represented in the datastructure + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" + } + } #We must do this after the key-collision test above! lappend tablenames_seen $tablename - - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? set keyval_key [lindex $element 1] + if {$type eq "QKEY"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] - dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -320,16 +496,36 @@ namespace eval tomlish { foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { - set keyval_key [lindex $element 1] + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] - dict set datastructure $keyval_key $keyval_dict + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -350,12 +546,16 @@ namespace eval tomlish { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] } - TABLE - ARRAY - MULTISTRING { + LITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } - WS - SEP { - #ignore whitespace and commas + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments } default { error "Unexpected value type '$type' found in array" @@ -363,6 +563,49 @@ namespace eval tomlish { } } } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } MULTISTRING { #triple dquoted string log::debug "--> processing multistring: $item" @@ -372,7 +615,14 @@ namespace eval tomlish { for {set idx 0} {$idx < [llength $parts]} {incr idx} { set element [lindex $parts $idx] set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "doulbe quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } STRINGPART { append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] } @@ -533,8 +783,8 @@ namespace eval tomlish::encode { proc float {f} { #convert any non-lower case variants of special values to lowercase for Toml - if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { - return [list FLOAT [string tolower $f]] + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] } if {[::tomlish::utils::is_float $f]} { return [list FLOAT $f] @@ -553,44 +803,56 @@ namespace eval tomlish::encode { proc boolean {b} { #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false - if {![string is boolean -strict $b]} { + if {![tcl::string::is boolean -strict $b]} { error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" } else { if {[expr {$b && 1}]} { - return [list BOOL true] + return [::list BOOL true] } else { - return [list BOOL false] + return [::list BOOL false] } } } + + #TODO #Take tablename followed by - # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types proc table {name args} { set pairs [list] foreach t $args { - if {[llength $t] == 3} { - if {[lindex $t 0] ne "KEYVAL"} { - error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" } - lappend pairs $t + lappend pairs [list KEY $keystr = $valuepart] } elseif {[llength $t] == 2} { #!todo - type heuristics lassign $t n v - lappend pairs [list KEYVAL $n [list STRING $v]] + lappend pairs [list KEY $n = [list STRING $v]] } else { - error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" foreach part [lrange $item 1 end] { - append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] } - append toml '''$litstring''' + append toml '''$literal''' } INT - BOOL - @@ -777,6 +1079,7 @@ namespace eval tomlish::decode { # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. # For this reason, we also do absolutely no line-ending transformations based on platform. # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { #*** !doctools #[call [fun toml] [arg s]] @@ -835,11 +1138,12 @@ namespace eval tomlish::decode { set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. - set state "key-space" - ::tomlish::parse::spacestack push {space key-space} + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) set linenum 1 - + + set ::tomlish::parse::state_list [list] try { while {$r} { set r [::tomlish::parse::tok $s] @@ -851,31 +1155,162 @@ namespace eval tomlish::decode { #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" #puts "-->tok: $tok tokenType='$tokenType'" set prevstate $state - ##### - set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below - set state $nextstate - if {$state eq "err"} { - error "State error - aborting parse. [tomlish::parse::report_line]" + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) } - - if {$last_space_action eq "pop"} { + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? + #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] + } + ''' { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } newline { incr linenum lappend v($nest) [list NEWLINE $tok] } tablename { #note: a tablename only 'pops' if we are greater than zero - error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" } tablearrayname { #!review - tablearrayname different to tablename regarding push/pop? #note: a tablename only 'pops' if we are greater than zero - error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" } endarray { #nothing to do here. @@ -885,29 +1320,74 @@ namespace eval tomlish::decode { lappend v($nest) "SEP" } endinlinetable { - puts stderr "endinlinetable" + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } endmultiquote { - puts stderr "endmultiquote for last_space_action 'pop'" + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" } default { - error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } - set parentlevel [expr {$nest -1}] - lappend v($parentlevel) [set v($nest)] + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + incr nest -1 } elseif {$last_space_action eq "push"} { + set prevnest $nest incr nest 1 set v($nest) [list] # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } barekey { - set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" } quotedkey - itablequotedkey { - set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + set v($nest) [list QKEY $tok] ;#$tok is the keyname + } + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! @@ -922,7 +1402,7 @@ namespace eval tomlish::decode { # tomlish list? set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) @@ -940,21 +1420,31 @@ namespace eval tomlish::decode { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } startmultiquote { - puts stderr "push trigger tokenType startmultiquote (todo)" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE - #JMN ??? - #set next_tokenType_known 1 - #::tomlish::parse::set_tokenType "multistring" - #set tok "" + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL } default { - error "push trigger tokenType '$tokenType' not yet implemented" + error "---- push trigger tokenType '$tokenType' not yet implemented" } } } else { #no space level change switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } starttablename { #$tok is triggered by the opening bracket and sends nothing to output } @@ -962,40 +1452,69 @@ namespace eval tomlish::decode { #$tok is triggered by the double opening brackets and sends nothing to output } tablename - tablenamearray { - error "did not expect 'tablename/tablearrayname' without space level change" + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" #set v($nest) [list TABLE $tok] } endtablename - endtablearrayname { #no output into the tomlish list for this token } startinlinetable { - puts stderr "decode::toml error. did not expect startlinetable without space level change" + puts stderr "---- decode::toml error. did not expect startinlinetable without space level change (no space level change)" } startquote { - switch -exact -- $nextstate { - string { + switch -exact -- $newstate { + string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quotedkey { + quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "quotedkey" set tok "" } - itablequotedkey { + itable-quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "itablequotedkey" set tok "" } default { - error "startquote switch case not implemented for nextstate: $nextstate" + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startsquote { + switch -exact -- $newstate { + literal-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "literal" + set tok "" + } + squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itable-squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } + default { + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" } } } startmultiquote { #review - puts stderr "no space level change - got startmultiquote" + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" set next_tokenType_known 1 ::tomlish::parse::set_tokenType "stringpart" set tok "" @@ -1004,27 +1523,53 @@ namespace eval tomlish::decode { #nothing to do? set tok "" } + endsquote { + set tok "" + } endmultiquote { #JMN!! set tok "" } string { - lappend v($nest) [list STRING $tok] + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes } - stringpart { - lappend v($nest) [list STRINGPART $tok] + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } } multistring { #review lappend v($nest) [list MULTISTRING $tok] } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } quotedkey { #lappend v($nest) [list QKEY $tok] ;#TEST } itablequotedkey { } - untyped-value { + untyped_value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. if {$tok in {true false}} { set tag BOOL @@ -1035,9 +1580,10 @@ namespace eval tomlish::decode { } elseif {[::tomlish::utils::is_datetime $tok]} { set tag DATETIME } else { - error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line] (no space level change)" } lappend v($nest) [list $tag $tok] + } comment { #puts stdout "----- comment token returned '$tok'------" @@ -1068,17 +1614,18 @@ namespace eval tomlish::decode { #!todo - check previous tokens are complete/valid? } default { - error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" } } } if {!$next_tokenType_known} { + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" ::tomlish::parse::set_tokenType "" set tok "" } - if {$state eq "end"} { + if {$state eq "end-state"} { break } @@ -1108,8 +1655,6 @@ namespace eval tomlish::decode { } finally { set is_parsing 0 } - - return $v(0) } @@ -1136,31 +1681,84 @@ namespace eval tomlish::utils { set segments [tablename_split $tablename false] set trimmed_segments [list] foreach seg $segments { - lappend trimmed_segments [::string trim $seg [list " " \t]] + lappend trimmed_segments [::string trim $seg " \t"] } return [join $trimmed_segments .] } + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [tcl::string::length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + #utils::tablename_split proc tablename_split {tablename {normalize false}} { #we can't just split on . because we have to handle quoted segments which may contain a dot. #eg {dog."tater.man"} - set i 0 - set sLen [::string length $tablename] + set sLen [tcl::string::length $tablename] set segments [list] set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" - for {} {$i < $sLen} {} { + for {set i 0} {$i < $sLen} {incr i} { if {$i > 0} { - set lastChar [::string index $tablename [expr {$i - 1}]] + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $tablename $i] - incr i + set c [tcl::string::index $tablename $i] if {$c eq "."} { switch -exact -- $mode { @@ -1188,7 +1786,7 @@ namespace eval tomlish::utils { } } elseif {($c eq "\"") && ($lastChar ne "\\")} { if {$mode eq "unknown"} { - if {[::string trim $seg] ne ""} { + if {[tcl::string::trim $seg] ne ""} { #we don't allow a quote in the middle of a bare key error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" } @@ -1240,7 +1838,7 @@ namespace eval tomlish::utils { } append seg $c } - if {$i == $sLen} { + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" switch -exact -- $mode { @@ -1251,13 +1849,13 @@ namespace eval tomlish::utils { if {$normalize} { lappend segments $seg } else { - lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong } } litquoted { - set trimmed_seg [::string trim $seg] - if {[::string index $trimmed_seg end] ne "\'"} { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" } lappend segments $seg @@ -1275,14 +1873,14 @@ namespace eval tomlish::utils { } } foreach seg $segments { - set trimmed [::string trim $seg [list " " \t]] + set trimmed [tcl::string::trim $seg " \t"] #note - we explicitly allow 'empty' quoted strings '' & "" # (these are 'discouraged' but valid toml keys) #if {$trimmed in [list "''" "\"\""]} { # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" #} if {$trimmed eq "" } { - error "tablename_split. Empty segment found. tablename: '$tablename'" + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" } } return $segments @@ -1294,7 +1892,7 @@ namespace eval tomlish::utils { # is a valid 'unicode scalar value' # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} - if {[::string match {\\u*} $slashu]} { + if {[tcl::string::match {\\u*} $slashu]} { set exp {^\\u([0-9a-fA-F]{4}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %4x dec] != 1} { @@ -1306,7 +1904,7 @@ namespace eval tomlish::utils { } else { return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] } - } elseif {[::string match {\\U*} $slashu]} { + } elseif {[tcl::string::match {\\U*} $slashu]} { set exp {^\\U([0-9a-fA-F]{8}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %8x dec] != 1} { @@ -1340,7 +1938,7 @@ namespace eval tomlish::utils { set buffer4 "" ;#buffer for 4 hex characters following a \u set buffer8 "" ;#buffer for 8 hex characters following a \u - set sLen [::string length $str] + set sLen [tcl::string::length $str] #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc set slash_active 0 @@ -1352,12 +1950,12 @@ namespace eval tomlish::utils { set i 0 for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [::string index $str [expr {$i - 1}]] + set lastChar [tcl::string::index $str [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $str $i] + set c [tcl::string::index $str $i] ::tomlish::log::debug "unescape_string. got char $c" scan $c %c n if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { @@ -1380,10 +1978,10 @@ namespace eval tomlish::utils { } } else { if {$unicode4_active} { - if {[::string length $buffer4] < 4} { + if {[tcl::string::length $buffer4] < 4} { append buffer4 $c } - if {[::string length $buffer4] == 4} { + if {[tcl::string::length $buffer4] == 4} { #we have a \uHHHH to test set unicode4_active 0 set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] @@ -1394,10 +1992,10 @@ namespace eval tomlish::utils { } } } elseif {$unicode8_active} { - if {[::string length $buffer8] < 8} { + if {[tcl::string::length $buffer8] < 8} { append buffer8 $c } - if {[::string length $buffer8] == 8} { + if {[tcl::string::length $buffer8] == 8} { #we have a \UHHHHHHHH to test set unicode8_active 0 set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] @@ -1409,7 +2007,7 @@ namespace eval tomlish::utils { } } elseif {$slash_active} { set slash_active 0 - set ctest [string map {{"} dq} $c] + set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { set e "\\\"" @@ -1453,15 +2051,15 @@ namespace eval tomlish::utils { } proc normalize_key {rawkey} { - set c1 [::string index $rawkey 0] - set c2 [::string index $rawkey end] + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes allowed within it. - set key [::string range $rawkey 1 end-1] + set key [tcl::string::range $rawkey 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. # - set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only set key [::tomlish::utils::unescape_string $keydata] #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. } else { @@ -1497,11 +2095,11 @@ namespace eval tomlish::utils { #check if str is valid for use as a toml bare key proc is_barekey {str} { - if {[::string length $str] == 0} { + if {[tcl::string::length $str] == 0} { return 0 } else { set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters match the regexp return 1 } else { @@ -1512,7 +2110,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'integer'. proc int_validchars1 {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { return 1 } else { @@ -1521,7 +2119,7 @@ namespace eval tomlish::utils { } #add support for hex,octal,binary 0x.. 0o.. 0b... proc int_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { return 1 } else { @@ -1538,22 +2136,22 @@ namespace eval tomlish::utils { # --------------------------------------- #check for leading zeroes in non 0x 0b 0o #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { return 0 } # --------------------------------------- #check +,- only occur in the first position. - if {[::string last - $str] > 0} { + if {[tcl::string::last - $str] > 0} { return 0 } - if {[::string last + $str] > 0} { + if {[tcl::string::last + $str] > 0} { return 0 } - set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) - if {![string is integer -strict $numeric_value]} { + if {![tcl::string::is integer -strict $numeric_value]} { return 0 } #!todo - check bounds only based on some config value @@ -1575,7 +2173,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'float'. proc float_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { return 1 } else { @@ -1595,7 +2193,7 @@ namespace eval tomlish::utils { return 1 } - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) #Toml spec also disallows leading zeros in the exponent part @@ -1603,12 +2201,12 @@ namespace eval tomlish::utils { #for now we will allow leading zeros in exponents #!todo - configure 'strict' option to disallow? #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] set r {([0-9])*} regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E set z {([0])*} regexp $z $intpart leadingzeros - if {[::string length $leadingzeros] > 1} { + if {[tcl::string::length $leadingzeros] > 1} { return 0 } #for floats, +,- may occur in multiple places @@ -1616,9 +2214,9 @@ namespace eval tomlish::utils { #!todo - check bounds ? #strip underscores for tcl double check - set check [::string map {_ ""} $str] + set check [tcl::string::map {_ ""} $str] #string is double accepts inf nan +NaN etc. - if {![::string is double $check]} { + if {![tcl::string::is double $check]} { return 0 } @@ -1631,7 +2229,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'datetime'. proc datetime_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { return 1 } else { @@ -1639,19 +2237,37 @@ namespace eval tomlish::utils { } } + #review - we proc is_datetime {str} { - #e.g 1979-05-27T00:32:00-07:00 + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #!todo - use full RFC 3339 parser? lassign [split $str T] datepart timepart #!todo - what if the value is 'time only'? - - if {[catch {clock scan $datepart} err]} { - puts stderr "tcl clock scan failed err:'$err'" - return 0 - } + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + #!todo - verify time part is reasonable } else { return 0 @@ -1670,174 +2286,434 @@ namespace eval tomlish::parse { #[para] #[list_begin definitions] + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? + #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? + + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text variable state - # states: - # key-space, curly-space, array-space - # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # states: + # table-space, itable-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string-state, literal-state, multistring... # # notes: - # key-space i # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack - # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax # #stateMatrix defines for each state, actions to take for each possible token. - #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. - #dual-element actions are a push command and the name of the space to push on the stack. - # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. + #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases - #test variable stateMatrix set stateMatrix [dict create] - dict set stateMatrix\ - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + #xxx-space vs xxx-syntax inadequately documented - TODO + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# dict set stateMatrix\ - curly-space {\ - whitespace "curly-space"\ - newline "curly-space"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ } - #REVIEW - #toml spec looks like heading towards allowing newlines within inline tables - #https://github.com/toml-lang/toml/issues/781 + #itable-space/ curly-syntax : itables dict set stateMatrix\ - curly-syntax {\ - whitespace "curly-syntax"\ - newline "curly-syntax"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-ok"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ } dict set stateMatrix\ - value-expected {\ - whitespace "value-expected"\ - newline "err"\ - eof "err"\ - untyped-value "samespace"\ - startquote "string"\ - startmultiquote {pushspace "multistring-space"}\ - startinlinetable {pushspace curly-space}\ - comment "err"\ - comma "err"\ - startarray {pushspace array-space}\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ } + + # ' = ' portion of keyval dict set stateMatrix\ - array-space {\ - whitespace "array-space"\ - newline "array-space"\ - eof "err"\ - untyped-value "samespace"\ - startarray {pushspace "array-space"}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "array-space"\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ } + #### dict set stateMatrix\ - array-syntax {\ - whitespace "array-syntax"\ - newline "array-syntax"\ - untyped-value "samespace"\ - startarray {pushspace array-space}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "err"\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - - + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} dict set stateMatrix\ - itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + leading-squote-space {\ + squote_seq "POPSPACE"\ + } #dict set stateMatrix\ - # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + dict set stateMatrix\ - itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ - itablekeyval-space {} + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } dict set stateMatrix\ - itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value + dict set stateMatrix\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ + } + #dottedkeyend "POPSPACE" + + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 dict set stateMatrix\ - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey "itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ + } + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? + + + #JMN REVIEW dict set stateMatrix\ - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ + comma "array-space"\ + comment "array-space"\ + eof "err-state-array-space-got-eof"\ + } dict set stateMatrix\ - keyval-space {} + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + comma "array-space"\ + comment "err-state"\ + } + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space + dict set stateMatrix\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ + } dict set stateMatrix\ - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ + } + # endsquote {PUSHSPACE "keyval-space"} + dict set stateMatrix\ - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + string-state {\ + whitespace "NA"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } dict set stateMatrix\ - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + literal-state {\ + whitespace "NA"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + + + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } dict set stateMatrix\ - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ + } + + + #only valid subparts are literalpart and newline. other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + + dict set stateMatrix\ - baretablename {whitespace "NA" newline "err" equal "value-expected"} + tablename-state {\ + whitespace "NA"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ + } dict set stateMatrix\ - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + tablearrayname-state {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ + } + dict set stateMatrix\ - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - end {} - - #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push - variable stateMatrix_orig { - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} - curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} - value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} - array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} - array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - keyval-space {} - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} - end {} + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } + } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- #build a list of 'push triggers' from the stateMatrix # ie tokens which can push a new space onto spacestack set push_trigger_tokens [list] tcl::dict::for {s transitions} $stateMatrix { tcl::dict::for {token transition_to} $transitions { - set action [lindex $transition_to 0] - switch -exact -- $action { - pushspace - zeropoppushspace { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { if {$token ni $push_trigger_tokens} { lappend push_trigger_tokens $token } @@ -1845,84 +2721,139 @@ namespace eval tomlish::parse { } } } - puts stdout "push_trigger_tokens: $push_trigger_tokens" - #!todo - hard code once stateMatrix finalised? + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + #mainly for the -space states: #redirect to another state $c based on a state transition from $whatever to $b # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. - #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' + #this is useful as we often don't know state $b. e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next variable spacePopTransitions { - array-space array-syntax - curly-space curly-syntax - keyval-space keytail - itablekeyval-space itablevaltail + array-space array-syntax } - variable spacePushTransitions { - keyval-space keyval-syntax - itablekeyval-space itablekeyval-syntax - array-space array-space - curly-space curly-space - key-space tablename + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail - variable state_list + variable state_list ;#reset every tomlish::decode::toml namespace export tomlish toml namespace ensemble create - proc getNextState {tokentype currentstate} { + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state variable nest variable v - + + set prevstate $currentstate + + variable spacePopTransitions variable spacePushTransitions - variable last_space_action "none" - variable last_space_type "none" + variable spaceSameTransitions + + variable last_space_action "none" + variable last_space_type "none" variable state_list set result "" + set starttok "" if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] - ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { - popspace { + POPSPACE { spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + set last_space_action "pop" set last_space_type $type - - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" } else { - set next $target + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. + # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } } set result $next } - samespace { - #note the same data as popspace (spacePopTransitions) is used here. - set parent [spacestack peek] - ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" - lassign $parent type target - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" } else { - set next $target + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } } set result $next } zeropoppushspace { if {$nest > 0} { - #pop back down to the root level (key-space) + #pop back down to the root level (table-space) spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + set last_space_action "pop" set last_space_type $type @@ -1935,36 +2866,72 @@ namespace eval tomlish::parse { } #re-entrancy - #set next [list pushspace [lindex $transition_to 1]] + #set next [list PUSHSPACE [lindex $transition_to 1]] set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 - ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" - set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] } - pushspace { - set target [lindex $transition_to 1] - spacestack push [list space $target] + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + set last_space_action "push" set last_space_type "space" - - #puts $::tomlish::parse::spacePushTransitions - if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { - set next [dict get $::tomlish::parse::spacePushTransitions $target] - ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" } else { - set next $target + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } } set result $next } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } default { - set result $transition_to + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word } } } else { - set result "nostate-err" - + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" } - lappend state_list $result - return $result + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] } proc report_line {{line ""}} { @@ -1988,7 +2955,7 @@ namespace eval tomlish::parse { foreach el $list { if { [lindex $el 0] eq "NEWLINE"} { append prettier "[list $el]\n" - } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { append prettier [nest_pretty1 $el] } else { append prettier "[list $el] " @@ -2023,12 +2990,13 @@ namespace eval tomlish::parse { proc _shortcircuit_startquotesequence {} { variable tok variable i - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { set_tokenType "startquote" incr i -1 return -level 2 1 } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" set_tokenType "startquote" set tok "\"" incr i -2 @@ -2036,8 +3004,81 @@ namespace eval tomlish::parse { } } - #return a list of 0 1 or 2 tokens + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "Unexpected. Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + + #returns 0 or 1 #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + proc tok {s} { variable nest variable v @@ -2046,14 +3087,12 @@ namespace eval tomlish::parse { variable type ;#character type variable state ;#FSM - set resultlist [list] variable tokenType variable tokenType_list variable endToken - set sLen [::string length $s] variable lastChar @@ -2063,400 +3102,601 @@ namespace eval tomlish::parse { #------------------------------ #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof variable token_waiting - if {[dict size $token_waiting]} { - set tokenType [dict get $token_waiting type] - set tok [dict get $token_waiting tok] - dict unset token_waiting type - dict unset token_waiting tok + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] return 1 } #------------------------------ + set resultlist [list] + set sLen [tcl::string::length $s] + set slash_active 0 set quote 0 set c "" set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [string index $s [expr {$i - 1}]] + set lastChar [tcl::string::index $s [expr {$i - 1}]] } else { set lastChar "" } - set c [string index $s $i] + set c [tcl::string::index $s $i] + set cindex $i + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" #puts "got char $c during tokenType '$tokenType'" - incr i ;#must incr here because we do'returns'inside the loop + incr i ;#must incr here because we do returns inside the loop - set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" } whitespace { # hash marks end of whitespace token #do a return for the whitespace, set token_waiting - #dict set token_waiting type comment - #dict set token_waiting tok "" + #set_token_waiting type comment value "" complete 1 incr i -1 ;#leave comment for next run return 1 } - untyped-value { + untyped_value { #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? - #we will accept a comment marker as an immediate terminator of the untyped-value. + #we will accept a comment marker as an immediate terminator of the untyped_value. incr i -1 return 1 } + starttablename - starttablearrayname { + #fix! + error "Character '#' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } default { - #quotedkey, string, multistring + #quotedkey, itablequotedkey, string,literal, multistring append tok $c } } } else { - #$slash_active not relevant when no tokenType - #start of token if we're not in a token - set_tokenType comment - set tok "" ;#The hash is not part of the comment data + switch -- $state { + multistring-space { + set_tokenType stringpart + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } } } lc { - set multi_dquote "" ;#!! - #test jmn2024 #left curly brace - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart { - if {$slash_active} {append tok "\\"} - append tok $c - } - starttablename { - error "unexpected tablename problem" - #$slash_active not relevant to this tokentype - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - comment { - if {$slash_active} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - switch -exact -- $state { - value-expected { - #switch last key to tablename?? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - multistring-space { - set_tokenType "stringpart" - if {$slash_active} { - set tok "\\\{" - } else { - set tok "\{" - } - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - array-space - array-syntax { - #nested anonymous inline table - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - default { - error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } rc { - set multi_dquote "" ;#!! #right curly brace - try { - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - tablename { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endinlinetable - dict set token_waiting tok "" - return 1 - } - tablearrayname { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" - return 1 - } - itablevaltail { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 - } - default { - #end any other token - incr i -1 - return 1 - } + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - error "unexpected tablename problem" - - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - curly-syntax - curly-space { - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - array-syntax - array-space { - #invalid - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - itablevaltail { - set_tokenType "endinlinetable" - set tok "" - #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 - incr i -1 - return 1 - } - itablekeyval-syntax { - error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" - } - default { - #JMN2024b keytail? - error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" - } + startquotesequence { + _shortcircuit_startquotesequence } - } - } on error {em} { - error $em - } finally { - set slash_active 0 - } - - } - lb { - set multi_dquote "" ;#!! - #left square bracket - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart { - if {$slash_active} {append tok "\\"} - append tok $c - } - starttablename { - #$slash_active not relevant to this tokentype - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - comment { - if {$slash_active} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - set_tokenType "startarray" - set tok "\[" - return 1 + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } - key-space { - #table name - #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray - #note that a starttablearrayname token may contain whitespace between the brackets - # e.g \[ \[ - set_tokenType "starttablename" - set tok "" ;#there is no output into the tomlish list for this token - } - array-space - array-syntax { - #nested array? - set_tokenType "startarray" - set tok "\[" - return 1 - #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + + } + lb { + #left square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } } - default { - error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } rb { - set multi_dquote "" ;#!! #right square bracket - try { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - tablename { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablename - dict set token_waiting tok "" - return 1 - } - tablearraynames { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 - } - default { + } else { incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 } } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endarray" - set tok "\]" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endarray" - set tok "\]" - return 1 + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + set_token_waiting type endtablename value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - error "unexpected tablename problem" + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" - set_tokenType "endtablename" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endtablearray" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - array-syntax - array-space { - set_tokenType "endarray" - set tok "\]" - return 1 - } - default { - error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\]" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } + default { + error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } bsl { set dquotes $multi_dquote set multi_dquote "" ;#!! #backslash - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - litstring - multilitstring - comment - tablename - tablearrayname { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -2474,13 +3714,15 @@ namespace eval tomlish::parse { set slash_active 1 } } - whitespace { - if {$state eq "multistring-space"} { - #end whitespace token - incr i -1 - return 1 + starttablename - starttablearrayname { + error "backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" } else { - error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + set slash_active 1 } } barekey { @@ -2491,206 +3733,448 @@ namespace eval tomlish::parse { } } } else { - if {$state eq "multistring-space"} { - set slash_active 1 - } else { - error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } } } } - dq { - #double quote - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { append tok $c - } elseif {$toklen == 2} { + if {$existingtoklen > 2} { + error "tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { append tok $c - set_tokenType "startmultiquote" - return 1 - } else { - error "unexpected token length in 'startquotesequence'" + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" } } - endquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + #temp token creatable only during value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { append tok $c - } elseif {$toklen == 2} { + } + 2 { + #switch? append tok $c - set_tokenType "endmultiquote" + set_tokenType triple_squote return 1 - } else { - error "unexpected token length in 'endquotesequence'" + } + default { + error "unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } - string { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" + } + literal { + #slash_active always false + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 + return 1 + } + starttablename - starttablearrayname { + #!!! + incr i -1 + return 1 + } + tablename - tablearrayname { + append tok $c + } + default { + append tok $c + } + } + } else { + switch -exact -- $state { + value-expected - array-space { + set_tokenType "_start_squote_sequence" + set tok "'" + } + itable-keyval-value-expected - keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname + append tok "'" + } + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal + incr -1 + return 1 + } + multistring-space { + error "unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey + } + default { + error "unhandled squote during state '$state'. [tomlish::parse::report_line]" + } + } + } + + } + dq { + #double quote + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + #switch vs set? + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length $toklen in 'startquotesequence'" + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 return 1 } + 2 { + set_tokenType "startsquote" + incr i -2 + return 1 + } + default { + error "unexpected _start_squote_sequence length $toklen" + } } - stringpart { - #sub element of multistring - if {$slash_active} { - append tok "\\" - append tok $c + } + literal - literalpart { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] + set multi_dquote "" + return 1 } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + append multi_dquote "\"" } } - whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } - } - value-expected { - if {$multi_dquote eq "\"\""} { - dict set token_waiting type startmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - #end whitespace token and reprocess - incr i -1 - return 1 - #append multi_dquote "\"" + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } } } - default { - dict set token_waiting type startquote - dict set token_waiting tok "\"" - return 1 - } } - } - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - quotedkey - itablequotedkey { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - dict set token_waiting type endquote - dict set token_waiting tok "\"" + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} + } + default { + set_token_waiting type startquote value "\"" complete 1 startindex $cindex + return 1 } } - tablename - tablearrayname { - if {$slash_active} {append tok "\\"} + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" append tok $c - } - starttablename - starttablearrayname { - incr i -1 ;## + } else { + set_token_waiting type endquote value "\"" complete 1 startindex $cindex return 1 } - default { - error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" - } } - } else { - #$slash_active not relevant when no tokenType - #token is string only if we're expecting a value at this point - switch -exact -- $state { - value-expected - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - multistring-space { - #REVIEW + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + keyval-value-expected - value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! + #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" + tomlish::log::debug "- tokloop char dq ---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" return 1 + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 + #set multi_dquote "" + #return 1 } else { append multi_dquote "\"" } } - key-space { - set tokenType startquote - set tok $c - return 1 - } - curly-space { - set tokenType startquote - set tok $c - return 1 - } - tablename - tablearrayname { - set_tokenType $state - set tok $c - } - default { - error "Unexpected quote during state '$state' [tomlish::parse::report_line]" - } + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { + set_tokenType "startquote" + set tok $c + return 1 + } + itable-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin + set tok $c + } + default { + error "Unexpected quote during state '$state' [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } = { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} append tok $c } stringpart { + if {$had_slash} {append tok "\\"} append tok $dquotes$c } whitespace { - dict set token_waiting type equal - dict set token_waiting tok = - return 1 + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type equal value = complete 1 startindex $cindex + return 1 + } } barekey { - dict set token_waiting type equal - dict set token_waiting tok = + #set_token_waiting type equal value = complete 1 + incr i -1 return 1 } + starttablename - starttablearrayname { + error "Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } default { error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" } @@ -2698,11 +4182,24 @@ namespace eval tomlish::parse { } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok ${dquotes}= + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= + } + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 } default { - set_tokenType equal + set_tokenType "equal" set tok = return 1 } @@ -2710,19 +4207,47 @@ namespace eval tomlish::parse { } } cr { + #REVIEW! set dquotes $multi_dquote set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + append tok $c + } + literalpart { + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warning "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } stringpart { append tok $dquotes$c } + starttablename - starttablearrayname { + error "Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } default { #!todo - error out if cr inappropriate for tokenType append tok $c @@ -2731,24 +4256,46 @@ namespace eval tomlish::parse { } else { #lf may be appended if next #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) - set_tokenType newline + set_tokenType "newline" set tok cr } } lf { + # \n newline set dquotes $multi_dquote set multi_dquote "" ;#!! - # \n newline - if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } newline { + #review #this lf is the trailing part of a crlf - append tok lf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } stringpart { @@ -2757,11 +4304,20 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - dict set token_waiting type newline - dict set token_waiting tok lf - return 1 + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] + incr i -1 + return 1 + } else { + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } } } + starttablename - tablename - tablearrayname - starttablearrayname { + error "Character is invalid in $tokenType. [tomlish::parse::report_line]" + } default { #newline ends all other tokens. #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) @@ -2770,60 +4326,130 @@ namespace eval tomlish::parse { # ie whitespace is split into separate whitespace tokens at each newline #puts "-------------- newline lf during tokenType $tokenType" - dict set token_waiting type newline - dict set token_waiting tok lf + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } } } else { - set had_slash $slash_active - set slash_active 0 - if {$had_slash} { - set_tokenType "continuation" - set tok "\\" - incr i -1 - return 1 - } else { - set_tokenType newline - set tok lf - return 1 + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} } } , { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey - tablename - tablearrayname { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} append tok $c } stringpart { + #stringpart can have up to 2 quotes too + if {$had_slash} {append tok "\\"} append tok $dquotes$c } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type comma value "," complete 1 startindex $cindex + return 1 + } + } default { - dict set token_waiting type comma - dict set token_waiting tok "," + set_token_waiting type comma value "," complete 1 startindex $cindex + if {$had_slash} {append tok "\\"} return 1 } } } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "," + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," } multiliteral-space { - set_tokenType literalpart + #assert had_slash 0, multi_dquote "" + set_tokenType "literalpart" set tok "," } default { - set_tokenType comma + set_tokenType "comma" set tok "," return 1 } @@ -2831,47 +4457,106 @@ namespace eval tomlish::parse { } } . { + set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - string - stringpart - comment - quotedkey - untyped-value { + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped_value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" append tok $c } - baretablename - tablename - tablearrayname { + whitespace { + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { #subtable - split later - review append tok $c } barekey { + #e.g x.y = 1 #we need to transition the barekey to become a structured table name ??? review - switch_tokenType tablename - incr i -1 - - #error "barekey period unimplemented" + #x is the tablename y is the key + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 } default { - error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" - #dict set token_waiting type period - #dict set token_waiting tok "." + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 #return 1 } } } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "." + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." } multiliteral-space { - set_tokenType literalpart + set_tokenType "literalpart" + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" set tok "." + return 1 } default { - set_tokenType untyped-value + set_tokenType "untyped_value" set tok "." } } @@ -2881,24 +4566,38 @@ namespace eval tomlish::parse { " " { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { + #todo had_slash - emit token or error #whitespace is a terminator for bare keys - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - untyped-value { + untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } @@ -2906,37 +4605,45 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $c - } - quotedkey - string { - if {$had_slash} { - append tok "\\" - } - #if {$dquotes eq "\""} { - #} - append tok $c + append tok $dquotes$c } - whitespace { + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } append tok $c } stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) if {$had_slash} { #REVIEW + #emit the stringpart - go back to the slash incr i -2 return 1 } else { #split into STRINGPART aaa WS " " - #keeping WS separate allows easier processing of CONT stripping append tok $dquotes incr i -1 return 1 } } - starttablename { - incr i -1 - return 1 + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } } - starttablearrayname { + starttablename - starttablearrayname { incr i -1 return 1 } @@ -2951,19 +4658,21 @@ namespace eval tomlish::parse { } } else { set had_slash $slash_active - if {$slash_active} { - set slash_active 0 - } + set slash_active 0 switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - if {$had_slash} { - set tok "\\$c" - } else { - set tok $c - } + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c } multistring-space { if {$had_slash} { @@ -2976,12 +4685,16 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok $dquotes incr i -1 - return + return 1 } set_tokenType "whitespace" append tok $c } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { if {$had_slash} { error "unexpected backslash [tomlish::parse::report_line]" @@ -2996,35 +4709,58 @@ namespace eval tomlish::parse { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { #whitespace is a terminator for bare keys incr i -1 - #set token_waiting type whitespace - #set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 return 1 } untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - quotedkey { + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { append tok $c } string - comment - whitespace { append tok $c } stringpart { - append tok $dquotes$c + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c } starttablename - starttablearrayname { incr i -1 @@ -3045,10 +4781,14 @@ namespace eval tomlish::parse { set slash_active 0 } switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname set tok $c } multistring-space { @@ -3069,6 +4809,10 @@ namespace eval tomlish::parse { } } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { set_tokenType "whitespace" append tok $c @@ -3078,27 +4822,77 @@ namespace eval tomlish::parse { } bom { #BOM (Byte Order Mark) - ignored by token consumer - set_tokenType "bom" - set tok "\uFEFF" - return 1 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + _start_squote_sequence { + #assert - tok will be one or two squotes only + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart { + append tok $c + } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } } default { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - endquotesequence { - puts stderr "endquotesequence: $tok" + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 } whitespace { - incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. - return 1 + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } } barekey { if {[tomlish::utils::is_barekey $c]} { @@ -3116,7 +4910,7 @@ namespace eval tomlish::parse { append tok $dquotes$c } default { - #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } @@ -3124,7 +4918,7 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - key-space - curly-space - curly-syntax { + table-space - itable-space { #if no currently active token - assume another key value pair if {[tomlish::utils::is_barekey $c]} { set_tokenType "barekey" @@ -3133,6 +4927,15 @@ namespace eval tomlish::parse { error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } multistring-space { set_tokenType "stringpart" if {$had_slash} { @@ -3142,16 +4945,25 @@ namespace eval tomlish::parse { set tok $dquotes$c } } - tablename { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { set_tokenType "tablename" set tok $c } - tablearrayname { + tablearrayname-state { set_tokenType "tablearrayname" set tok $c } + dottedkey-space { + set_tokenType barekey + set tok $c + } default { - set_tokenType "untyped-value" + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" set tok $c } } @@ -3162,32 +4974,48 @@ namespace eval tomlish::parse { } #run out of characters (eof) - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { #check for invalid ending tokens - #if {$state eq "err"} { + #if {$state eq "err-state"} { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} - if {$tokenType eq "startquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - eror "eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #dict set token_waiting type "string" - #dict set token_waiting tok "" - return 1 + switch -exact -- $tokenType { + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #set_token_waiting type string value "" complete 1 + return 1 + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open literal + error "eof reached without closing single quote for string literal. [tomlish::parse::report_line]" + } + 2 { + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" + set tok "" + return 1 + } + } } } - dict set token_waiting type "eof" - dict set token_waiting tok "eof" + set_token_waiting type eof value eof complete 1 startindex $i ;#review return 1 } else { - ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" set tokenType "eof" set tok "eof" } @@ -3312,7 +5140,7 @@ if {$argc > 0} { puts stderr "argc: $argc args: $argv" if {($argc == 1)} { - if {[::string tolower $argv] in {help -help h -h}} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { puts stdout "Usage: -app where appname one of:[tomlish::appnames]" exit 0 } else { diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl b/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl index 9edd90b0..24206ba7 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl @@ -13,7 +13,7 @@ namespace eval ::punkmake { variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] variable non_help_flags [list -k] variable help_flags [list -help --help /?] - variable known_commands [list project get-project-info shell vendorupdate bootsupport vfscommonupdate] + variable known_commands [list project modules info check shell vendorupdate bootsupport vfscommonupdate] } if {"::try" ni [info commands ::try]} { puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" @@ -21,7 +21,7 @@ if {"::try" ni [info commands ::try]} { } #------------------------------------------------------------------------------ -#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#Module loading from src/bootsupport or [pwd]/modules if pwd is a 'src' folder #------------------------------------------------------------------------------ #If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files # - then it will attempt to preference these modules @@ -35,8 +35,10 @@ if {[file exists [file join $startdir src bootsupport]]} { set bootsupport_mod [file join $startdir bootsupport modules] set bootsupport_lib [file join $startdir bootsupport lib] } -if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + +set package_paths_modified 0 +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { set original_tm_list [tcl::tm::list] tcl::tm::remove {*}$original_tm_list set original_auto_path $::auto_path @@ -63,8 +65,18 @@ if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { } - if {[file exists [pwd]/modules]} { - tcl::tm::add [pwd]/modules + #we deliberately don't use [pwd]/modules because commonly the launch dir may be the project dir. + #The /modules are the very modules we are building - and may be in a broken state, which make.tcl then can't fix. + if {[file tail $startdir] eq "src"} { + if {[file exists $startdir/modules]} { + #launch from /modules /lib etc." \n \n + append h " $scriptname modules" \n + append h " - build modules from src/modules etc without scanning src/runtime and src/vfs folders to build kit/zipkit executables" \n \n append h " $scriptname bootsupport" \n - append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n + append h " - bootsupport modules are available to make.tcl" \n \n append h " $scriptname vendorupdate" \n - append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n + append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n append h " $scriptname vfscommonupdate" \n - append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib" \n \n - append h " $scriptname get-project-info" \n - append h " - show the name and base folder of the project to be built" \n + append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib etc" \n + append h " - before calling this (followed by make project) - you can test using '(.exe) dev'" \n + append h " this will load modules from your /module /lib paths instead of from the kit/zipkit" \n \n + append h " $scriptname info" \n + append h " - show the name and base folder of the project to be built" \n append h "" \n if {[llength $::punkmake::pkg_missing]} { append h "* ** NOTE ** ***" \n @@ -220,12 +232,68 @@ if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} } set sourcefolder $projectroot/src +if {$::punkmake::command eq "check"} { + puts stdout "- tcl::tm::list" + foreach fld [tcl::tm::list] { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + puts stdout "- auto_path" + foreach fld $::auto_path { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + set v [package require punk::mix::base] + puts stdout "punk::mix::base version $v\n[package ifneeded punk::mix::base $v]" + exit 0 +} + +if {$package_paths_modified} { + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] +} + + -if {$::punkmake::command eq "get-project-info"} { - puts stdout "- -- --- --- --- --- --- --- --- --- ---" - puts stdout "- -- get-project-info -- -" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" +if {$::punkmake::command eq "info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" + puts stdout "- -- info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" puts stdout "- projectroot : $projectroot" + set sourcefolder $projectroot/src + set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] + set vendormodulefolders [glob -nocomplain -dir $sourcefolder -type d -tails vendormodules_tcl*] + puts stdout "- vendorlib folders: ([llength $vendorlibfolders])" + foreach fld $vendorlibfolders { + puts stdout " src/$fld" + } + puts stdout "- vendormodule folders: ([llength $vendormodulefolders])" + foreach fld $vendormodulefolders { + puts stdout " src/$fld" + } + set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] + puts stdout "- source module paths: [llength $source_module_folderlist]" + foreach fld $source_module_folderlist { + puts stdout " $fld" + } + set projectlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails lib_tcl*] + lappend projectlibfolders lib + puts stdout "- source libary paths: [llength $projectlibfolders]" + foreach fld $projectlibfolders { + puts stdout " src/$fld" + } if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { set vc "fossil" set rev [punk::repo::fossil_revision $scriptfolder] @@ -241,8 +309,11 @@ if {$::punkmake::command eq "get-project-info"} { } puts stdout "- version control : $vc" puts stdout "- revision : $rev" - puts stdout "- remote : $rem" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- remote" + foreach ln [split $rem \n] { + puts stdout " $ln" + } + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" exit 0 } @@ -564,7 +635,7 @@ if {$::punkmake::command eq "bootsupport"} { -if {$::punkmake::command ne "project"} { +if {$::punkmake::command ni {project modules}} { puts stderr "Command $::punkmake::command not implemented - aborting." flush stderr after 100 @@ -803,6 +874,19 @@ if {[punk::repo::is_fossil_root $projectroot]} { $installer destroy } +if {$::punkmake::command ne "project"} { + #command = modules + puts stdout "vfs folders not checked" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into base vfs folder" + puts stdout " - use 'make.tcl project' to build executable kits/zipkits from vfs folders as well if you have runtimes installed" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" + puts stdout "-done-" + exit 0 +} + + set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] if {$buildfolder ne "$sourcefolder/_build"} { puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" @@ -832,10 +916,12 @@ if {![llength $runtimes]} { exit 0 } +set has_sdx 1 if {[catch {exec sdx help} errM]} { puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" puts stderr "err: $errM" - exit 1 + #exit 1 + set has_sdx 0 } # -- --- --- --- --- --- --- --- --- --- @@ -1025,6 +1111,8 @@ foreach runtimefile $runtimes { # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set failed_kits [list] set installed_kits [list] +set skipped_kits [list] +set skipped_kit_installs [list] proc ::make_file_traversal_error {args} { error "file_traverse error: $args" @@ -1304,30 +1392,39 @@ foreach vfstail $vfs_tails { } } kit { - if {[catch { - if {$rtname ne "-"} { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose - } else { - exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose - } - } result]} { - if {$rtname ne "-"} { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" - } else { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" - } - puts stderr "sdx wrap $targetkit failed" - lappend failed_kits [list kit $targetkit reason $sdxmsg] + if {!$has_sdx} { + puts stderr "no sdx available to wrap $targetkit" + lappend failed_kits [list kit $targetkit reason "sdx_executable_unavailable"] $vfs_event targetset_end FAILED $vfs_event destroy $vfs_installer destroy continue - } else { - puts stdout "ok - finished sdx" - set separator [string repeat = 40] - puts stdout $separator - puts stdout $result - puts stdout $separator + } else { + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose + } + } result]} { + if {$rtname ne "-"} { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" + } else { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" + } + puts stderr "sdx wrap $targetkit failed" + lappend failed_kits [list kit $targetkit reason $sdxmsg] + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } } } } @@ -1435,6 +1532,7 @@ foreach vfstail $vfs_tails { set skipped_vfs_build 1 puts stderr "." puts stdout "Skipping build for vfs $vfstail with runtime $rtname - no change detected" + lappend skipped_kits [list kit $targetkit reason "no change detected"] $vfs_event targetset_end SKIPPED } $vfs_event destroy @@ -1489,6 +1587,7 @@ foreach vfstail $vfs_tails { set skipped_kit_install 1 puts stderr "." puts stdout "Skipping kit install for $targetkit with vfs $vfstail runtime $rtname - no change detected" + lappend skipped_kit_installs [list kit $targetkit reason "no change detected"] $bin_event targetset_end SKIPPED } $bin_event destroy @@ -1510,8 +1609,21 @@ if {[llength $failed_kits]} { punk::lib::showdict -channel stderr -roottype list $failed_kits */@*.@* #puts stderr [join $failed_kits \n] } - -puts stdout "done" +set had_kits [expr {[llength $installed_kits] || [llength $failed_kits] || [llength $skipped_kits]}] +if {$had_kits} { + puts stdout " module builds and kit/zipkit builds processed (vfs config: src/runtime/mapvfs.config)" + puts stdout " - use 'make.tcl modules' to build modules without scanning/building the vfs folders into executable kits/zipkits" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into the base vfs folder" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" +} else { + puts stdout " module builds processed" + puts stdout "" + puts stdout " If kit/zipkit based executables required - create src/vfs/.vfs folders containing lib,modules,modules_tcl9 etc folders" + puts stdout " Also ensure appropriate executables exist in src/runtime along with src/runtime/mapvfs.config" +} +puts stdout "-done-" exit 0 diff --git a/src/project_layouts/vendor/punk/project-0.1/src/make.tcl b/src/project_layouts/vendor/punk/project-0.1/src/make.tcl index 7f1c661a..24206ba7 100644 --- a/src/project_layouts/vendor/punk/project-0.1/src/make.tcl +++ b/src/project_layouts/vendor/punk/project-0.1/src/make.tcl @@ -13,7 +13,7 @@ namespace eval ::punkmake { variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] variable non_help_flags [list -k] variable help_flags [list -help --help /?] - variable known_commands [list project get-project-info shell vendorupdate bootsupport] + variable known_commands [list project modules info check shell vendorupdate bootsupport vfscommonupdate] } if {"::try" ni [info commands ::try]} { puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" @@ -21,7 +21,7 @@ if {"::try" ni [info commands ::try]} { } #------------------------------------------------------------------------------ -#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#Module loading from src/bootsupport or [pwd]/modules if pwd is a 'src' folder #------------------------------------------------------------------------------ #If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files # - then it will attempt to preference these modules @@ -35,8 +35,10 @@ if {[file exists [file join $startdir src bootsupport]]} { set bootsupport_mod [file join $startdir bootsupport modules] set bootsupport_lib [file join $startdir bootsupport lib] } -if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + +set package_paths_modified 0 +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { set original_tm_list [tcl::tm::list] tcl::tm::remove {*}$original_tm_list set original_auto_path $::auto_path @@ -63,8 +65,18 @@ if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { } - if {[file exists [pwd]/modules]} { - tcl::tm::add [pwd]/modules + #we deliberately don't use [pwd]/modules because commonly the launch dir may be the project dir. + #The /modules are the very modules we are building - and may be in a broken state, which make.tcl then can't fix. + if {[file tail $startdir] eq "src"} { + if {[file exists $startdir/modules]} { + #launch from /modules /lib etc." \n \n + append h " $scriptname modules" \n + append h " - build modules from src/modules etc without scanning src/runtime and src/vfs folders to build kit/zipkit executables" \n \n append h " $scriptname bootsupport" \n - append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n + append h " - bootsupport modules are available to make.tcl" \n \n append h " $scriptname vendorupdate" \n - append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n - append h " $scriptname get-project-info" \n - append h " - show the name and base folder of the project to be built" \n + append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n + append h " $scriptname vfscommonupdate" \n + append h " - update the src/vfs/_vfscommon from compiled src/modules and src/lib etc" \n + append h " - before calling this (followed by make project) - you can test using '(.exe) dev'" \n + append h " this will load modules from your /module /lib paths instead of from the kit/zipkit" \n \n + append h " $scriptname info" \n + append h " - show the name and base folder of the project to be built" \n append h "" \n if {[llength $::punkmake::pkg_missing]} { append h "* ** NOTE ** ***" \n @@ -217,12 +232,68 @@ if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} } set sourcefolder $projectroot/src +if {$::punkmake::command eq "check"} { + puts stdout "- tcl::tm::list" + foreach fld [tcl::tm::list] { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + puts stdout "- auto_path" + foreach fld $::auto_path { + if {[file exists $fld]} { + puts stdout " $fld" + } else { + puts stdout " $fld (not present)" + } + } + set v [package require punk::mix::base] + puts stdout "punk::mix::base version $v\n[package ifneeded punk::mix::base $v]" + exit 0 +} + +if {$package_paths_modified} { + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] +} + -if {$::punkmake::command eq "get-project-info"} { - puts stdout "- -- --- --- --- --- --- --- --- --- ---" - puts stdout "- -- get-project-info -- -" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" + +if {$::punkmake::command eq "info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" + puts stdout "- -- info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" puts stdout "- projectroot : $projectroot" + set sourcefolder $projectroot/src + set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] + set vendormodulefolders [glob -nocomplain -dir $sourcefolder -type d -tails vendormodules_tcl*] + puts stdout "- vendorlib folders: ([llength $vendorlibfolders])" + foreach fld $vendorlibfolders { + puts stdout " src/$fld" + } + puts stdout "- vendormodule folders: ([llength $vendormodulefolders])" + foreach fld $vendormodulefolders { + puts stdout " src/$fld" + } + set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] + puts stdout "- source module paths: [llength $source_module_folderlist]" + foreach fld $source_module_folderlist { + puts stdout " $fld" + } + set projectlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails lib_tcl*] + lappend projectlibfolders lib + puts stdout "- source libary paths: [llength $projectlibfolders]" + foreach fld $projectlibfolders { + puts stdout " src/$fld" + } if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { set vc "fossil" set rev [punk::repo::fossil_revision $scriptfolder] @@ -238,8 +309,11 @@ if {$::punkmake::command eq "get-project-info"} { } puts stdout "- version control : $vc" puts stdout "- revision : $rev" - puts stdout "- remote : $rem" - puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- remote" + foreach ln [split $rem \n] { + puts stdout " $ln" + } + puts stdout "- -- --- --- --- --- --- --- --- --- -- -" exit 0 } @@ -253,6 +327,41 @@ if {$::punkmake::command eq "shell"} { exit 1 } +if {$::punkmake::command eq "vfscommonupdate"} { + puts "projectroot: $projectroot" + puts "script: [info script]" + puts stdout "Updating vfs/_vfscommon" + + puts stdout "REPLACE src/vfs/_vfscommon/* with project's modules and libs?? y|n" + if {[gets stdin] eq "y"} { + puts proceeding... + + proc vfscommonupdate {projectroot} { + file delete -force $projectroot/src/vfs/_vfscommon/modules + file copy $projectroot/modules $projectroot/src/vfs/_vfscommon/ + #temp? (avoid zipfs mkimg windows dotfile bug) + file delete $projectroot/src/vfs/_vfscommon/modules/.punkcheck + + file delete -force $projectroot/src/vfs/_vfscommon/lib + file copy $projectroot/lib $projectroot/src/vfs/_vfscommon/ + #temp? + file delete $projectroot/src/vfs/_vfscommon/lib/.punkcheck + + } + vfscommonupdate $projectroot + + } else { + puts aborting... + } + + + + puts stdout "\nvfscommonupdate done " + flush stderr + flush stdout + ::exit 0 +} + if {$::punkmake::command eq "vendorupdate"} { puts "projectroot: $projectroot" puts "script: [info script]" @@ -295,18 +404,22 @@ if {$::punkmake::command eq "vendorupdate"} { puts stderr "Unable to use punkcheck for vendormodules$which update. Error: $errM" set installation_event "" } - foreach {relpath module} $local_modules { - set module [string trim $module :] - set module_subpath [string map {:: /} [namespace qualifiers $module]] + foreach {relpath requested_module} $local_modules { + set requested_module [string trim $requested_module :] + set module_subpath [string map {:: /} [namespace qualifiers $requested_module]] set srclocation [file join $projectroot $relpath $module_subpath] #puts stdout "$relpath $module $module_subpath $srclocation" - set pkgmatches [glob -nocomplain -dir $srclocation -tail [namespace tail $module]-*] + + #todo - check if requested_module has version extension and allow explicit versions instead of just latest + #allow modulename-* literal in .config to request all versions + + set pkgmatches [glob -nocomplain -dir $srclocation -tail [namespace tail $requested_module]-*] #lsort won't sort version numbers properly e.g with -dictionary 0.1.1 comes before 0.1 if {![llength $pkgmatches]} { - puts stderr "Missing local source for vendor module $module - not found in $srclocation" + puts stderr "Missing local source for requested vendor module $requested_module - not found in $srclocation" continue } - set latestfile [lindex $pkgmatches 0] + set latestfile [lindex $pkgmatches 0] ;#default set latestver [lindex [split [file rootname $latestfile] -] 1] foreach m $pkgmatches { lassign [split [file rootname $m] -] _pkg ver @@ -316,6 +429,7 @@ if {$::punkmake::command eq "vendorupdate"} { set latestfile $m } } + set srcfile [file join $srclocation $latestfile] set tgtfile [file join $targetroot $module_subpath $latestfile] if {$installation_event ne ""} { @@ -521,7 +635,7 @@ if {$::punkmake::command eq "bootsupport"} { -if {$::punkmake::command ne "project"} { +if {$::punkmake::command ni {project modules}} { puts stderr "Command $::punkmake::command not implemented - aborting." flush stderr after 100 @@ -536,7 +650,6 @@ if {$::punkmake::command ne "project"} { set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] lappend vendorlibfolders vendorlib - foreach lf $vendorlibfolders { if {[file exists $sourcefolder/$lf]} { lassign [split $lf _] _vm tclx @@ -547,7 +660,6 @@ foreach lf $vendorlibfolders { } set target_lib_folder $projectroot/lib$which file mkdir $projectroot/lib$which - #exclude README.md from source folder - but only the root one #-antiglob_paths takes relative patterns e.g # */test.txt will only match test.txt exactly one level deep. @@ -556,7 +668,6 @@ foreach lf $vendorlibfolders { set antipaths [list\ README.md\ ] - puts stdout "VENDORLIB$which: copying from $sourcefolder/$lf to $target_lib_folder (if source file changed)" set resultdict [punkcheck::install $sourcefolder/$lf $target_lib_folder -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] puts stdout [punkcheck::summarize_install_resultdict $resultdict] @@ -658,6 +769,34 @@ foreach layoutbase $layout_bases { } } ######################################################## +set projectlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails lib_tcl*] +lappend projectlibfolders lib +foreach lf $projectlibfolders { + if {[file exists $sourcefolder/$lf]} { + lassign [split $lf _] _vm tclx + if {$tclx ne ""} { + set which _$tclx + } else { + set which "" + } + set target_lib_folder $projectroot/lib$which + file mkdir $projectroot/lib$which + #exclude README.md from source folder - but only the root one + #-antiglob_paths takes relative patterns e.g + # */test.txt will only match test.txt exactly one level deep. + # */*/*.foo will match any path ending in .foo that is exactly 2 levels deep. + # **/test.txt will match at any level below the root (but not in the root) + set antipaths [list\ + README.md\ + ] + puts stdout "PROJECTLIB$which: copying from $sourcefolder/$lf to $target_lib_folder (if source file changed)" + set resultdict [punkcheck::install $sourcefolder/$lf $target_lib_folder -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + } +} +if {![llength $projectlibfolders]} { + puts stderr "PROJECTLIB: No src/lib or src/lib_tcl* folder found." +} #consolidated /modules /modules_tclX folder used for target where X is tcl major version #the make process will process for any _tclX not just the major version of the current interpreter @@ -735,6 +874,19 @@ if {[punk::repo::is_fossil_root $projectroot]} { $installer destroy } +if {$::punkmake::command ne "project"} { + #command = modules + puts stdout "vfs folders not checked" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into base vfs folder" + puts stdout " - use 'make.tcl project' to build executable kits/zipkits from vfs folders as well if you have runtimes installed" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" + puts stdout "-done-" + exit 0 +} + + set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] if {$buildfolder ne "$sourcefolder/_build"} { puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" @@ -764,10 +916,12 @@ if {![llength $runtimes]} { exit 0 } +set has_sdx 1 if {[catch {exec sdx help} errM]} { puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" puts stderr "err: $errM" - exit 1 + #exit 1 + set has_sdx 0 } # -- --- --- --- --- --- --- --- --- --- @@ -806,31 +960,26 @@ if {[file exists $mapfile]} { lappend missing $runtime } } - foreach vfspair $vfs_specs { - switch -- [llength $vfspair] { - 1 { - set vfs [lindex $vfspair 0] - if {![file isdirectory [file join $sourcefolder $vfs]]} { - puts stderr "WARNING: Missing vfs folder [file join $sourcefolder $vfs] specified in mapvfs.config for runtime $runtime" - lappend missing $vfs - } else { - set appname [file rootname $vfs] - dict lappend vfs_runtime_map $vfs [list $runtime $appname] - } - } - 2 { - lassign $vfspair vfs appname - if {![file isdirectory [file join $sourcefolder $vfs]]} { - puts stderr "WARNING: Missing vfs folder [file join $sourcefolder $vfs] specified in mapvfs.config for runtime $runtime" - lappend missing $vfs + foreach vfsconfig $vfs_specs { + switch -- [llength $vfsconfig] { + 1 - 2 - 3 { + lassign $vfsconfig vfstail appname kit_type + if {![file isdirectory [file join $sourcefolder vfs $vfstail]]} { + puts stderr "WARNING: Missing vfs folder [file join $sourcefolder vfs $vfstail] specified in mapvfs.config for runtime $runtime" + lappend missing $vfstail } else { - dict lappend vfs_runtime_map $vfs [list $runtime $appname] + if {$appname eq ""} { + set appname [file rootname $vfstail] + } + dict lappend vfs_runtime_map $vfstail [list $runtime $appname $kit_type] } } default { - puts stderr "bad entry in mapvfs.config - expected each entry after the runtime name to be of length 1 or length 2. got: $vfspair ([llength $vfspair])" + puts stderr "bad entry in mapvfs.config - expected each entry after the runtime name to be of length 1 or length 2. got: $vfsconfig ([llength $vfsconfig])" } } + + } if {[dict exists $runtime_vfs_map $runtime]} { puts stderr "CONFIG FILE ERROR. runtime: $runtime was specified more than once in $mapfile." @@ -881,23 +1030,30 @@ foreach runtime [dict keys $runtime_vfs_map] { puts -nonewline stdout $caps exit 0 } - lassign [punk::lib::invoke [list $rtfolder/$runtime <<$capscript]] stdout stderr exitcode - if {$exitcode == 0} { - dict set runtime_caps $runtime $stdout + #invoke can fail if runtime not an executable file for the current platform + if {![catch { + lassign [punk::lib::invoke [list $rtfolder/$runtime <<$capscript]] stdout stderr exitcode + } errM]} { + if {$exitcode == 0} { + dict set runtime_caps $runtime $stdout + } + dict set runtime_caps $runtime exitcode $exitcode + } else { + dict set runtime_caps $runtime exitcode -1 error "launch-fail" } } puts stdout "Runtime capabilities:" punk::lib::pdict runtime_caps -set vfs_folders [glob -nocomplain -dir $sourcefolder -types d -tail *.vfs] +set vfs_tails [glob -nocomplain -dir $sourcefolder/vfs -types d -tail *.vfs] #add any extra .vfs folders found in runtime/mapvfs.config file (e.g myotherruntimes/something.vfs) -dict for {vfs -} $vfs_runtime_map { - if {$vfs ni $vfs_folders} { - lappend vfs_folders $vfs +dict for {vfstail -} $vfs_runtime_map { + if {$vfstail ni $vfs_tails} { + lappend vfs_tails $vfstail } } -if {![llength $vfs_folders]} { - puts stdout "No .vfs folders found at '$sourcefolder' - no kits to build" +if {![llength $vfs_tails]} { + puts stdout "No .vfs folders found at '$sourcefolder/vfs' - no kits to build" puts stdout " -done- " exit 0 } @@ -955,13 +1111,66 @@ foreach runtimefile $runtimes { # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set failed_kits [list] set installed_kits [list] -# -# loop over vfs_folders and for each one, loop over configured (or matching) runtimes - build with sdx if source .vfs or source runtime exe has changed. -# we are using punkcheck to install result to buildfolder so we create a .punkcheck file at the target folder to store metadata. -# punkcheck allows us to not rely purely on timestamps (which may be unreliable) -# +set skipped_kits [list] +set skipped_kit_installs [list] + +proc ::make_file_traversal_error {args} { + error "file_traverse error: $args" +} +proc merge_over {sourcedir targetdir} { + package require fileutil + package require fileutil::traverse + package require control + + if {![file exists $sourcedir]} { + puts stderr "merge_over sourcedir '$sourcedir' not found" + return + } + if {![file exists $targetdir]} { + puts stderr "merge_over targetdir '$targetdir' not found - target folder must already exist" + return + } + puts stdout "merge vfs $sourcedir over $targetdir STARTING" + + #The tails should be unique enough for clarity in progress emissions to stdout + set sourcename [file tail $sourcedir] + set targetname [file tail $targetdir] + + set t [fileutil::traverse %AUTO% $sourcedir -errorcmd ::make_file_traversal_error] + set last_type "-" + $t foreach file_or_dir { + set relpath [fileutil::stripPath $sourcedir $file_or_dir] + set target [file join $targetdir $relpath] + set this_type [file type $file_or_dir] + switch -exact -- $this_type { + directory { + if {$last_type ne "directory"} { + puts -nonewline stdout \n + } + if {![file exists $target]} { + #puts stdout "-- mkdir $target" + puts stdout "$sourcename -> $targetname mkdir $relpath" + file mkdir $target + file mtime $target [file mtime $file_or_dir] + } else { + puts stdout "$sourcename -> $targetname existing dir $relpath" + } + } + file { + puts -nonewline stdout "." + file copy -force $file_or_dir $target + } + default { + puts stderr "merge vfs $sourcedir !!! unhandled file type $this_type !!!" + } + } + set last_type $this_type + } + $t destroy + puts stdout "\nmerge vfs $sourcedir over $targetdir done." +} set startdir [pwd] -puts stdout "Found [llength $vfs_folders] .vfs folders - building executable for each..." +puts stdout "Found [llength $vfs_tails] .vfs folders - checking each for executables that may need to be built" cd [file dirname $buildfolder] #root folder mtime is insufficient for change detection. Tree mtime of folders only is a barely passable mechanism for vfs change detection in some circumstances - e.g if files added/removed but never edited in place #a hash of full tree file & dir mtime may be more reasonable - but it remains to be seen if just tar & checksum is any/much slower. @@ -969,11 +1178,18 @@ cd [file dirname $buildfolder] #Using first mtime encountered that is later than target is another option - but likely to be highly variable in speed. Last file in the tree could happen to be the latest, and this mechanism doesn't handle build on reversion to older source. set exe_names_seen [list] set path_cksum_cache [dict create] -foreach vfs $vfs_folders { +dict set path_cksum_cache {*}[punk::mix::base::lib::get_relativecksum_from_base $basedir $sourcefolder/vfs/_vfscommon] + +# +# loop over vfs_tails and for each one, loop over configured (or matching) runtimes - build with sdx or zipfs if source .vfs or source runtime exe has changed. +# we are using punkcheck to install result to buildfolder so we create a .punkcheck file at the target folder to store metadata. +# punkcheck allows us to not rely purely on timestamps (which may be unreliable) +# +foreach vfstail $vfs_tails { - set vfsname [file rootname $vfs] - puts stdout " Processing vfs $sourcefolder/$vfs" + set vfsname [file rootname $vfstail] puts stdout " ------------------------------------" + puts stdout " checking vfs $sourcefolder/vfs/$vfstail for configured runtimes" set skipped_vfs_build 0 # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set basedir $buildfolder @@ -981,14 +1197,12 @@ foreach vfs $vfs_folders { -make-step build_vfs\ ] - #e.g punk87.vfs {cksum xxxx cksum_all_opts {-cksum_content 1 ... -cksum_algorithm sha1}} - dict set path_cksum_cache {*}[punk::mix::base::lib::get_relativecksum_from_base $basedir $sourcefolder/$vfs] set runtimes [list] - if {[dict exists $vfs_runtime_map $vfs]} { - #set runtimes [dict get $vfs_runtime_map $vfs] + if {[dict exists $vfs_runtime_map $vfstail]} { + #set runtimes [dict get $vfs_runtime_map $vfstail] #runtimes in vfs_runtime_map_vfs dict are unsuffixed (.exe stripped or was not present) - set applist [dict get $vfs_runtime_map $vfs] + set applist [dict get $vfs_runtime_map $vfstail] foreach rt_app $applist { lappend runtimes [lindex $rt_app 0] } @@ -1004,7 +1218,7 @@ foreach vfs $vfs_folders { } } else { #only match this vfs to a correspondingly named runtime if there was no explicit entry for that runtime - set matchrt [file rootname [file tail $vfs]] ;#e.g project.vfs -> project + set matchrt [file rootname [file tail $vfstail]] ;#e.g project.vfs -> project if {![dict exists $runtime_vfs_map $matchrt]} { if {"windows" eq $::tcl_platform(platform)} { if {[file exists $rtfolder/$matchrt.exe]} { @@ -1017,7 +1231,7 @@ foreach vfs $vfs_folders { } #assert $runtimes is a list of executable names suffixed with .exe if on windows - whether or not specified with .exe in the mapvfs.config - puts " vfs: [file tail $vfs] runtimes to process: $runtimes" + puts " vfs: $vfstail runtimes to process ([llength $runtimes]): $runtimes" #todo - non kit based - zipkit? # $runtimes may now include a dash entry "-" (from mapvfs.config file) foreach runtime_fullname $runtimes { @@ -1032,13 +1246,16 @@ foreach vfs $vfs_folders { if {[dict exists $runtime_vfs_map $rtname]} { set applist [dict get $runtime_vfs_map $rtname] foreach vfs_app $applist { - lassign $vfs_app configured_vfs appname - if {$configured_vfs ne $vfs} { + lassign $vfs_app configured_vfs appname kit_type + if {$configured_vfs ne $vfstail} { continue } if {$appname eq ""} { set appname $vfsname } + if {$kit_type eq ""} { + set kit_type "kit" ;#review - we should probably move to defaulting to zip (zipkit) + } if {$rtname eq "-"} { set targetkit $appname.kit } else { @@ -1055,68 +1272,167 @@ foreach vfs $vfs_folders { } } lappend exe_names_seen $targetkit - lappend targetkits $targetkit + lappend targetkits [list $targetkit $kit_type] } } - puts stdout " vfs: [file tail $vfs] runtime: $rtname targetkits: $targetkits" - foreach targetkit $targetkits { + puts stdout " vfs: $vfstail runtime: $rtname targetkits: $targetkits" + foreach targetkit_info $targetkits { + puts stdout " processing targetkit: $targetkit_info" + lassign $targetkit_info targetkit kit_type # -- ---------- set vfs_installer [punkcheck::installtrack new $installername $basedir/.punkcheck] $vfs_installer set_source_target $sourcefolder $buildfolder set vfs_event [$vfs_installer start_event {-make-step build_vfs}] $vfs_event targetset_init INSTALL $buildfolder/$targetkit + set relvfs [punkcheck::lib::path_relative $basedir $sourcefolder/vfs/$vfstail] + if {![dict exists $path_cksum_cache $relvfs]} { + #e.g ../vfs/punk87.vfs {cksum xxxx cksum_all_opts {-cksum_content 1 ... -cksum_algorithm sha1}} + dict set path_cksum_cache {*}[punk::mix::base::lib::get_relativecksum_from_base $basedir $sourcefolder/vfs/$vfstail] + } $vfs_event targetset_cksumcache_set $path_cksum_cache ;#cached cksum entries for .vfs folder - $vfs_event targetset_addsource $sourcefolder/$vfs + $vfs_event targetset_addsource $sourcefolder/vfs/_vfscommon + $vfs_event targetset_addsource $sourcefolder/vfs/$vfstail if {$rtname ne "-"} { - $vfs_event targetset_addsource $buildfolder/build_$runtime_fullname + $vfs_event targetset_addsource $buildfolder/build_$runtime_fullname ;#working copy of runtime executable } # -- ---------- + set rtmountpoint //zipfs:/rtmounts/$runtime_fullname + set changed_unchanged [$vfs_event targetset_source_changes] + set vfs_or_runtime_changed [expr {[llength [dict get $changed_unchanged changed]] || [llength [$vfs_event get_targets_exist]] < [llength [$vfs_event get_targets]]}] - if {[llength [dict get $changed_unchanged changed]] || [llength [$vfs_event get_targets_exist]] < [llength [$vfs_event get_targets]]} { + if {$vfs_or_runtime_changed} { #source .vfs folder has changes $vfs_event targetset_started # -- --- --- --- --- --- - #use if {[file exists $buildfolder/$vfsname.new]} { puts stderr "deleting existing $buildfolder/$vfsname.new" file delete $buildfolder/$vfsname.new } - puts stdout "building $vfsname.new with sdx.. vfsdir:$vfs cwd: [pwd]" + package require fileutil + package require fileutil::traverse + package require control + set targetvfs $buildfolder/buildvfs_$targetkit.vfs + file delete -force $targetvfs + + switch -- $kit_type { + zip { + #for a zipkit - we need to extract the existing vfs from the runtime + #zipfs mkimg replaces the entire zipped vfs in the runtime - so we need the original data to be part of our targetvfs. + puts stdout "building $vfsname.new with zipfs vfsdir:$vfstail cwd: [pwd]" + file mkdir $targetvfs + + if {![file exists $rtmountpoint]} { + if {[catch { + tcl::zipfs::mount $buildfolder/build_$runtime_fullname rtmounts/$runtime_fullname + } errM]} { + tcl::zipfs::mount rtmounts/$runtime_fullname $buildfolder/build_$runtime_fullname + } + } + + if {[file exists $rtmountpoint]} { + merge_over $rtmountpoint $targetvfs + } + + merge_over $sourcefolder/vfs/_vfscommon $targetvfs - if {[catch { - if {$rtname ne "-"} { - exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$runtime_fullname -verbose - } else { - exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose } - } result]} { - if {$rtname ne "-"} { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" - } else { - set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose failed with msg: $result" + kit { + #for a kit, we don't need to extract the existing vfs from the runtime. + # - the sdx merge process can merge our .vfs folder with the existing contents. + puts stdout "building $vfsname.new with sdx.. vfsdir:$vfstail cwd: [pwd]" + if {[file exists $sourcefolder/vfs/_vfscommon]} { + file copy $sourcefolder/vfs/_vfscommon $targetvfs + } else { + file mkdir $targetvfs + } + } + } + + + set sourcevfs [file join $sourcefolder vfs $vfstail] + merge_over $sourcevfs $targetvfs + + #set wrapvfs $sourcefolder/$vfs + set wrapvfs $targetvfs + switch -- $kit_type { + zip { + if {[catch { + if {[dict exists $runtime_caps $rtname]} { + if {[dict get $runtime_caps $rtname exitcode] == 0} { + if {![dict get $runtime_caps $rtname has_zipfs]} { + error "runtime $rtname doesn't have zipfs capability" + } + } else { + #could be runtime for another platform + puts stderr "RUNTIME capabilities unknown. Unsure if zip supported. trying anyway.." + } + } + #note - as at 2024-08 - there is some discussion about the interface to mkimg - it is considered unstable (may change to -option value syntax) + puts stderr "calling: tcl::zipfs::mkimg $buildfolder/$vfsname.new $wrapvfs $wrapvfs \"\" $buildfolder/build_$runtime_fullname" + tcl::zipfs::mkimg $buildfolder/$vfsname.new $wrapvfs $wrapvfs "" $buildfolder/build_$runtime_fullname + } result ]} { + set failmsg "zipfs mkimg failed with msg: $result" + puts stderr "tcl::zipfs::mkimg $targetkit failed" + lappend failed_kits [list kit $targetkit reason $failmsg] + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue + } else { + puts stdout "ok - finished tcl::zipfs::mkimg" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } + } + kit { + if {!$has_sdx} { + puts stderr "no sdx available to wrap $targetkit" + lappend failed_kits [list kit $targetkit reason "sdx_executable_unavailable"] + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue + } else { + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose + } + } result]} { + if {$rtname ne "-"} { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -runtime $buildfolder/build_$runtime_fullname -verbose failed with msg: $result" + } else { + set sdxmsg "sdx wrap $buildfolder/$vfsname.new -vfs $wrapvfs -verbose failed with msg: $result" + } + puts stderr "sdx wrap $targetkit failed" + lappend failed_kits [list kit $targetkit reason $sdxmsg] + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } + } } - puts stderr "sdx wrap $targetkit failed" - lappend failed_kits [list kit $targetkit reason $sdxmsg] - $vfs_event targetset_end FAILED - $vfs_event destroy - $vfs_installer destroy - continue - } else { - puts stdout "ok - finished sdx" - set separator [string repeat = 40] - puts stdout $separator - puts stdout $result - puts stdout $separator } + if {![file exists $buildfolder/$vfsname.new]} { - puts stderr "|err> make.tcl build didn't seem to produce output at $sourcefolder/_build/$vfsname.new" - lappend failed_kits [list kit $targetkit reason "build failed to produce output at $sourcefolder/_build/$vfsname.new"] + puts stderr "|err> make.tcl build didn't seem to produce output at $buildfolder/$vfsname.new" + lappend failed_kits [list kit $targetkit reason "build failed to produce output at $buildfolder/$vfsname.new"] $vfs_event targetset_end FAILED $vfs_event destroy $vfs_installer destroy @@ -1135,9 +1451,10 @@ foreach vfs $vfs_folders { if {![catch { exec $pscmd | grep $targetkit } still_running]} { - - puts stdout "found $targetkit instances still running\n" + set still_running_lines [split [string trim $still_running] \n] + puts stdout "found ([llength $still_running_lines]) $targetkit instances still running\n" set count_killed 0 + set num_to_kill [llength $still_running_lines] foreach ln [split $still_running \n] { puts stdout " $ln" @@ -1170,9 +1487,6 @@ foreach vfs $vfs_folders { #review - *no running instance* works with windows taskkill - "*No such process*" works with kill -9 on FreeBSD and linux - other platforms? if {![string match "*no running instance*" $errMsg] && ![string match "*No such process*" $errMsg]} { lappend failed_kits [list kit $targetkit reason "could not kill running process for $targetkit (using '$killcmd')"] - $vfs_event targetset_end FAILED - $vfs_event destroy - $vfs_installer destroy continue } } else { @@ -1180,10 +1494,15 @@ foreach vfs $vfs_folders { incr count_killed } } - if {$count_killed > 0} { - puts stderr "\nKilled $count_killed processes. Waiting a short time before attempting to delete executable" - after 1000 + if {$count_killed < $num_to_kill} { + $vfs_event targetset_end FAILED + $vfs_event destroy + $vfs_installer destroy + continue } + + puts stderr "\nKilled $count_killed processes. Waiting a short time before attempting to delete executable" + after 1000 } else { puts stderr "Ok.. no running '$targetkit' processes found" } @@ -1203,28 +1522,42 @@ foreach vfs $vfs_folders { } } - #WINDOWS filesystem 'tunneling' (file replacement within 15secs) could cause targetkit to copy ctime & shortname metadata from previous file! + #WINDOWS filesystem 'tunnelling' (file replacement within 15secs) could cause targetkit to copy ctime & shortname metadata from previous file! #This is probably harmless - but worth being aware of. file rename $buildfolder/$vfsname.new $buildfolder/$targetkit # -- --- --- --- --- --- $vfs_event targetset_end OK + } else { + set skipped_vfs_build 1 + puts stderr "." + puts stdout "Skipping build for vfs $vfstail with runtime $rtname - no change detected" + lappend skipped_kits [list kit $targetkit reason "no change detected"] + $vfs_event targetset_end SKIPPED + } + $vfs_event destroy + $vfs_installer destroy - after 200 - set deployment_folder [file dirname $sourcefolder]/bin - file mkdir $deployment_folder + after 200 + set deployment_folder [file dirname $sourcefolder]/bin + file mkdir $deployment_folder - # -- ---------- - set bin_installer [punkcheck::installtrack new "make.tcl" $deployment_folder/.punkcheck] - $bin_installer set_source_target $buildfolder $deployment_folder - set bin_event [$bin_installer start_event {-make-step final_kit_install}] - $bin_event targetset_init INSTALL $deployment_folder/$targetkit - #todo - move final deployment step outside of the build vfs loop? (final deployment can fail and then isn't rerun even though _build and deployed versions differ, unless .vfs modified again) - #set last_completion [$bin_event targetset_last_complete] - - $bin_event targetset_addsource $buildfolder/$targetkit - $bin_event targetset_started - # -- ---------- + # -- ---------- + set bin_installer [punkcheck::installtrack new "make.tcl" $deployment_folder/.punkcheck] + $bin_installer set_source_target $buildfolder $deployment_folder + set bin_event [$bin_installer start_event {-make-step final_kit_install}] + $bin_event targetset_init INSTALL $deployment_folder/$targetkit + #todo - move final deployment step outside of the build vfs loop? (final deployment can fail and then isn't rerun even though _build and deployed versions differ, unless .vfs modified again) + #set last_completion [$bin_event targetset_last_complete] + + $bin_event targetset_addsource $deployment_folder/$targetkit ;#add target as a source of metadata for change detection + $bin_event targetset_addsource $buildfolder/$targetkit + $bin_event targetset_started + # -- ---------- + + set changed_unchanged [$bin_event targetset_source_changes] + set built_or_installed_kit_changed [expr {[llength [dict get $changed_unchanged changed]] || [llength [$bin_event get_targets_exist]] < [llength [$bin_event get_targets]]}] + if {$built_or_installed_kit_changed} { if {[file exists $deployment_folder/$targetkit]} { puts stderr "deleting existing deployed at $deployment_folder/$targetkit" @@ -1250,19 +1583,17 @@ foreach vfs $vfs_folders { # -- ---------- $bin_event targetset_end OK # -- ---------- - $bin_event destroy - $bin_installer destroy - } else { - set skipped_vfs_build 1 + set skipped_kit_install 1 puts stderr "." - puts stdout "Skipping build for vfs $vfs with runtime $rtname - no change detected" - $vfs_event targetset_end SKIPPED + puts stdout "Skipping kit install for $targetkit with vfs $vfstail runtime $rtname - no change detected" + lappend skipped_kit_installs [list kit $targetkit reason "no change detected"] + $bin_event targetset_end SKIPPED } + $bin_event destroy + $bin_installer destroy - $vfs_event destroy - $vfs_installer destroy } ;#end foreach targetkit } ;#end foreach rtname in runtimes @@ -1278,8 +1609,21 @@ if {[llength $failed_kits]} { punk::lib::showdict -channel stderr -roottype list $failed_kits */@*.@* #puts stderr [join $failed_kits \n] } - -puts stdout "done" +set had_kits [expr {[llength $installed_kits] || [llength $failed_kits] || [llength $skipped_kits]}] +if {$had_kits} { + puts stdout " module builds and kit/zipkit builds processed (vfs config: src/runtime/mapvfs.config)" + puts stdout " - use 'make.tcl modules' to build modules without scanning/building the vfs folders into executable kits/zipkits" + puts stdout " - use 'make.tcl vfscommonupdate' to copy built modules into the base vfs folder" + puts stdout " Note that without the vfscommonupdate step, 'make.tcl project' will include any manual changes in the *custom* vfs folders but" + puts stdout " without the latest built modules." + puts stdout " calling 'builtexename(.exe) dev' will allow testing of built modules before they are put into the kits/zipkits via 'vfscommonupdate' then 'project'" +} else { + puts stdout " module builds processed" + puts stdout "" + puts stdout " If kit/zipkit based executables required - create src/vfs/.vfs folders containing lib,modules,modules_tcl9 etc folders" + puts stdout " Also ensure appropriate executables exist in src/runtime along with src/runtime/mapvfs.config" +} +puts stdout "-done-" exit 0 diff --git a/src/project_layouts/vendor/punk/project-0.1/src/vendormodules/include_modules.config b/src/project_layouts/vendor/punk/project-0.1/src/vendormodules/include_modules.config index a5d7f24f..87e4533a 100644 --- a/src/project_layouts/vendor/punk/project-0.1/src/vendormodules/include_modules.config +++ b/src/project_layouts/vendor/punk/project-0.1/src/vendormodules/include_modules.config @@ -1,19 +1,19 @@ - - -#e.g -#set local_modules [list\ -# c:/repo/jn/tclmodules/gridplus/modules gridplus\ -# c:/repo/jn/tclmodules/tablelist/modules tablelist\ -# c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ -#] - - - -set local_modules [list\ -] - -set fossil_modules [dict create\ -] - -set git_modules [dict create\ + + +#e.g +#set local_modules [list\ +# c:/repo/jn/tclmodules/gridplus/modules gridplus\ +# c:/repo/jn/tclmodules/tablelist/modules tablelist\ +# c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ +#] + + + +set local_modules [list\ +] + +set fossil_modules [dict create\ +] + +set git_modules [dict create\ ] \ No newline at end of file diff --git a/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl8/include_modules.config b/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl8/include_modules.config index a5d7f24f..87e4533a 100644 --- a/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl8/include_modules.config +++ b/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl8/include_modules.config @@ -1,19 +1,19 @@ - - -#e.g -#set local_modules [list\ -# c:/repo/jn/tclmodules/gridplus/modules gridplus\ -# c:/repo/jn/tclmodules/tablelist/modules tablelist\ -# c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ -#] - - - -set local_modules [list\ -] - -set fossil_modules [dict create\ -] - -set git_modules [dict create\ + + +#e.g +#set local_modules [list\ +# c:/repo/jn/tclmodules/gridplus/modules gridplus\ +# c:/repo/jn/tclmodules/tablelist/modules tablelist\ +# c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ +#] + + + +set local_modules [list\ +] + +set fossil_modules [dict create\ +] + +set git_modules [dict create\ ] \ No newline at end of file diff --git a/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl9/include_modules.config b/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl9/include_modules.config index a5d7f24f..87e4533a 100644 --- a/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl9/include_modules.config +++ b/src/project_layouts/vendor/punk/project-0.1/src/vendormodules_tcl9/include_modules.config @@ -1,19 +1,19 @@ - - -#e.g -#set local_modules [list\ -# c:/repo/jn/tclmodules/gridplus/modules gridplus\ -# c:/repo/jn/tclmodules/tablelist/modules tablelist\ -# c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ -#] - - - -set local_modules [list\ -] - -set fossil_modules [dict create\ -] - -set git_modules [dict create\ + + +#e.g +#set local_modules [list\ +# c:/repo/jn/tclmodules/gridplus/modules gridplus\ +# c:/repo/jn/tclmodules/tablelist/modules tablelist\ +# c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ +#] + + + +set local_modules [list\ +] + +set fossil_modules [dict create\ +] + +set git_modules [dict create\ ] \ No newline at end of file diff --git a/src/vendormodules/fauxlink-0.1.0.tm b/src/vendormodules/fauxlink-0.1.0.tm new file mode 100644 index 00000000..a7b1e264 --- /dev/null +++ b/src/vendormodules/fauxlink-0.1.0.tm @@ -0,0 +1,336 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application fauxlink 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin fauxlink_module_fauxlink 0 0.1.0] +#[copyright "2024"] +#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] +#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[require fauxlink] +#[keywords module] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of fauxlink +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by fauxlink +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::class { + #*** !doctools + #[subsection {Namespace fauxlink::class}] + #[para] class definitions + if {[info commands [namespace current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink { + namespace export {[a-z]*}; # Convention: export all lowercase + + #todo - enforce utf-8 + + #literal unicode chars supported by modern filesystems - leave as is - REVIEW + + + variable encode_map + variable decode_map + #most filesystems don't allow NULL - map to empty string + + #Make sure % is not in encode_map + set encode_map [dict create\ + \x00 ""\ + { } %20\ + \t %09\ + + %2B\ + # %23\ + * %2A\ + ? %3F\ + \\ %5C\ + / %2F\ + | %7C\ + : %3A\ + {;} %3B\ + {"} %22\ + < %3C\ + > %3E\ + ] + #must_encode + # + # * ? \ / | : ; " < > \t + # also NUL to empty string + # also ctrl chars 01 to 1F (1..31) + for {set i 1} {$i < 32} {incr i} { + set ch [format %c $i] + set enc "%[format %02X $i]" + set enc_lower [string tolower $enc] + dict set encode_map $ch $enc + dict set decode_map $enc $ch + dict set decode_map $enc_lower $ch + } + + variable must_encode + set must_encode [dict keys $encode_map] + + + set decode_map [dict create\ + %20 { }\ + %21 "!"\ + %22 {"}\ + %23 "#"\ + %24 "$"\ + %25 "%"\ + %26 "&"\ + %27 "'"\ + %28 "("\ + %29 ")"\ + %2A "*"\ + %2a "*"\ + %2B "+"\ + %2b "+"\ + %2C ","\ + %2c ","\ + %2F "/"\ + %2f "/"\ + %3A ":"\ + %3a ":"\ + %3B {;}\ + %3b {;}\ + %3D "="\ + %3C "<"\ + %3c "<"\ + %3d "="\ + %3E ">"\ + %3e ">"\ + %3F "?"\ + %3f "?"\ + %40 "@"\ + %5B "\["\ + %5b "\["\ + %5C "\\"\ + %5c "\\"\ + %5D "\]"\ + %5d "\]"\ + %5E "^"\ + %5e "^"\ + %60 "`"\ + %7B "{"\ + %7b "{"\ + %7C "|"\ + %7c "|"\ + %7D "}"\ + %7d "}"\ + %7E "~"\ + %7e "~"\ + ] + + + + #*** !doctools + #[subsection {Namespace fauxlink}] + #[para] Core API functions for fauxlink + #[list_begin definitions] + + proc resolve {link} { + variable decode_map + variable encode_map + variable must_encode + set ftail [file tail $link] + if {[file extension $ftail] ne ".fauxlink"} { + error "fauxlink::resolve refusing to process link $link - file extension must be .fauxlink" + } + set linkspec [file rootname $ftail] + # - any # or + within the target path or name should have been uri encoded as %23 and %2b + if {[tcl::string::first # $linkspec] < 0} { + error "fauxlink::resolve error. Link must contain a # (usually at start if name matches target)" + } + #only the 1st 2 parts of split on # are significant. + #if there are more # chars present - the subsequent parts are effectively a comment + + #check namepec already has required chars encoded + lassign [split $linkspec #] namespec targetspec + #puts stderr "-->namespec $namespec" + set nametest [tcl::string::map $encode_map $namespec] + #puts stderr "-->nametest $nametest" + #nothing should be changed - if there are unencoded chars that must be encoded it is an error + if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} { + set err "fauxlink::resolve invalid chars in name part (section prior to first #)" + set idx 0 + foreach ch [split $namespec ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + append err " char $idx should be encoded as $enc" \n + } + incr idx + } + error $err + } + set name [tcl::string::map $decode_map $namespec] + #puts stderr "-->name: $name" + + set targetsegment [split $targetspec +] + #check each + delimited part of targetspec already has required chars encoded + set s 0 ;#segment index + set result_segments [list] + foreach segment $targetsegment { + set targettest [tcl::string::map $encode_map $segment] + if {[tcl::string::length $targettest] ne [tcl::string::length $segment]} { + set err "fauxlink::resolve invalid chars in targetpath (section following first #)" + set idx 0 + foreach ch [split $segment ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + append err " segment $s char $idx should be encoded as $enc" \n + } + incr idx + } + error $err + } + lappend result_segments [tcl::string::map $decode_map $segment] + incr s + } + set targetpath [join $result_segments /] + + return [dict create name $name targetpath $targetpath] + } + + proc link_as {name target} { + + } + + #proc sample1 {p1 args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [opt {?option value...?}]] + # #[para]Description of sample1 + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace fauxlink::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +namespace eval fauxlink::system { + #*** !doctools + #[subsection {Namespace fauxlink::system}] + #[para] Internal functions that are not part of the API + + + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide fauxlink [namespace eval fauxlink { + variable pkg fauxlink + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/vendormodules/include_modules.config b/src/vendormodules/include_modules.config index f131f187..e388ada1 100644 --- a/src/vendormodules/include_modules.config +++ b/src/vendormodules/include_modules.config @@ -8,7 +8,8 @@ set local_modules [list\ c:/repo/jn/tclmodules/tablelist/modules tablelist\ c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ c:/repo/jn/tclmodules/tomlish/modules tomlish\ - c:/repo/jn/tclmodules/tomlish/modules test::tomlish\ + c:/repo/jn/tclmodules/overtype/modules overtype\ + c:/repo/jn/tclmodules/fauxlink/modules fauxlink\ ] set fossil_modules [dict create\ diff --git a/src/vendormodules/overtype-1.6.5.tm b/src/vendormodules/overtype-1.6.5.tm index 492341d6..3c200d26 100644 --- a/src/vendormodules/overtype-1.6.5.tm +++ b/src/vendormodules/overtype-1.6.5.tm @@ -439,7 +439,8 @@ tcl::namespace::eval overtype { if {[llength $lflines]} { lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] } - set inputchunks $lflines[unset lflines] + #set inputchunks $lflines[unset lflines] + set inputchunks [lindex [list $lflines [unset lflines]] 0] } } @@ -2115,6 +2116,7 @@ tcl::namespace::eval overtype { if {[llength $undercols]< $opt_width} { set diff [expr {$opt_width- [llength $undercols]}] if {$diff > 0} { + #set undercols [list {*}$undercols {*}[lrepeat $diff "\u0000"]] ;#2024 - much slower lappend undercols {*}[lrepeat $diff "\u0000"] lappend understacks {*}[lrepeat $diff $cs] lappend understacks_gx {*}[lrepeat $diff $gs] @@ -3889,7 +3891,19 @@ tcl::namespace::eval overtype { #OSC 4 - set colour palette #can take multiple params #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ - set params [tcl::string::range $code_content 1 end] + set params [tcl::string::range $code_content 2 end] ;#strip 4 and first semicolon + set cmap [dict create] + foreach {cnum spec} [split $params {;}] { + if {$cnum >= 0 and $cnum <= 255} { + #todo - parse spec from names like 'red' to RGB + #todo - accept rgb:ab/cd/ef as well as rgb:/a/b/c (as alias for aa/bb/cc) + #also - what about rgb:abcd/defg/hijk and 12-bit abc/def/ghi ? + dict set cmap $cnum $spec + } else { + #todo - log + puts stderr "overtype::renderline OSC 4 set colour palette - bad color number: $cnum must be from 0 to 255. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" diff --git a/src/vendormodules/test/tomlish-1.1.1.tm b/src/vendormodules/test/tomlish-1.1.1.tm index ae68fad9..8405fae7 100644 Binary files a/src/vendormodules/test/tomlish-1.1.1.tm and b/src/vendormodules/test/tomlish-1.1.1.tm differ diff --git a/src/vendormodules/tomlish-1.1.1.tm b/src/vendormodules/tomlish-1.1.1.tm index 617f7f9e..3e13e75d 100644 --- a/src/vendormodules/tomlish-1.1.1.tm +++ b/src/vendormodules/tomlish-1.1.1.tm @@ -19,12 +19,20 @@ #*** !doctools #[manpage_begin tomlish_module_tomlish 0 1.1.1] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] -#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] #[require tomlish] -#[keywords module] +#[keywords module parsing toml configuration] #[description] -#[para] - +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ @@ -71,17 +79,41 @@ package require logger namespace eval tomlish { namespace export {[a-z]*}; # Convention: export all lowercase variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + #ARRAY is analogous to a Tcl list #TABLE is analogous to a Tcl dict #WS = inline whitespace - #KEYVAL = bare key and value - #QKEYVAL = quoted key and value + #KEY = bare key and value + #QKEY = double quoted key and value + #SQKEY = single quoted key and value #ITABLE = inline table (*can* be anonymous table) # inline table values immediately create a table with the opening brace # inline tables are fully defined between their braces, as are dotted-key subtables defined within # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained - set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL SQKEYVAL STRING STRINGPART MULTISTRING STRINGLIT MULTISTRINGLIT INT FLOAT BOOL DATETIME] + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) set min_int -9223372036854775808 ;#-2^63 @@ -114,10 +146,13 @@ namespace eval tomlish { #find the value # 3 is the earliest index at which the value could occur (depending on whitespace) set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error ">>>_get_keyval_value doesn't seem to be a properly structured { = } list" + } foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { - STRING - STRINGLIT - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { set type [lindex $sub 0] set value [lindex $sub 1] set found_sub $sub @@ -127,10 +162,10 @@ namespace eval tomlish { } } if {!$found_value} { - error "Failed to find value element in KEYVAL. '$keyval_element'" + error "Failed to find value element in KEY. '$keyval_element'" } if {$found_value > 1} { - error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + error "Found multiple value elements in KEY, expected exactly one. '$keyval_element'" } switch -exact -- $type { @@ -141,16 +176,28 @@ namespace eval tomlish { STRING - STRINGPART { set result [list type $type value [::tomlish::utils::unescape_string $value]] } - STRINGLIT { + LITERAL - LITERALPART { #REVIEW - set result [list type STRINGLIT value $value] + set result [list type $type value $value] + } + TABLE { + #invalid? + error "_get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::get_dict [list $found_sub]] } - TABLE - ITABLE - ARRAY - MULTISTRING { - #jmn2024 - added ITABLE - review + ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! set result [list type $type value [::tomlish::get_dict [list $found_sub]]] } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } default { error "Unexpected value type '$type' found in keyval '$keyval_element'" } @@ -158,6 +205,48 @@ namespace eval tomlish { return $result } + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "_get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } #get_dict is a *basic* programmatic datastructure for accessing the data. # produce a dictionary of keys and values from a tomlish tagged list. # get_dict is primarily for reading toml data. @@ -193,10 +282,10 @@ namespace eval tomlish { set tag [lindex $item 0] #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { - KEYVAL - QKEYVAL - SQKEYVAL { + KEY - QKEY - SQKEY { log::debug "--> processing $tag: $item" set key [lindex $item 1] - if {$tag eq "QKEYVAL"} { + if {$tag eq "QKEY"} { set key [::tomlish::utils::unescape_string $key] } #!todo - normalize key. (may be quoted/doublequoted) @@ -209,6 +298,43 @@ namespace eval tomlish { set keyval_dict [_get_keyval_value $item] dict set datastructure $key $keyval_dict } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "get_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } TABLE { set tablename [lindex $item 1] set tablename [::tomlish::utils::tablename_trim $tablename] @@ -223,21 +349,20 @@ namespace eval tomlish { #toml spec rule - all segments mst be non-empty #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - set key_hierarchy [list] - set key_hierarchy_raw [list] + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] foreach rawseg $name_segments { set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes - set c1 [::string index $rawseg 0] - set c2 [::string index $rawseg end] + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes are processed within it. - set seg [::string range $rawseg 1 end-1] + set seg [tcl::string::range $rawseg 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. - set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] - #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] } else { set seg $rawseg } @@ -246,15 +371,16 @@ namespace eval tomlish { #if {$rawseg eq ""} { # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" #} - lappend key_hierarchy $seg - lappend key_hierarchy_raw $rawseg + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg - if {[dict exists $datastructure {*}$key_hierarchy]} { + if {[dict exists $datastructure {*}$table_key_hierarchy]} { #It's ok for this key to already exist *if* it was defined by a previous tablename, - # but not if it was defined as a keyval/qkeyval + # but not if it was defined as a key/qkey/skey ? - set testkey [join $key_hierarchy_raw .] - set testkey_length [llength $key_hierarchy_raw] + set testkey [join $table_key_hierarchy_raw .] + + set testkey_length [llength $table_key_hierarchy_raw] set found_testkey 0 if {$testkey in $tablenames_seen} { set found_testkey 1 @@ -270,11 +396,12 @@ namespace eval tomlish { # instead compare {a b.c d} with {a b c d} # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #dots within table segments might seem like an 'edge case' - # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" if {$testkey eq $seen_match} { set found_testkey 1 } @@ -282,21 +409,26 @@ namespace eval tomlish { } if {$found_testkey == 0} { - #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset - error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg } } } #ensure empty tables are still represented in the datastructure - set subkey [list] - foreach k $key_hierarchy { - lappend subkey $k - if {![dict exists $datastructure {*}$subkey]} { - dict set datastructure {*}$subkey [list] + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] } else { - tomlish::log::notice "get_dict datastructure at subkey $subkey already had data: [dict get $datastructure {*}$subkey]" + tomlish::log::notice "get_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" } } @@ -304,26 +436,54 @@ namespace eval tomlish { lappend tablenames_seen $tablename - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL - SQKEYVAL { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? set keyval_key [lindex $element 1] - if {$type eq "QKEYVAL"} { + if {$type eq "QKEY"} { set keyval_key [::tomlish::utils::unescape_string $keyval_key] } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] - dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,SQKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -336,19 +496,36 @@ namespace eval tomlish { foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL - SQKEYVAL { - set keyval_key [lindex $element 1] - set keyval_dict [_get_keyval_value $element] - if {$type eq "QKEYVAL"} { - set keyval_key [::tomlish::utils::unescape_string $keyval_key] + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." } - dict set datastructure $keyval_key $keyval_dict + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,SQKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -369,11 +546,11 @@ namespace eval tomlish { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] } - STRINGLIT { + LITERAL { set value [lindex $element 1] lappend datastructure [list type $type value $value] } - ITABLE - TABLE - ARRAY - MULTISTRING { + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } @@ -386,6 +563,49 @@ namespace eval tomlish { } } } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } MULTISTRING { #triple dquoted string log::debug "--> processing multistring: $item" @@ -395,7 +615,14 @@ namespace eval tomlish { for {set idx 0} {$idx < [llength $parts]} {incr idx} { set element [lindex $parts $idx] set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "doulbe quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } STRINGPART { append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] } @@ -556,8 +783,8 @@ namespace eval tomlish::encode { proc float {f} { #convert any non-lower case variants of special values to lowercase for Toml - if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { - return [list FLOAT [string tolower $f]] + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] } if {[::tomlish::utils::is_float $f]} { return [list FLOAT $f] @@ -587,33 +814,45 @@ namespace eval tomlish::encode { } } + + #TODO #Take tablename followed by - # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types proc table {name args} { set pairs [list] foreach t $args { - if {[llength $t] == 3} { - if {[lindex $t 0] ne "KEYVAL"} { - error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" } - lappend pairs $t + lappend pairs [list KEY $keystr = $valuepart] } elseif {[llength $t] == 2} { #!todo - type heuristics lassign $t n v - lappend pairs [list KEYVAL $n [list STRING $v]] + lappend pairs [list KEY $n = [list STRING $v]] } else { - error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" foreach part [lrange $item 1 end] { - append stringlit [::tomlish::encode::tomlish [list $part] $nextcontext] + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] } - append toml '''$stringlit''' + append toml '''$literal''' } INT - BOOL - @@ -804,6 +1079,7 @@ namespace eval tomlish::decode { # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. # For this reason, we also do absolutely no line-ending transformations based on platform. # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { #*** !doctools #[call [fun toml] [arg s]] @@ -862,11 +1138,12 @@ namespace eval tomlish::decode { set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. - set state "key-space" - ::tomlish::parse::spacestack push {space key-space} + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) set linenum 1 - + + set ::tomlish::parse::state_list [list] try { while {$r} { set r [::tomlish::parse::tok $s] @@ -878,31 +1155,162 @@ namespace eval tomlish::decode { #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" #puts "-->tok: $tok tokenType='$tokenType'" set prevstate $state - ##### - set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - ::tomlish::log::info "STATE TRANSITION tokenType: '$tokenType' tok: $tok triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below - set state $nextstate - if {$state eq "err"} { - error "State error for tokenType: $tokenType tok: $tok - aborting parse. [tomlish::parse::report_line]" + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) } - - if {$last_space_action eq "pop"} { + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? + #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] + } + ''' { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } newline { incr linenum lappend v($nest) [list NEWLINE $tok] } tablename { #note: a tablename only 'pops' if we are greater than zero - error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" } tablearrayname { #!review - tablearrayname different to tablename regarding push/pop? #note: a tablename only 'pops' if we are greater than zero - error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" } endarray { #nothing to do here. @@ -912,32 +1320,74 @@ namespace eval tomlish::decode { lappend v($nest) "SEP" } endinlinetable { - ::tomlish::log::debug "endinlinetable for last_space_action pop" + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } endmultiquote { - ::tomlish::log::debug "endmultiquote for last_space_action 'pop'" + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" } default { - error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } - set parentlevel [expr {$nest -1}] - lappend v($parentlevel) [set v($nest)] + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + incr nest -1 } elseif {$last_space_action eq "push"} { + set prevnest $nest incr nest 1 set v($nest) [list] # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } barekey { - set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" } quotedkey - itablequotedkey { - set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + set v($nest) [list QKEY $tok] ;#$tok is the keyname } - squotedkey - itablesquotedkey { - set v($nest) [list SQKEYVAL $tok] ;#$tok is the keyname + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! @@ -952,7 +1402,7 @@ namespace eval tomlish::decode { # tomlish list? set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) @@ -970,17 +1420,31 @@ namespace eval tomlish::decode { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } startmultiquote { - ::tomlish::log::debug "push trigger tokenType startmultiquote" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL } default { - error "push trigger tokenType '$tokenType' not yet implemented" + error "---- push trigger tokenType '$tokenType' not yet implemented" } } } else { #no space level change switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } starttablename { #$tok is triggered by the opening bracket and sends nothing to output } @@ -988,62 +1452,69 @@ namespace eval tomlish::decode { #$tok is triggered by the double opening brackets and sends nothing to output } tablename - tablenamearray { - error "did not expect 'tablename/tablearrayname' without space level change" + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" #set v($nest) [list TABLE $tok] } endtablename - endtablearrayname { #no output into the tomlish list for this token } startinlinetable { - puts stderr "decode::toml error. did not expect startinlinetable without space level change" + puts stderr "---- decode::toml error. did not expect startinlinetable without space level change (no space level change)" } startquote { - switch -exact -- $nextstate { - string { + switch -exact -- $newstate { + string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quotedkey { + quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "quotedkey" set tok "" } - itablequotedkey { + itable-quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "itablequotedkey" set tok "" } default { - error "startquote switch case not implemented for nextstate: $nextstate" + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" } } } startsquote { - switch -exact -- $nextstate { - stringlit { + switch -exact -- $newstate { + literal-state { set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "stringlit" + ::tomlish::parse::set_tokenType "literal" set tok "" } - squotedkey { + squoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "squotedkey" set tok "" } - itablesquotedkey { + itable-squoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "itablesquotedkey" set tok "" } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } default { - error "startsquote switch case not implemented for nextstate: $nextstate" + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" } } } startmultiquote { #review - puts stderr "no space level change - got startmultiquote" + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" set next_tokenType_known 1 ::tomlish::parse::set_tokenType "stringpart" set tok "" @@ -1060,25 +1531,45 @@ namespace eval tomlish::decode { set tok "" } string { - lappend v($nest) [list STRING $tok] + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes } - stringpart { - lappend v($nest) [list STRINGPART $tok] + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } } multistring { #review lappend v($nest) [list MULTISTRING $tok] } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } quotedkey { #lappend v($nest) [list QKEY $tok] ;#TEST } itablequotedkey { } - stringlit { - lappend v($nest) [list STRINGLIT $tok] - } - untyped-value { + untyped_value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. if {$tok in {true false}} { set tag BOOL @@ -1089,9 +1580,10 @@ namespace eval tomlish::decode { } elseif {[::tomlish::utils::is_datetime $tok]} { set tag DATETIME } else { - error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line] (no space level change)" } lappend v($nest) [list $tag $tok] + } comment { #puts stdout "----- comment token returned '$tok'------" @@ -1122,18 +1614,18 @@ namespace eval tomlish::decode { #!todo - check previous tokens are complete/valid? } default { - error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" } } } if {!$next_tokenType_known} { - ::tomlish::log::notice "tomlish::decode::toml - current tokenType:$tokenType Next token type not known" + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" ::tomlish::parse::set_tokenType "" set tok "" } - if {$state eq "end"} { + if {$state eq "end-state"} { break } @@ -1197,7 +1689,7 @@ namespace eval tomlish::utils { #basic generic quote matching for single and double quotes #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes proc tok_in_quotedpart {tok} { - set sLen [::string length $tok] + set sLen [tcl::string::length $tok] set quote_type "" set had_slash 0 for {set i 0} {$i < $sLen} {incr i} { @@ -1208,7 +1700,7 @@ namespace eval tomlish::utils { #leave slash_mode because even if current char is slash - it is escaped set had_slash 0 } else { - set ctype [string map [list {"} dq {'} sq \\ bsl] $c] + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] switch -- $ctype { dq { set quote_type dq @@ -1227,7 +1719,7 @@ namespace eval tomlish::utils { #leave slash_mode because even if current char is slash - it is escaped set had_slash 0 } else { - set ctype [string map [list {"} dq {'} sq \\ bsl] $c] + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] switch -- $ctype { dq { if {$quote_type eq "dq"} { @@ -1253,7 +1745,7 @@ namespace eval tomlish::utils { proc tablename_split {tablename {normalize false}} { #we can't just split on . because we have to handle quoted segments which may contain a dot. #eg {dog."tater.man"} - set sLen [::string length $tablename] + set sLen [tcl::string::length $tablename] set segments [list] set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax #quoted is for double-quotes, litquoted is for single-quotes (string literal) @@ -1261,12 +1753,12 @@ namespace eval tomlish::utils { for {set i 0} {$i < $sLen} {incr i} { if {$i > 0} { - set lastChar [::string index $tablename [expr {$i - 1}]] + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $tablename $i] + set c [tcl::string::index $tablename $i] if {$c eq "."} { switch -exact -- $mode { @@ -1294,7 +1786,7 @@ namespace eval tomlish::utils { } } elseif {($c eq "\"") && ($lastChar ne "\\")} { if {$mode eq "unknown"} { - if {[::string trim $seg] ne ""} { + if {[tcl::string::trim $seg] ne ""} { #we don't allow a quote in the middle of a bare key error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" } @@ -1357,13 +1849,13 @@ namespace eval tomlish::utils { if {$normalize} { lappend segments $seg } else { - lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong } } litquoted { - set trimmed_seg [::string trim $seg] - if {[::string index $trimmed_seg end] ne "\'"} { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" } lappend segments $seg @@ -1381,7 +1873,7 @@ namespace eval tomlish::utils { } } foreach seg $segments { - set trimmed [::string trim $seg " \t"] + set trimmed [tcl::string::trim $seg " \t"] #note - we explicitly allow 'empty' quoted strings '' & "" # (these are 'discouraged' but valid toml keys) #if {$trimmed in [list "''" "\"\""]} { @@ -1400,7 +1892,7 @@ namespace eval tomlish::utils { # is a valid 'unicode scalar value' # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} - if {[::string match {\\u*} $slashu]} { + if {[tcl::string::match {\\u*} $slashu]} { set exp {^\\u([0-9a-fA-F]{4}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %4x dec] != 1} { @@ -1412,7 +1904,7 @@ namespace eval tomlish::utils { } else { return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] } - } elseif {[::string match {\\U*} $slashu]} { + } elseif {[tcl::string::match {\\U*} $slashu]} { set exp {^\\U([0-9a-fA-F]{8}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %8x dec] != 1} { @@ -1446,7 +1938,7 @@ namespace eval tomlish::utils { set buffer4 "" ;#buffer for 4 hex characters following a \u set buffer8 "" ;#buffer for 8 hex characters following a \u - set sLen [::string length $str] + set sLen [tcl::string::length $str] #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc set slash_active 0 @@ -1458,12 +1950,12 @@ namespace eval tomlish::utils { set i 0 for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [::string index $str [expr {$i - 1}]] + set lastChar [tcl::string::index $str [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $str $i] + set c [tcl::string::index $str $i] ::tomlish::log::debug "unescape_string. got char $c" scan $c %c n if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { @@ -1486,10 +1978,10 @@ namespace eval tomlish::utils { } } else { if {$unicode4_active} { - if {[::string length $buffer4] < 4} { + if {[tcl::string::length $buffer4] < 4} { append buffer4 $c } - if {[::string length $buffer4] == 4} { + if {[tcl::string::length $buffer4] == 4} { #we have a \uHHHH to test set unicode4_active 0 set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] @@ -1500,10 +1992,10 @@ namespace eval tomlish::utils { } } } elseif {$unicode8_active} { - if {[::string length $buffer8] < 8} { + if {[tcl::string::length $buffer8] < 8} { append buffer8 $c } - if {[::string length $buffer8] == 8} { + if {[tcl::string::length $buffer8] == 8} { #we have a \UHHHHHHHH to test set unicode8_active 0 set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] @@ -1515,7 +2007,7 @@ namespace eval tomlish::utils { } } elseif {$slash_active} { set slash_active 0 - set ctest [string map {{"} dq} $c] + set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { set e "\\\"" @@ -1559,15 +2051,15 @@ namespace eval tomlish::utils { } proc normalize_key {rawkey} { - set c1 [::string index $rawkey 0] - set c2 [::string index $rawkey end] + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes allowed within it. - set key [::string range $rawkey 1 end-1] + set key [tcl::string::range $rawkey 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. # - set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only set key [::tomlish::utils::unescape_string $keydata] #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. } else { @@ -1603,11 +2095,11 @@ namespace eval tomlish::utils { #check if str is valid for use as a toml bare key proc is_barekey {str} { - if {[::string length $str] == 0} { + if {[tcl::string::length $str] == 0} { return 0 } else { set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters match the regexp return 1 } else { @@ -1618,7 +2110,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'integer'. proc int_validchars1 {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { return 1 } else { @@ -1627,7 +2119,7 @@ namespace eval tomlish::utils { } #add support for hex,octal,binary 0x.. 0o.. 0b... proc int_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { return 1 } else { @@ -1644,22 +2136,22 @@ namespace eval tomlish::utils { # --------------------------------------- #check for leading zeroes in non 0x 0b 0o #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { return 0 } # --------------------------------------- #check +,- only occur in the first position. - if {[::string last - $str] > 0} { + if {[tcl::string::last - $str] > 0} { return 0 } - if {[::string last + $str] > 0} { + if {[tcl::string::last + $str] > 0} { return 0 } - set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) - if {![string is integer -strict $numeric_value]} { + if {![tcl::string::is integer -strict $numeric_value]} { return 0 } #!todo - check bounds only based on some config value @@ -1681,7 +2173,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'float'. proc float_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { return 1 } else { @@ -1701,7 +2193,7 @@ namespace eval tomlish::utils { return 1 } - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) #Toml spec also disallows leading zeros in the exponent part @@ -1709,12 +2201,12 @@ namespace eval tomlish::utils { #for now we will allow leading zeros in exponents #!todo - configure 'strict' option to disallow? #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] set r {([0-9])*} regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E set z {([0])*} regexp $z $intpart leadingzeros - if {[::string length $leadingzeros] > 1} { + if {[tcl::string::length $leadingzeros] > 1} { return 0 } #for floats, +,- may occur in multiple places @@ -1722,9 +2214,9 @@ namespace eval tomlish::utils { #!todo - check bounds ? #strip underscores for tcl double check - set check [::string map {_ ""} $str] + set check [tcl::string::map {_ ""} $str] #string is double accepts inf nan +NaN etc. - if {![::string is double $check]} { + if {![tcl::string::is double $check]} { return 0 } @@ -1737,7 +2229,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'datetime'. proc datetime_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { return 1 } else { @@ -1745,6 +2237,7 @@ namespace eval tomlish::utils { } } + #review - we proc is_datetime {str} { #e.g 1979-05-27 #e.g 1979-05-27T00:32:00Z @@ -1752,8 +2245,17 @@ namespace eval tomlish::utils { #e.g 1979-05-27 00:32:00+10:00 #e.g 1979-05-27 00:32:00.999999-07:00 + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #!todo - use full RFC 3339 parser? lassign [split $str T] datepart timepart @@ -1784,58 +2286,248 @@ namespace eval tomlish::parse { #[para] #[list_begin definitions] + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? + #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? + + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text variable state - # states: - # key-space, curly-space, array-space - # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # states: + # table-space, itable-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string-state, literal-state, multistring... # # notes: - # key-space i # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack - # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax # #stateMatrix defines for each state, actions to take for each possible token. - #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. - #dual-element actions are a push command and the name of the space to push on the stack. - # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. + #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases - #test variable stateMatrix set stateMatrix [dict create] + #xxx-space vs xxx-syntax inadequately documented - TODO + + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# + + dict set stateMatrix\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ + } + + #itable-space/ curly-syntax : itables + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-ok"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ + } + + + dict set stateMatrix\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ + } + + # ' = ' portion of keyval + dict set stateMatrix\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + #### + dict set stateMatrix\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ + } + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} + dict set stateMatrix\ + leading-squote-space {\ + squote_seq "POPSPACE"\ + } + #dict set stateMatrix\ + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + + dict set stateMatrix\ + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + + dict set stateMatrix\ + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + dict set stateMatrix\ - key-space { - whitespace "key-space"\ - newline "key-space"\ - bom "key-space"\ - barekey {pushspace "keyval-space"}\ - startquote "quotedkey"\ - startsquote "squotedkey"\ - comment "key-space"\ - starttablename "tablename"\ - starttablearrayname "tablearrayname"\ - startmultiquote "err"\ - endquote "err"\ - comma "err"\ - eof "end"\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value dict set stateMatrix\ - curly-space {\ - whitespace "curly-space"\ - newline "curly-space"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - itablesquotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - startsquote "itablesquotedkey"\ - comma "curly-space"\ - comment "err"\ - eof "err"\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ } + #dottedkeyend "POPSPACE" + + + #REVIEW #toml spec looks like heading towards allowing newlines within inline tables @@ -1844,227 +2536,184 @@ namespace eval tomlish::parse { curly-syntax {\ whitespace "curly-syntax"\ newline "curly-syntax"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - comment "curly-space"\ - eof "err"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey "itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ } - #review comment "err" vs comment "curly-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? - dict set stateMatrix\ - value-expected {\ - whitespace "value-expected"\ - untyped-value "samespace"\ - startquote "string"\ - startsquote "stringlit"\ - startmultiquote {pushspace "multistring-space"}\ - startinlinetable {pushspace curly-space}\ - startarray {pushspace array-space}\ - comment "err"\ - comma "err"\ - newline "err"\ - eof "err"\ - } + #JMN REVIEW dict set stateMatrix\ array-space {\ whitespace "array-space"\ newline "array-space"\ - untyped-value "samespace"\ - startarray {pushspace "array-space"}\ - endarray "popspace"\ - startmultiquote {pushspace multistring-space}\ - startinlinetable {pushspace curly-space}\ - startquote "string"\ - startsquote "stringlit"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ comma "array-space"\ comment "array-space"\ - eof "err"\ + eof "err-state-array-space-got-eof"\ } dict set stateMatrix\ array-syntax {\ whitespace "array-syntax"\ newline "array-syntax"\ - untyped-value "samespace"\ - startarray {pushspace array-space}\ - endarray "popspace"\ - startmultiquote {pushspace multistring-space}\ - startquote "string"\ - startsquote "stringlit"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ comma "array-space"\ - comment "err"\ + comment "err-state"\ } + + + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space dict set stateMatrix\ - itablekeyval-syntax {\ - whitespace "itablekeyval-syntax"\ - endquote "itablekeyval-syntax"\ - endsquote "itablekeyval-syntax"\ - newline "err"\ - equal "value-expected"\ - eof "err"\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ } dict set stateMatrix\ - itablekeyval-space {} - - dict set stateMatrix\ - itablevaltail {\ - whitespace "itablevaltail"\ - endinlinetable "popspace"\ - comma "popspace"\ - newline "itablevaltail"\ - comment "itablevaltail"\ - eof "err"\ + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ } + # endsquote {PUSHSPACE "keyval-space"} + dict set stateMatrix\ - itablequotedkey {\ + string-state {\ whitespace "NA"\ - itablequotedkey {pushspace "itablekeyval-space"}\ - newline "err"\ - endquote "itablekeyval-syntax"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ } dict set stateMatrix\ - itablesquotedkey {\ + literal-state {\ whitespace "NA"\ - itablesquotedkey {pushspace "itablekeyval-space"}\ - newline "err"\ - endsquote "itablekeyval-syntax"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ } + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } dict set stateMatrix\ - keyval-space {\ - } - dict set stateMatrix\ - keyval-syntax {\ - whitespace "keyval-syntax"\ - endquote "keyval-syntax"\ - endsquote "keyval-syntax"\ - equal "value-expected"\ - comma "err"\ - newline "err"\ - eof "err"\ + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ } - dict set stateMatrix\ - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - - #quotedkey & squotedkey need to pushspace from self to keyval-space + #only valid subparts are literalpart and newline. other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ - quotedkey {\ - whitespace "NA"\ - quotedkey {pushspace "keyval-space"}\ - newline "err"\ - endquote "keyval-syntax"\ + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - squotedkey {\ - whitespace "NA"\ - squotedkey {pushspace "keyval-space"}\ - newline "err"\ - endsquote "keyval-syntax"\ + trailing-squote-space {\ + squote_seq "POPSPACE"\ } - + + dict set stateMatrix\ - string {\ + tablename-state {\ whitespace "NA"\ - string "string"\ - endquote "samespace"\ - newline "err"\ - eof "err"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ } dict set stateMatrix\ - stringlit {\ + tablearrayname-state {\ whitespace "NA"\ - stringlit "stringlit"\ - endsquote "samespace"\ - newline "err"\ - eof "err"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ } - dict set stateMatrix\ - stringpart {\ - continuation "samespace"\ - endmultiquote "popspace"\ - eof "err"\ + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ } - #dict set stateMatrix\ - # multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} dict set stateMatrix\ - multistring-space {\ - whitespace "multistring-space"\ - continuation "multistring-space"\ - stringpart "multistring-space"\ - newline "multistring-space"\ - endmultiquote "popspace"\ - eof "err"\ + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ } - #multistring "multistring-space" - - - dict set stateMatrix\ - tablename {\ - whitespace "NA"\ - tablename {zeropoppushspace key-space}\ - tablename2 {pushspace key-space}\ - endtablename "tablenametail"\ - comma "err"\ - newline "err"\ + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} } - dict set stateMatrix\ - tablearrayname {\ - whitespace "NA"\ - tablearrayname {zeropoppushspace key-space}\ - tablearrayname2 {pushspace key-space}\ - endtablearray "tablearraynametail"\ - comma "err"\ - newline "err"\ - } - - dict set stateMatrix\ - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} - dict set stateMatrix\ - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} - dict set stateMatrix\ - end {} - - #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push - variable stateMatrix_orig { - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} - curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} - value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} - array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} - array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - keyval-space {} - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} - end {} } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" + } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- #build a list of 'push triggers' from the stateMatrix # ie tokens which can push a new space onto spacestack set push_trigger_tokens [list] tcl::dict::for {s transitions} $stateMatrix { tcl::dict::for {token transition_to} $transitions { - set action [lindex $transition_to 0] - switch -exact -- $action { - pushspace - zeropoppushspace { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { if {$token ni $push_trigger_tokens} { lappend push_trigger_tokens $token } @@ -2072,84 +2721,139 @@ namespace eval tomlish::parse { } } } - puts stdout "push_trigger_tokens: $push_trigger_tokens" - #!todo - hard code once stateMatrix finalised? + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE #mainly for the -space states: #redirect to another state $c based on a state transition from $whatever to $b # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. - #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' + #this is useful as we often don't know state $b. e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next variable spacePopTransitions { - array-space array-syntax - curly-space curly-syntax - keyval-space keytail - itablekeyval-space itablevaltail + array-space array-syntax } - variable spacePushTransitions { - keyval-space keyval-syntax - itablekeyval-space itablekeyval-syntax - array-space array-space - curly-space curly-space - key-space tablename + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail - variable state_list + variable state_list ;#reset every tomlish::decode::toml namespace export tomlish toml namespace ensemble create - proc getNextState {tokentype currentstate} { + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state variable nest variable v - + + set prevstate $currentstate + + variable spacePopTransitions variable spacePushTransitions + variable spaceSameTransitions + variable last_space_action "none" variable last_space_type "none" variable state_list set result "" + set starttok "" if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] - ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { - popspace { + POPSPACE { spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + set last_space_action "pop" set last_space_type $type - - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" } else { - set next $target + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. + # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } } set result $next } - samespace { - #note the same data as popspace (spacePopTransitions) is used here. - set parent [spacestack peek] - ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" - lassign $parent type target - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" } else { - set next $target + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } } set result $next } zeropoppushspace { if {$nest > 0} { - #pop back down to the root level (key-space) + #pop back down to the root level (table-space) spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + set last_space_action "pop" set last_space_type $type @@ -2162,36 +2866,72 @@ namespace eval tomlish::parse { } #re-entrancy - #set next [list pushspace [lindex $transition_to 1]] + #set next [list PUSHSPACE [lindex $transition_to 1]] set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 - ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" - set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] } - pushspace { - set target [lindex $transition_to 1] - spacestack push [list space $target] + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + set last_space_action "push" set last_space_type "space" - - #puts $::tomlish::parse::spacePushTransitions - if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { - set next [dict get $::tomlish::parse::spacePushTransitions $target] - ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" } else { - set next $target + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } } set result $next } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } default { - set result $transition_to + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word } } } else { - set result "nostate-err" - + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" } - lappend state_list $result - return $result + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] } proc report_line {{line ""}} { @@ -2215,7 +2955,7 @@ namespace eval tomlish::parse { foreach el $list { if { [lindex $el 0] eq "NEWLINE"} { append prettier "[list $el]\n" - } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL SQKEYVAL TABLE ARRAY})} { + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { append prettier [nest_pretty1 $el] } else { append prettier "[list $el] " @@ -2250,7 +2990,7 @@ namespace eval tomlish::parse { proc _shortcircuit_startquotesequence {} { variable tok variable i - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { set_tokenType "startquote" incr i -1 @@ -2264,8 +3004,81 @@ namespace eval tomlish::parse { } } + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "Unexpected. Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + #returns 0 or 1 #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + proc tok {s} { variable nest variable v @@ -2274,14 +3087,12 @@ namespace eval tomlish::parse { variable type ;#character type variable state ;#FSM - set resultlist [list] variable tokenType variable tokenType_list variable endToken - set sLen [::string length $s] variable lastChar @@ -2291,33 +3102,41 @@ namespace eval tomlish::parse { #------------------------------ #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof variable token_waiting - if {[dict size $token_waiting]} { - set tokenType [dict get $token_waiting type] - set tok [dict get $token_waiting tok] - dict unset token_waiting type - dict unset token_waiting tok + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] return 1 } #------------------------------ + set resultlist [list] + set sLen [tcl::string::length $s] + set slash_active 0 set quote 0 set c "" set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [string index $s [expr {$i - 1}]] + set lastChar [tcl::string::index $s [expr {$i - 1}]] } else { set lastChar "" } - set c [string index $s $i] + set c [tcl::string::index $s $i] + set cindex $i tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" #puts "got char $c during tokenType '$tokenType'" - incr i ;#must incr here because we do'returns'inside the loop + incr i ;#must incr here because we do returns inside the loop - set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { set dquotes $multi_dquote @@ -2327,12 +3146,16 @@ namespace eval tomlish::parse { if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -2343,14 +3166,13 @@ namespace eval tomlish::parse { whitespace { # hash marks end of whitespace token #do a return for the whitespace, set token_waiting - #dict set token_waiting type comment - #dict set token_waiting tok "" + #set_token_waiting type comment value "" complete 1 incr i -1 ;#leave comment for next run return 1 } - untyped-value { + untyped_value { #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? - #we will accept a comment marker as an immediate terminator of the untyped-value. + #we will accept a comment marker as an immediate terminator of the untyped_value. incr i -1 return 1 } @@ -2363,24 +3185,24 @@ namespace eval tomlish::parse { append tok $c } default { - #quotedkey, itablequotedkey, string,stringlit, multistring + #quotedkey, itablequotedkey, string,literal, multistring append tok $c } } } else { switch -- $state { - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType stringpart - } else { - set_tokenType stringlit ;#review - } + multistring-space { + set_tokenType stringpart set tok "" if {$had_slash} { append tok "\\" } append tok "$dquotes#" } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } default { #start of token if we're not in a token set_tokenType comment @@ -2395,88 +3217,86 @@ namespace eval tomlish::parse { set multi_dquote "" set had_slash $slash_active set slash_active 0 - #test jmn2024 - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - startsquotesequence { - incr i -[tcl::string::length $tok] - set_tokenType "startsquote" - return 1 - } - stringlit - squotedkey - itablesquotedkey { - append tok $c - } - string - quotedkey - itablequotedkey { - if {$had_slash} {append tok "\\"} - append tok $c - } - stringpart - stringlit { - if {$had_slash} {append tok "\\"} - append tok $dquotes$c - } - starttablename - starttablearrayname { - #*bare* tablename can only contain letters,digits underscores - error "Invalid tablename first character \{ [tomlish::parse::report_line]" - } - tablename - tablearrayname { - #valid in quoted parts - append tok $c - } - comment { - if {$had_slash} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - switch -exact -- $state { - value-expected { - #switch last key to tablename?? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - array-space - array-syntax { - #nested anonymous inline table - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType stringpart - } else { - set_tokenType stringlit ;#review - } - set tok "" - if {$had_slash} { - append tok "\\" - } - append tok "$dquotes\{" - } - default { - error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } @@ -2487,115 +3307,127 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - stringlit - squotedkey - itablesquotedkey { - append tok $c - } - string - quotedkey - itablequotedkey - comment { - if {$had_slash} {append tok "\\"} - append tok $c - } - stringpart { - if {$had_slash} {append tok "\\"} - append tok $dquotes$c - } - starttablename - tablename { - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endinlinetable - dict set token_waiting tok "" - return 1 - } - starttablearrayname - tablearrayname { - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" - return 1 - } - itablevaltail { - #review - error "right-curly in itablevaltail" - } - default { - #end any other token - incr i -1 - return 1 - } + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - #error "unexpected tablename problem" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - curly-syntax - curly-space { - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - array-syntax - array-space { - #invalid - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - itablevaltail { - set_tokenType "endinlinetable" - set tok "" - #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 - incr i -1 - return 1 - } - itablekeyval-syntax { - error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" - } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType "stringpart" - } else { - set_tokenType "stringlit" ;#review - } - set tok "" - if {$had_slash} { - append tok "\\" - } - append tok "$dquotes\}" - } - default { - #JMN2024b keytail? - error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" } } + } } lb { @@ -2605,101 +3437,104 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - startsquotesequence { - incr i -[tcl::string::length $tok] - set_tokenType "startsquote" - return 1 - } - stringlit - squotedkey - itablesquotedkey { - append tok $c - } - string - quotedkey - itablequotedkey { - if {$had_slash} {append tok "\\"} - append tok $c - } - stringpart { - if {$had_slash} {append tok "\\"} - append tok $dquotes$c - } - starttablename { - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - tablename { - #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token - if {$had_slash} { - #resultant tablename may be invalid - but leave for datastructure loading stage to catch - append tok "\\[" - } else { - if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { - #invalid at this point - state machine should disallow table -> starttablearrayname - dict set token_waiting type starttablearrayname - dict set token_waiting tok "" - return 1 - } else { - #we appear to still be in single or double quoted section - append tok "\[" - } - } - } - comment { - if {$had_slash} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - set_tokenType "startarray" - set tok "\[" - return 1 - } - key-space { - #table name - #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray - #note that a starttablearrayname token may contain whitespace between the brackets - # e.g \[ \[ - set_tokenType "starttablename" - set tok "" ;#there is no output into the tomlish list for this token - } - array-space - array-syntax { - #nested array? - set_tokenType "startarray" - set tok "\[" - return 1 - #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" - } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType "stringpart" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 } else { - set_tokenType "stringlit" ;#review - } - set tok "" - if {$had_slash} { - append tok "\\" + #we appear to still be in single or double quoted section + append tok "\[" } - append tok "$dquotes\[" } - default { - error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" } } + } } rb { #right square bracket @@ -2708,31 +3543,35 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit - squotedkey - itablesquotedkey { + literal - literalpart - squotedkey - itablesquotedkey { append tok $c } string - quotedkey - itablequotedkey { if {$had_slash} {append tok "\\"} append tok $c } - comment { - if {$had_slash} {append tok "\\"} - append tok $c - } stringpart { if {$had_slash} {append tok "\\"} append tok $dquotes$c } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } whitespace { if {$state eq "multistring-space"} { #???? @@ -2752,8 +3591,7 @@ namespace eval tomlish::parse { append tok "\\]" } else { if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { - dict set token_waiting type endtablename - dict set token_waiting tok "" + set_token_waiting type endtablename value "" complete 1 startindex $cindex return 1 } else { #we appear to still be in single or double quoted section @@ -2765,8 +3603,7 @@ namespace eval tomlish::parse { #todo? if {$had_slash} {append tok "\\"} #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex return 1 } default { @@ -2783,13 +3620,13 @@ namespace eval tomlish::parse { set tok "\]" return 1 } - key-space { + table-space { #invalid - but allow parser statemachine to report it. ? set_tokenType "endarray" set tok "\]" return 1 } - tablename { + tablename-state { #e.g [] - empty tablename - allowed or not? #empty tablename/tablearrayname ? #error "unexpected tablename problem" @@ -2798,7 +3635,7 @@ namespace eval tomlish::parse { set tok "" ;#no output into the tomlish list for this token return 1 } - tablearrayname { + tablearrayname-state { error "unexpected tablearrayname problem" set_tokenType "endtablearray" set tok "" ;#no output into the tomlish list for this token @@ -2809,18 +3646,18 @@ namespace eval tomlish::parse { set tok "\]" return 1 } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType "stringpart" - } else { - set_tokenType "stringlit" ;#review - } + multistring-space { + set_tokenType "stringpart" set tok "" if {$had_slash} { append tok "\\" } append tok "$dquotes\]" } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } default { error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" } @@ -2831,12 +3668,16 @@ namespace eval tomlish::parse { set dquotes $multi_dquote set multi_dquote "" ;#!! #backslash - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -2850,8 +3691,8 @@ namespace eval tomlish::parse { error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" } } - stringlit - squotedkey - itablesquotedkey { - #never need to set slash_active true when in stringlit + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens append tok "\\" set slash_active 0 } @@ -2884,28 +3725,36 @@ namespace eval tomlish::parse { set slash_active 1 } } - barekey { - error "Unexpected backslash during barekey. [tomlish::parse::report_line]" + barekey { + error "Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" } default { - error "Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" - } - } - } else { - if {$state eq "multistring-space"} { - if {$slash_active} { - set_tokenType "stringpart" - set tok "\\\\" - set slash_active 0 - } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - } - set slash_active 1 + error "tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" } - } else { - error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" } } } @@ -2913,8 +3762,35 @@ namespace eval tomlish::parse { #single quote set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { + append tok $c + if {$existingtoklen > 2} { + error "tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { + append tok $c + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" + } + } + } whitespace { #end whitespace incr i -1 ;#reprocess sq @@ -2923,7 +3799,8 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { + #temp token creatable only during value-expected or array-space switch -- [tcl::string::length $tok] { 1 { append tok $c @@ -2931,24 +3808,36 @@ namespace eval tomlish::parse { 2 { #switch? append tok $c - set_tokenType startmultisquote + set_tokenType triple_squote return 1 } default { - error "unexpected token length [tcl::string::length $tok] in 'startsquotesequence'" + error "unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } } - stringlit { + literal { #slash_active always false - #terminate the stringlit - dict set token_waiting type endsquote - dict set token_waiting tok "'" + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 startindex $cindex return 1 } - squotedkey - itablesquotedkey { - dict set token_waiting type endsquote - dict set token_waiting tok "'" + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 return 1 } starttablename - starttablearrayname { @@ -2966,33 +3855,54 @@ namespace eval tomlish::parse { } else { switch -exact -- $state { value-expected - array-space { - #todo - multilitstring startsquotesequence? - set_tokenType "startsquotesequence" + set_tokenType "_start_squote_sequence" set tok "'" } - key-space { - set_tokenType "startsquote" - set tok $c + itable-keyval-value-expected - keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" return 1 } - curly-space { - set_tokenType "startsquote" - set tok $c + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" return 1 } - tablename - tablearrayname { - #first char in tablename/tablearrayname state - set_tokenType $state ;#token name matches state name for tablename/tablearrayname + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname append tok "'" } - stringlit { - tomlish::log::debug "sq during stringlit state with no tokentype - empty stringlit?" - set_tokenType stringlit + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal incr -1 return 1 } multistring-space { - + error "unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey } default { error "unhandled squote during state '$state'. [tomlish::parse::report_line]" @@ -3006,10 +3916,14 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { append tok $c } elseif {$toklen == 2} { @@ -3021,7 +3935,7 @@ namespace eval tomlish::parse { error "unexpected token length $toklen in 'startquotesequence'" } } - startsquotesequence { + _start_squote_sequence { set toklen [tcl::string::length $tok] switch -- $toklen { 1 { @@ -3035,11 +3949,11 @@ namespace eval tomlish::parse { return 1 } default { - error "unexpected startsquotesequence length $toklen" + error "unexpected _start_squote_sequence length $toklen" } } } - stringlit { + literal - literalpart { append tok $c } string { @@ -3047,8 +3961,7 @@ namespace eval tomlish::parse { append tok "\\" $c } else { #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" + set_token_waiting type endquote value "\"" complete 1 startindex $cindex return 1 } } @@ -3060,8 +3973,7 @@ namespace eval tomlish::parse { #incr i -1 if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] set multi_dquote "" return 1 } else { @@ -3079,8 +3991,7 @@ namespace eval tomlish::parse { } else { switch -- [tcl::string::length $multi_dquote] { 2 { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] set multi_dquote "" return 1 } @@ -3095,21 +4006,23 @@ namespace eval tomlish::parse { } } } - value-expected { - if {$multi_dquote eq "\"\""} { - dict set token_waiting type startmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - #end whitespace token and reprocess - incr i -1 - return 1 - } + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 + return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} } default { - dict set token_waiting type startquote - dict set token_waiting tok "\"" + set_token_waiting type startquote value "\"" complete 1 startindex $cindex return 1 } } @@ -3123,8 +4036,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - dict set token_waiting type endquote - dict set token_waiting tok "\"" + set_token_waiting type endquote value "\"" complete 1 startindex $cindex return 1 } } @@ -3147,7 +4059,7 @@ namespace eval tomlish::parse { #$slash_active not relevant when no tokenType #token is string only if we're expecting a value at this point switch -exact -- $state { - value-expected - array-space { + keyval-value-expected - value-expected - array-space { #!? start looking for possible multistartquote #set_tokenType startquote #set tok $c @@ -3164,12 +4076,11 @@ namespace eval tomlish::parse { set multi_dquote "" } else { if {$multi_dquote eq "\"\""} { - tomlish::log::debug "---> endmultiquote" + tomlish::log::debug "- tokloop char dq ---> endmultiquote" set_tokenType "endmultiquote" set tok "\"\"\"" return 1 - #dict set token_waiting type endmultiquote - #dict set token_waiting tok "\"\"\"" + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 #set multi_dquote "" #return 1 } else { @@ -3177,18 +4088,30 @@ namespace eval tomlish::parse { } } } - key-space { + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { set_tokenType "startquote" set tok $c return 1 } - curly-space { + itable-space { set_tokenType "startquote" set tok $c return 1 } - tablename - tablearrayname { - set_tokenType $state + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin set tok $c } default { @@ -3203,17 +4126,21 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit - squotedkey { + literal - literalpart - squotedkey { #assertion had_slash 0, multi_dquote "" append tok $c } @@ -3227,19 +4154,18 @@ namespace eval tomlish::parse { append tok $dquotes$c } whitespace { - if {$state in {multistring-space multiliteral-space}} { + if {$state eq "multistring-space"} { set backlen [expr {[tcl::string::length $dquotes] + 1}] incr i -$backlen return 1 } else { - dict set token_waiting type equal - dict set token_waiting tok = + set_token_waiting type equal value = complete 1 startindex $cindex return 1 } } barekey { - dict set token_waiting type equal - dict set token_waiting tok = + #set_token_waiting type equal value = complete 1 + incr i -1 return 1 } starttablename - starttablearrayname { @@ -3263,7 +4189,15 @@ namespace eval tomlish::parse { } append tok ${dquotes}= } - + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 + } default { set_tokenType "equal" set tok = @@ -3279,19 +4213,30 @@ namespace eval tomlish::parse { # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { + literal { append tok $c } + literalpart { + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warning "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } stringpart { append tok $dquotes$c } @@ -3321,23 +4266,36 @@ namespace eval tomlish::parse { set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { - #review - append tok $c + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 } newline { + #review #this lf is the trailing part of a crlf - append tok lf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } stringpart { @@ -3348,13 +4306,11 @@ namespace eval tomlish::parse { } else { if {$had_slash} { #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - dict set token_waiting type continuation - dict set token_waiting tok \\ + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] incr i -1 return 1 } else { - dict set token_waiting type newline - dict set token_waiting tok lf + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } } @@ -3370,8 +4326,7 @@ namespace eval tomlish::parse { # ie whitespace is split into separate whitespace tokens at each newline #puts "-------------- newline lf during tokenType $tokenType" - dict set token_waiting type newline - dict set token_waiting tok lf + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } } @@ -3396,6 +4351,12 @@ namespace eval tomlish::parse { return 1 } } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } default { #ignore slash? error? set_tokenType "newline" @@ -3421,12 +4382,22 @@ namespace eval tomlish::parse { set multi_dquote "" set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -3439,28 +4410,27 @@ namespace eval tomlish::parse { if {$had_slash} {append tok "\\"} append tok $c } - stringlit - squotedkey - itablesquotedkey { - #assert had_slash always 0, multi_dquote "" - append tok $c - } stringpart { + #stringpart can have up to 2 quotes too if {$had_slash} {append tok "\\"} append tok $dquotes$c } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } whitespace { if {$state eq "multistring-space"} { set backlen [expr {[tcl::string::length $dquotes] + 1}] incr i -$backlen return 1 } else { - dict set token_waiting type comma - dict set token_waiting tok "," + set_token_waiting type comma value "," complete 1 startindex $cindex return 1 } } default { - dict set token_waiting type comma - dict set token_waiting tok "," + set_token_waiting type comma value "," complete 1 startindex $cindex if {$had_slash} {append tok "\\"} return 1 } @@ -3475,7 +4445,7 @@ namespace eval tomlish::parse { } multiliteral-space { #assert had_slash 0, multi_dquote "" - set_tokenType "stringlit" + set_tokenType "literalpart" set tok "," } default { @@ -3491,17 +4461,27 @@ namespace eval tomlish::parse { set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - comment - untyped-value { + comment - untyped_value { if {$had_slash} {append tok "\\"} append tok $c } @@ -3509,24 +4489,31 @@ namespace eval tomlish::parse { if {$had_slash} {append tok "\\"} append tok $c } - stringlit - squotedkey - itablesquotedkey { - #assert had_slash always 0, multi_dquote "" - append tok $c - } stringpart { if {$had_slash} {append tok "\\"} append tok $dquotes$c } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } whitespace { - if {$state eq "multistring-space"} { - set backchars [expr {[tcl::string::length $dquotes] + 1}] - if {$had_slash} { - incr backchars 1 + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" } - incr i -$backchars - return 1 - } else { - error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" } } starttablename - starttablearrayname { @@ -3541,15 +4528,12 @@ namespace eval tomlish::parse { #e.g x.y = 1 #we need to transition the barekey to become a structured table name ??? review #x is the tablename y is the key - switch_tokenType tablenamepluskey - incr i -1 - - #error "barekey period unimplemented" + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 } default { - error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" - #dict set token_waiting type period - #dict set token_waiting tok "." + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 #return 1 } } @@ -3563,12 +4547,16 @@ namespace eval tomlish::parse { } multiliteral-space { set_tokenType "literalpart" - set tok "" - if {$had_slash} {append tok "\\"} - append tok "$dquotes." + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" + set tok "." + return 1 } default { - set_tokenType "untyped-value" + set_tokenType "untyped_value" set tok "." } } @@ -3578,14 +4566,24 @@ namespace eval tomlish::parse { " " { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -3593,15 +4591,13 @@ namespace eval tomlish::parse { barekey { #todo had_slash - emit token or error #whitespace is a terminator for bare keys - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - untyped-value { + untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } @@ -3615,7 +4611,21 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } append tok $c } - stringlit - squotedkey - itablesquotedkey { + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart - squotedkey - itablesquotedkey { append tok $c } whitespace { @@ -3633,20 +4643,6 @@ namespace eval tomlish::parse { append tok $c } } - stringpart { - if {$had_slash} { - #REVIEW - #emit the stringpart - go back to the slash - incr i -2 - return 1 - } else { - #split into STRINGPART aaa WS " " - #keeping WS separate allows easier processing of CONT stripping - append tok $dquotes - incr i -1 - return 1 - } - } starttablename - starttablearrayname { incr i -1 return 1 @@ -3664,15 +4660,19 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - if {$had_slash} { - set tok "\\$c" - } else { - set tok $c - } + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c } multistring-space { if {$had_slash} { @@ -3691,6 +4691,10 @@ namespace eval tomlish::parse { append tok $c } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { if {$had_slash} { error "unexpected backslash [tomlish::parse::report_line]" @@ -3705,43 +4709,58 @@ namespace eval tomlish::parse { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { - append tok $c - } barekey { #whitespace is a terminator for bare keys incr i -1 - #set token_waiting type whitespace - #set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 return 1 } untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - quotedkey - itablequotedkey { + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { append tok $c } string - comment - whitespace { append tok $c } stringpart { - append tok $dquotes$c + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c } starttablename - starttablearrayname { incr i -1 @@ -3762,10 +4781,14 @@ namespace eval tomlish::parse { set slash_active 0 } switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname set tok $c } multistring-space { @@ -3786,6 +4809,10 @@ namespace eval tomlish::parse { } } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { set_tokenType "whitespace" append tok $c @@ -3795,41 +4822,58 @@ namespace eval tomlish::parse { } bom { #BOM (Byte Order Mark) - ignored by token consumer - if {[string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - startsquotesequence { + _start_squote_sequence { + #assert - tok will be one or two squotes only incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { + literal - literalpart { append tok $c } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { set_tokenType "bom" set tok "\uFEFF" return 1 } } - } else { - set_tokenType "bom" - set tok "\uFEFF" - return 1 } } default { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { - puts stdout "HERE $c" + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -3866,7 +4910,7 @@ namespace eval tomlish::parse { append tok $dquotes$c } default { - #e.g comment/string/stringlit/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } @@ -3874,7 +4918,7 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - key-space - curly-space - curly-syntax { + table-space - itable-space { #if no currently active token - assume another key value pair if {[tomlish::utils::is_barekey $c]} { set_tokenType "barekey" @@ -3883,6 +4927,15 @@ namespace eval tomlish::parse { error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } multistring-space { set_tokenType "stringpart" if {$had_slash} { @@ -3892,17 +4945,25 @@ namespace eval tomlish::parse { set tok $dquotes$c } } - tablename { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { set_tokenType "tablename" set tok $c } - tablearrayname { + tablearrayname-state { set_tokenType "tablearrayname" set tok $c } + dottedkey-space { + set_tokenType barekey + set tok $c + } default { - tomlish::log::debug "char '$c' setting to untyped-value while state:$state" - set_tokenType "untyped-value" + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" set tok $c } } @@ -3913,14 +4974,14 @@ namespace eval tomlish::parse { } #run out of characters (eof) - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { #check for invalid ending tokens - #if {$state eq "err"} { + #if {$state eq "err-state"} { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} switch -exact -- $tokenType { startquotesequence { - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { #invalid #eof with open string @@ -3930,34 +4991,31 @@ namespace eval tomlish::parse { #we ended in a double quote, not actually a startquoteseqence - effectively an empty string switch_tokenType "startquote" incr i -1 - #dict set token_waiting type "string" - #dict set token_waiting tok "" + #set_token_waiting type string value "" complete 1 return 1 } } - startsquotesequence { - set toklen [::string length $tok] + _start_squote_sequence { + set toklen [tcl::string::length $tok] switch -- $toklen { 1 { - #invalid eof with open stringlit + #invalid eof with open literal error "eof reached without closing single quote for string literal. [tomlish::parse::report_line]" } 2 { - dict set token_waiting type endsquote - dict set token_waiting tok "'" - ### - set_tokenType "stringlit" + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" set tok "" return 1 } } } } - dict set token_waiting type "eof" - dict set token_waiting tok "eof" + set_token_waiting type eof value eof complete 1 startindex $i ;#review return 1 } else { - ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" set tokenType "eof" set tok "eof" } @@ -4082,7 +5140,7 @@ if {$argc > 0} { puts stderr "argc: $argc args: $argv" if {($argc == 1)} { - if {[::string tolower $argv] in {help -help h -h}} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { puts stdout "Usage: -app where appname one of:[tomlish::appnames]" exit 0 } else { diff --git a/src/vfs/_vfscommon/modules/calculator_test-0.1.tm b/src/vfs/_vfscommon/modules/calculator_test-0.1.tm new file mode 100644 index 00000000..c0eb027e --- /dev/null +++ b/src/vfs/_vfscommon/modules/calculator_test-0.1.tm @@ -0,0 +1,540 @@ +## -*- tcl -*- +## +## OO-based Tcl/PARAM implementation of the parsing +## expression grammar +## +## calculator grammar +## +## Generated from file calctest.tcl +## for user jnoble +## +# # ## ### ##### ######## ############# ##################### +## Requirements + +package require Tcl 8.5 9 +package require TclOO +package require pt::rde::oo ; # OO-based implementation of the + # PARAM virtual machine + # underlying the Tcl/PARAM code + # used below. + +# # ## ### ##### ######## ############# ##################### +## + +oo::class create calculator_test { + # # ## ### ##### ######## ############# + ## Public API + + superclass pt::rde::oo ; # TODO - Define this class. + # Or can we inherit from a snit + # class too ? + + method parse {channel} { + my reset $channel + my MAIN ; # Entrypoint for the generated code. + return [my complete] + } + + method parset {text} { + my reset {} + my data $text + my MAIN ; # Entrypoint for the generated code. + return [my complete] + } + + # # ## ### ###### ######## ############# + ## BEGIN of GENERATED CODE. DO NOT EDIT. + + # + # Grammar Start Expression + # + + method MAIN {} { + my sym_Expression + return + } + + # + # value Symbol 'AddOp' + # + + method sym_AddOp {} { + # [+-] + + my si:void_symbol_start AddOp + my si:next_class +- + my si:void_leaf_symbol_end AddOp + return + } + + # + # value Symbol 'Digit' + # + + method sym_Digit {} { + # [0123456789] + + my si:void_symbol_start Digit + my si:next_class 0123456789 + my si:void_leaf_symbol_end Digit + return + } + + # + # value Symbol 'Expression' + # + + method sym_Expression {} { + # x + # (Term) + # * + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + my si:value_symbol_start Expression + my sequence_18 + my si:reduce_symbol_end Expression + return + } + + method sequence_18 {} { + # x + # (Term) + # * + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + my si:value_state_push + my sym_Term + my si:valuevalue_part + my kleene_16 + my si:value_state_merge + return + } + + method kleene_16 {} { + # * + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + while {1} { + my si:void2_state_push + my sequence_14 + my si:kleene_close + } + return + } + + method sequence_14 {} { + # x + # * + # '' + # (AddOp) + # * + # '' + # (Term) + + my si:void_state_push + my kleene_8 + my si:voidvalue_part + my sym_AddOp + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my sym_Term + my si:value_state_merge + return + } + + method kleene_8 {} { + # * + # '' + + while {1} { + my si:void2_state_push + my si:next_char \40 + my si:kleene_close + } + return + } + + # + # value Symbol 'Factor' + # + + method sym_Factor {} { + # x + # (Fragment) + # * + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + my si:value_symbol_start Factor + my sequence_32 + my si:reduce_symbol_end Factor + return + } + + method sequence_32 {} { + # x + # (Fragment) + # * + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + my si:value_state_push + my sym_Fragment + my si:valuevalue_part + my kleene_30 + my si:value_state_merge + return + } + + method kleene_30 {} { + # * + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + while {1} { + my si:void2_state_push + my sequence_28 + my si:kleene_close + } + return + } + + method sequence_28 {} { + # x + # * + # '' + # (PowOp) + # * + # '' + # (Fragment) + + my si:void_state_push + my kleene_8 + my si:voidvalue_part + my sym_PowOp + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my sym_Fragment + my si:value_state_merge + return + } + + # + # value Symbol 'Fragment' + # + + method sym_Fragment {} { + # / + # x + # '\(' + # * + # '' + # (Expression) + # * + # '' + # '\)' + # (Number) + # (Var) + + my si:value_symbol_start Fragment + my choice_46 + my si:reduce_symbol_end Fragment + return + } + + method choice_46 {} { + # / + # x + # '\(' + # * + # '' + # (Expression) + # * + # '' + # '\)' + # (Number) + # (Var) + + my si:value_state_push + my sequence_42 + my si:valuevalue_branch + my sym_Number + my si:valuevalue_branch + my sym_Var + my si:value_state_merge + return + } + + method sequence_42 {} { + # x + # '\(' + # * + # '' + # (Expression) + # * + # '' + # '\)' + + my si:void_state_push + my si:next_char \50 + my si:voidvoid_part + my kleene_8 + my si:voidvalue_part + my sym_Expression + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my si:next_char \51 + my si:value_state_merge + return + } + + # + # value Symbol 'MulOp' + # + + method sym_MulOp {} { + # [*/] + + my si:void_symbol_start MulOp + my si:next_class */ + my si:void_leaf_symbol_end MulOp + return + } + + # + # value Symbol 'Number' + # + + method sym_Number {} { + # x + # ? + # (Sign) + # + + # (Digit) + + my si:value_symbol_start Number + my sequence_57 + my si:reduce_symbol_end Number + return + } + + method sequence_57 {} { + # x + # ? + # (Sign) + # + + # (Digit) + + my si:value_state_push + my optional_52 + my si:valuevalue_part + my poskleene_55 + my si:value_state_merge + return + } + + method optional_52 {} { + # ? + # (Sign) + + my si:void2_state_push + my sym_Sign + my si:void_state_merge_ok + return + } + + method poskleene_55 {} { + # + + # (Digit) + + my i_loc_push + my sym_Digit + my si:kleene_abort + while {1} { + my si:void2_state_push + my sym_Digit + my si:kleene_close + } + return + } + + # + # value Symbol 'PowOp' + # + + method sym_PowOp {} { + # "**" + + my si:void_symbol_start PowOp + my si:next_str ** + my si:void_leaf_symbol_end PowOp + return + } + + # + # value Symbol 'Sign' + # + + method sym_Sign {} { + # [-+] + + my si:void_symbol_start Sign + my si:next_class -+ + my si:void_leaf_symbol_end Sign + return + } + + # + # value Symbol 'Term' + # + + method sym_Term {} { + # x + # (Factor) + # * + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + my si:value_symbol_start Term + my sequence_75 + my si:reduce_symbol_end Term + return + } + + method sequence_75 {} { + # x + # (Factor) + # * + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + my si:value_state_push + my sym_Factor + my si:valuevalue_part + my kleene_73 + my si:value_state_merge + return + } + + method kleene_73 {} { + # * + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + while {1} { + my si:void2_state_push + my sequence_71 + my si:kleene_close + } + return + } + + method sequence_71 {} { + # x + # * + # '' + # (MulOp) + # * + # '' + # (Factor) + + my si:void_state_push + my kleene_8 + my si:voidvalue_part + my sym_MulOp + my si:valuevalue_part + my kleene_8 + my si:valuevalue_part + my sym_Factor + my si:value_state_merge + return + } + + # + # value Symbol 'Var' + # + + method sym_Var {} { + # x + # '$' + # [xyz] + + my si:void_symbol_start Var + my sequence_80 + my si:void_leaf_symbol_end Var + return + } + + method sequence_80 {} { + # x + # '$' + # [xyz] + + my si:void_state_push + my si:next_char $ + my si:voidvoid_part + my si:next_class xyz + my si:void_state_merge + return + } + + ## END of GENERATED CODE. DO NOT EDIT. + # # ## ### ###### ######## ############# +} + +# # ## ### ##### ######## ############# ##################### +## Ready + +package provide calculator_test 0.1 +return diff --git a/src/vfs/_vfscommon/modules/modpodtest-0.1.0.tm b/src/vfs/_vfscommon/modules/modpodtest-0.1.0.tm index f2f62cb7..8f674478 100644 Binary files a/src/vfs/_vfscommon/modules/modpodtest-0.1.0.tm and b/src/vfs/_vfscommon/modules/modpodtest-0.1.0.tm differ diff --git a/src/vfs/_vfscommon/modules/overtype-1.6.5.tm b/src/vfs/_vfscommon/modules/overtype-1.6.5.tm index 492341d6..3c200d26 100644 --- a/src/vfs/_vfscommon/modules/overtype-1.6.5.tm +++ b/src/vfs/_vfscommon/modules/overtype-1.6.5.tm @@ -439,7 +439,8 @@ tcl::namespace::eval overtype { if {[llength $lflines]} { lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] } - set inputchunks $lflines[unset lflines] + #set inputchunks $lflines[unset lflines] + set inputchunks [lindex [list $lflines [unset lflines]] 0] } } @@ -2115,6 +2116,7 @@ tcl::namespace::eval overtype { if {[llength $undercols]< $opt_width} { set diff [expr {$opt_width- [llength $undercols]}] if {$diff > 0} { + #set undercols [list {*}$undercols {*}[lrepeat $diff "\u0000"]] ;#2024 - much slower lappend undercols {*}[lrepeat $diff "\u0000"] lappend understacks {*}[lrepeat $diff $cs] lappend understacks_gx {*}[lrepeat $diff $gs] @@ -3889,7 +3891,19 @@ tcl::namespace::eval overtype { #OSC 4 - set colour palette #can take multiple params #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ - set params [tcl::string::range $code_content 1 end] + set params [tcl::string::range $code_content 2 end] ;#strip 4 and first semicolon + set cmap [dict create] + foreach {cnum spec} [split $params {;}] { + if {$cnum >= 0 and $cnum <= 255} { + #todo - parse spec from names like 'red' to RGB + #todo - accept rgb:ab/cd/ef as well as rgb:/a/b/c (as alias for aa/bb/cc) + #also - what about rgb:abcd/defg/hijk and 12-bit abc/def/ghi ? + dict set cmap $cnum $spec + } else { + #todo - log + puts stderr "overtype::renderline OSC 4 set colour palette - bad color number: $cnum must be from 0 to 255. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" diff --git a/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm b/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm index e367ce9e..887888e8 100644 --- a/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm +++ b/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm @@ -183,7 +183,9 @@ namespace eval punk::console { variable previous_stty_state_$channel set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] eq ""} { - set previous_stty_state_$channel [exec {*}$sttycmd -g <@$channel] + if {[catch {{*}$sttycmd -g <@$channel} previous_stty_state_$channel]} { + set previous_stty_state_$channel "" + } } exec {*}$sttycmd raw -echo <@$channel @@ -253,13 +255,21 @@ namespace eval punk::console { return "line" } } elseif {$raw_or_line eq "raw"} { - punk::console::enableRaw + if {[catch { + punk::console::enableRaw + } errM]} { + puts stderr "Warning punk::console::enableRaw failed - $errM" + } if {[can_ansi]} { punk::console::enableVirtualTerminal both } } elseif {$raw_or_line eq "line"} { #review -order. disableRaw has memory from enableRaw.. but but for line mode we want vt disabled - so call it after disableRaw (?) - punk::console::disableRaw + if {[catch { + punk::console::disableRaw + } errM]} { + puts stderr "Warning punk::console::disableRaw failed - $errM" + } if {[can_ansi]} { punk::console::disableVirtualTerminal input ;#default readline arrow behaviour etc punk::console::enableVirtualTerminal output ;#display/use ansi codes @@ -290,12 +300,15 @@ namespace eval punk::console { set loadstate [zzzload::pkg_require twapi] #loadstate could also be stuck on loading? - review - zzzload not very ripe - #Twapi is relatively slow to load - can be 1s plus in normal cases - and much longer if there are disk performance issues. - + #Twapi can be relatively slow to load (on some systems) - can be 1s plus in some cases - and much longer if there are disk performance issues. if {$loadstate ni [list failed]} { + #possibly still 'loading' #review zzzload usage #puts stdout "=========== console loading twapi =============" - zzzload::pkg_wait twapi + set loadstate [zzzload::pkg_wait twapi] ;#can return 'failed' will return version if already loaded or loaded during wait + } + + if {$loadstate ni [list failed]} { package require twapi ;#should be fast once twapi dll loaded in zzzload thread set ::punk::console::has_twapi 1 @@ -523,6 +536,9 @@ namespace eval punk::console { set is_raw 0 return [list stdin [list from $oldmode to $newmode]] } elseif {[set sttycmd [auto_execok stty]] ne ""} { + #stty can return info on windows - but doesn't seem to be able to set anything. + #review - is returned info even valid? + set sttycmd [auto_execok stty] if {[set previous_stty_state_$channel] ne ""} { exec {*}$sttycmd [set previous_stty_state_$channel] diff --git a/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm b/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm index 63f32dee..872e4807 100644 --- a/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm +++ b/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm @@ -339,6 +339,92 @@ namespace eval punk::lib { set has_twapi [expr {![catch {package require twapi}]}] } + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + proc lswap {lvar a z} { + upvar $lvar l + if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { + #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + set a_index [lindex_resolve $l $a] + set a_msg "" + switch -- $a_index { + -2 { + "$a is greater th + } + -3 { + } + } + error "lswap cannot indices $a and $z $a is out of range" + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} #*** !doctools #[subsection {Namespace punk::lib}] @@ -347,6 +433,7 @@ namespace eval punk::lib { if {[info commands lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation #support minimal set from to proc range {from to} { lseq $from $to @@ -1009,24 +1096,28 @@ namespace eval punk::lib { } set lower_resolve [punk::lib::lindex_resolve $dval $a] ;#-2 for too low, -1 for too high #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds - if {${lower_resolve} == -1} { + if {${lower_resolve} == -2} { + ##x #lower bound is above upper list range #match with decreasing indices is still possible set lower [expr {[llength $dval]-1}] ;#set to max - } elseif {$lower_resolve == -2} { + } elseif {$lower_resolve == -3} { + ##x set lower 0 } else { set lower $lower_resolve } set upper [punk::lib::lindex_resolve $dval $b] - if {$upper == -2} { + if {$upper == -3} { + ##x #upper bound is below list range - - if {$lower_resolve >=-1} { + if {$lower_resolve >=-2} { + ##x set upper 0 } else { continue } - } elseif {$upper == -1} { + } elseif {$upper == -2} { #use max set upper [expr {[llength $dval]-1}] #assert - upper >=0 because we have ruled out empty lists @@ -1669,7 +1760,8 @@ namespace eval punk::lib { error "bad expression '$expression': must be integer?\[+-\]integer?" } } - + + # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bunds on upper vs lower side proc lindex_resolve {list index} { #*** !doctools #[call [fun lindex_resolve] [arg list] [arg index]] @@ -1679,11 +1771,13 @@ namespace eval punk::lib { #[para]Sometimes the actual integer index is desired. #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. #[para]lindex_resolve will parse the index expression and return: - #[para] a) -2 if the supplied index expression is below the lower bound for the supplied list. (< 0) - #[para] b) -1 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] a) -3 if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) -2 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list #[para]Otherwise it will return an integer corresponding to the position in the list. #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out or bounds indices, or in the case of lrange, return results anyway. #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which we will resolve (hopefully safely) with expr #if {![llength $list]} { @@ -1694,9 +1788,9 @@ namespace eval punk::lib { if {[string is integer -strict $index]} { #can match +i -i if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } else { #integer may still have + sign - normalize with expr return [expr {$index}] @@ -1708,14 +1802,14 @@ namespace eval punk::lib { set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { - return -1 + return -2 } } else { - #end + #index is 'end' set index [expr {[llength $list]-1}] if {$index < 0} { - #special case - end with empty list - treat end like a positive number out of bounds - return -1 + #special case - 'end' with empty list - treat end like a positive number out of bounds + return -2 } else { return $index } @@ -1723,7 +1817,7 @@ namespace eval punk::lib { if {$offset == 0} { set index [expr {[llength $list]-1}] if {$index < 0} { - return -1 ;#special case + return -2 ;#special case as above } else { return $index } @@ -1732,7 +1826,7 @@ namespace eval punk::lib { set index [expr {([llength $list]-1) - $offset}] } if {$index < 0} { - return -2 + return -3 } else { return $index } @@ -1753,26 +1847,50 @@ namespace eval punk::lib { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } if {$index < 0} { - return -2 + return -3 } elseif {$index >= [llength $list]} { - return -1 + return -2 } return $index } } } - proc lindex_resolve2 {list index} { - #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ but that's likely unavailable here. + proc lindex_resolve_basic {list index} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg list] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -1 for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ + # - which #for {set i 0} {$i < [llength $list]} {incr i} { # lappend indices $i #} + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < 0 || ($index >= [llength $list])} { + #even though in this case we could return -2 or -3 like lindex_resolve; for consistency we don't, as it's not always determinable for compound indices using the lseq method. + return -1 + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } if {[llength $list]} { set indices [punk::lib::range 0 [expr {[llength $list]-1}]] ;# uses lseq if available, has fallback. + #if lseq was available - $indices is an 'arithseries' - theoretically not taking up ram(?) } else { set indices [list] } set idx [lindex $indices $index] if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end return -1 } else { return $idx @@ -2334,13 +2452,6 @@ namespace eval punk::lib { } return $prefix } - #test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var - proc swapnumvars {namea nameb} { - upvar $namea a $nameb b - set a [expr {$a ^ $b}] - set b [expr {$a ^ $b}] - set a [expr {$a ^ $b}] - } #e.g linesort -decreasing $data proc linesort {args} { @@ -2956,7 +3067,7 @@ namespace eval punk::lib { # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { - set number [punk::objclone $unformattednumber] + set number [objclone $unformattednumber] set number [string map {_ ""} $number] #normalize using expr - e.g 2e4 -> 20000.0 set number [expr {$number}] diff --git a/src/vfs/_vfscommon/modules/punk/mix/base-0.1.tm b/src/vfs/_vfscommon/modules/punk/mix/base-0.1.tm index 932c1db6..806b172e 100644 --- a/src/vfs/_vfscommon/modules/punk/mix/base-0.1.tm +++ b/src/vfs/_vfscommon/modules/punk/mix/base-0.1.tm @@ -4,6 +4,7 @@ package provide punk::mix::base [namespace eval punk::mix::base { }] package require punk::path +package require punk::lib ;#format_number etc #base internal plumbing functions namespace eval punk::mix::base { @@ -657,16 +658,38 @@ namespace eval punk::mix::base { #temp emission to stdout.. todo - repl telemetry channel puts stdout "cksum_path: creating temporary tar archive for $path" - puts stdout " at: $archivename .." - tar::create $archivename $target + puts -nonewline stdout " at: $archivename ..." + set tsstart [clock millis] + if {[set tarpath [auto_execok tar]] ne ""} { + #using an external binary is *significantly* faster than tar::create - but comes with some risks + #review - need to check behaviour/flag variances across platforms + #don't use -z flag. On at least some tar versions the zipped file will contain a timestamped subfolder of filename.tar - which ruins the checksum + #also - tar is generally faster without the compression (although this may vary depending on file size and disk speed?) + exec {*}$tarpath -cf $archivename $target ;#{*} needed in case spaces in tarpath + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar -cf done ($ms ms)" + } else { + set tsstart [clock millis] ;#don't include auto_exec search time for tar::create + tar::create $archivename $target + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " tar::create done ($ms ms)" + puts stdout " NOTE: install tar executable for potentially *much* faster directory checksum processing" + } + if {$ftype eq "file"} { - set sizeinfo "(size [file size $target])" + set sizeinfo "(size [punk::lib::format_number [file size $target]] bytes)" } else { - set sizeinfo "(file type $ftype - size unknown)" + set sizeinfo "(file type $ftype - tarred size [punk::lib::format_number [file size $archivename]] bytes)" } - puts stdout "cksum_path: calculating cksum for $target $sizeinfo..." + set tsstart [clock millis] + puts -nonewline stdout "cksum_path: calculating cksum for $target $sizeinfo ... " set cksum [{*}$cksum_command $archivename] - #puts stdout "cksum_path: cleaning up.. " + set tsend [clock millis] + set ms [expr {$tsend - $tsstart}] + puts stdout " cksum done ($ms ms)" + puts stdout " cksum: $cksum" file delete -force $archivename cd $startdir diff --git a/src/vfs/_vfscommon/modules/punk/repl-0.1.tm b/src/vfs/_vfscommon/modules/punk/repl-0.1.tm index 9debf045..f40ff65d 100644 --- a/src/vfs/_vfscommon/modules/punk/repl-0.1.tm +++ b/src/vfs/_vfscommon/modules/punk/repl-0.1.tm @@ -416,7 +416,11 @@ proc repl::start {inchan args} { variable codethread_cond - tsv::unset codethread_$codethread + if {[catch { + tsv::unset codethread_$codethread + } errM]} { + puts stderr " repl::start temp warning - $errM" + } thread::cancel $codethread thread::cond destroy $codethread_cond ;#race if we destroy cond before child thread has exited - as it can send a -async quit set codethread "" diff --git a/src/vfs/_vfscommon/modules/punk/repo-0.1.1.tm b/src/vfs/_vfscommon/modules/punk/repo-0.1.1.tm index 2cb5fd1d..e056b14a 100644 --- a/src/vfs/_vfscommon/modules/punk/repo-0.1.1.tm +++ b/src/vfs/_vfscommon/modules/punk/repo-0.1.1.tm @@ -468,7 +468,7 @@ namespace eval punk::repo { set path [string trim [string range $ln [string length "MISSING "] end]] dict set pathdict $path "missing" } - "EXTRA * " { + "EXTRA *" { #fossil will explicitly list files in a new folder - as opposed to git which shows just the folder set path [string trim [string range $ln [string length "EXTRA "] end]] dict set pathdict $path "extra" diff --git a/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm b/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm index 4f887fd5..b8f4dec0 100644 --- a/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm +++ b/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm @@ -1658,6 +1658,14 @@ namespace eval shellfilter { return [list $idout $iderr] } + #eg try: set v [list #a b c] + #vs set v {#a b c} + proc list_is_canonical l { + #courtesy DKF via wiki https://wiki.tcl-lang.org/page/BNF+for+Tcl + if {[catch {llength $l}]} {return 0} + string equal $l [list {*}$l] + } + #return a dict keyed on numerical list index showing info about each element # - particularly # 'wouldbrace' to indicate that the item would get braced by Tcl when added to another list diff --git a/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm b/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm index ae68fad9..bad6de44 100644 Binary files a/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm and b/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm differ diff --git a/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm b/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm index 617f7f9e..25ecc083 100644 --- a/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm +++ b/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm @@ -71,17 +71,41 @@ package require logger namespace eval tomlish { namespace export {[a-z]*}; # Convention: export all lowercase variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + #ARRAY is analogous to a Tcl list #TABLE is analogous to a Tcl dict #WS = inline whitespace - #KEYVAL = bare key and value - #QKEYVAL = quoted key and value + #KEY = bare key and value + #QKEY = double quoted key and value + #SQKEY = single quoted key and value #ITABLE = inline table (*can* be anonymous table) # inline table values immediately create a table with the opening brace # inline tables are fully defined between their braces, as are dotted-key subtables defined within # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained - set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL SQKEYVAL STRING STRINGPART MULTISTRING STRINGLIT MULTISTRINGLIT INT FLOAT BOOL DATETIME] + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) set min_int -9223372036854775808 ;#-2^63 @@ -117,7 +141,7 @@ namespace eval tomlish { foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { - STRING - STRINGLIT - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { set type [lindex $sub 0] set value [lindex $sub 1] set found_sub $sub @@ -127,10 +151,10 @@ namespace eval tomlish { } } if {!$found_value} { - error "Failed to find value element in KEYVAL. '$keyval_element'" + error "Failed to find value element in KEY. '$keyval_element'" } if {$found_value > 1} { - error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + error "Found multiple value elements in KEY, expected exactly one. '$keyval_element'" } switch -exact -- $type { @@ -141,9 +165,9 @@ namespace eval tomlish { STRING - STRINGPART { set result [list type $type value [::tomlish::utils::unescape_string $value]] } - STRINGLIT { + LITERAL - LITERALPART { #REVIEW - set result [list type STRINGLIT value $value] + set result [list type $type value $value] } TABLE - ITABLE - ARRAY - MULTISTRING { #jmn2024 - added ITABLE - review @@ -193,10 +217,10 @@ namespace eval tomlish { set tag [lindex $item 0] #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { - KEYVAL - QKEYVAL - SQKEYVAL { + KEY - QKEY - SQKEY { log::debug "--> processing $tag: $item" set key [lindex $item 1] - if {$tag eq "QKEYVAL"} { + if {$tag eq "QKEY"} { set key [::tomlish::utils::unescape_string $key] } #!todo - normalize key. (may be quoted/doublequoted) @@ -209,6 +233,71 @@ namespace eval tomlish { set keyval_dict [_get_keyval_value $item] dict set datastructure $key $keyval_dict } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set compoundkey [lindex $item 1] ;#sequence of KEY|QKEY|SQKEY,DOTSEP,KEY|QKEY|SQKEY with possible WS + #if more than one KEY,and DOTSEP is missing then invalid toml + set name_segments [list] + set key_hierarchy [list] + set key_hierarchy_raw [list] + + set expect_sep 0 + foreach part $compoundkey { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$item'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + if {[llength $key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + } else { + set table_hierarchy [lrange $key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $key_hierarchy_raw 0 end-1] + + } + + #ensure empty tables are still represented in the datastructure + set subkey [list] + foreach k $table_hierarchy { + lappend subkey $k + if {![dict exists $datastructure {*}$subkey]} { + dict set datastructure {*}$subkey [list] + } else { + tomlish::log::notice "get_dict datastructure at subkey $subkey already had data: [dict get $datastructure {*}$subkey]" + } + } + } TABLE { set tablename [lindex $item 1] set tablename [::tomlish::utils::tablename_trim $tablename] @@ -229,15 +318,14 @@ namespace eval tomlish { foreach rawseg $name_segments { set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes - set c1 [::string index $rawseg 0] - set c2 [::string index $rawseg end] + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes are processed within it. - set seg [::string range $rawseg 1 end-1] + set seg [tcl::string::range $rawseg 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. - set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] - #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] } else { set seg $rawseg } @@ -251,9 +339,10 @@ namespace eval tomlish { if {[dict exists $datastructure {*}$key_hierarchy]} { #It's ok for this key to already exist *if* it was defined by a previous tablename, - # but not if it was defined as a keyval/qkeyval + # but not if it was defined as a key/qkey/skey ? set testkey [join $key_hierarchy_raw .] + set testkey_length [llength $key_hierarchy_raw] set found_testkey 0 if {$testkey in $tablenames_seen} { @@ -270,11 +359,12 @@ namespace eval tomlish { # instead compare {a b.c d} with {a b c d} # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #dots within table segments might seem like an 'edge case' - # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" if {$testkey eq $seen_match} { set found_testkey 1 } @@ -283,7 +373,12 @@ namespace eval tomlish { if {$found_testkey == 0} { #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset - error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + set msg "key [join $key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg } } @@ -311,19 +406,25 @@ namespace eval tomlish { foreach element [lrange $item 2 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL - SQKEYVAL { + KEY - QKEY - SQKEY { set keyval_key [lindex $element 1] - if {$type eq "QKEYVAL"} { + if {$type eq "QKEY"} { set keyval_key [::tomlish::utils::unescape_string $keyval_key] } + if {[dict exists $datastructure {*}$key_hierarchy $keyval_key]} { + error "Duplicate key '$key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } set keyval_dict [_get_keyval_value $element] dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict } + DOTTEDKEY { + error "todo dotted key in table context" + } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,SQKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -336,10 +437,10 @@ namespace eval tomlish { foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL - SQKEYVAL { + KEY - QKEY - SQKEY { set keyval_key [lindex $element 1] set keyval_dict [_get_keyval_value $element] - if {$type eq "QKEYVAL"} { + if {$type eq "QKEY"} { set keyval_key [::tomlish::utils::unescape_string $keyval_key] } dict set datastructure $keyval_key $keyval_dict @@ -348,7 +449,7 @@ namespace eval tomlish { #ignore } default { - error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,SQKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" } } } @@ -369,7 +470,7 @@ namespace eval tomlish { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] } - STRINGLIT { + LITERAL { set value [lindex $element 1] lappend datastructure [list type $type value $value] } @@ -395,8 +496,9 @@ namespace eval tomlish { for {set idx 0} {$idx < [llength $parts]} {incr idx} { set element [lindex $parts $idx] set type [lindex $element 0] + #todo - do away with STRINGPART switch -exact -- $type { - STRINGPART { + STRING - STRINGPART { append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] } CONT { @@ -556,8 +658,8 @@ namespace eval tomlish::encode { proc float {f} { #convert any non-lower case variants of special values to lowercase for Toml - if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { - return [list FLOAT [string tolower $f]] + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] } if {[::tomlish::utils::is_float $f]} { return [list FLOAT $f] @@ -587,33 +689,45 @@ namespace eval tomlish::encode { } } + + #TODO #Take tablename followed by - # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types proc table {name args} { set pairs [list] foreach t $args { - if {[llength $t] == 3} { - if {[lindex $t 0] ne "KEYVAL"} { - error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" } - lappend pairs $t + lappend pairs [list KEY $keystr = $valuepart] } elseif {[llength $t] == 2} { #!todo - type heuristics lassign $t n v - lappend pairs [list KEYVAL $n [list STRING $v]] + lappend pairs [list KEY $n = [list STRING $v]] } else { - error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERAL that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERAL parts, with the datastructure representation dropping the first one when building the value. + set literal "" foreach part [lrange $item 1 end] { - append stringlit [::tomlish::encode::tomlish [list $part] $nextcontext] + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] } - append toml '''$stringlit''' + append toml '''$literal''' } INT - BOOL - @@ -862,11 +1011,12 @@ namespace eval tomlish::decode { set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. - set state "key-space" - ::tomlish::parse::spacestack push {space key-space} + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) set linenum 1 - + + set ::tomlish::parse::state_list [list] try { while {$r} { set r [::tomlish::parse::tok $s] @@ -878,31 +1028,155 @@ namespace eval tomlish::decode { #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" #puts "-->tok: $tok tokenType='$tokenType'" set prevstate $state - ##### - set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - ::tomlish::log::info "STATE TRANSITION tokenType: '$tokenType' tok: $tok triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below - set state $nextstate - if {$state eq "err"} { - error "State error for tokenType: $tokenType tok: $tok - aborting parse. [tomlish::parse::report_line]" + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) } - if {$last_space_action eq "pop"} { + if {$space_action eq "pop"} { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 + } + '' { + #review - we should perhaps return double_squote instead? + #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 + } + ''' { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 + } + } newline { incr linenum lappend v($nest) [list NEWLINE $tok] } tablename { #note: a tablename only 'pops' if we are greater than zero - error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" } tablearrayname { #!review - tablearrayname different to tablename regarding push/pop? #note: a tablename only 'pops' if we are greater than zero - error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" } endarray { #nothing to do here. @@ -912,32 +1186,57 @@ namespace eval tomlish::decode { lappend v($nest) "SEP" } endinlinetable { - ::tomlish::log::debug "endinlinetable for last_space_action pop" + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } endmultiquote { - ::tomlish::log::debug "endmultiquote for last_space_action 'pop'" + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" } default { - error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } - set parentlevel [expr {$nest -1}] - lappend v($parentlevel) [set v($nest)] + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + incr nest -1 } elseif {$last_space_action eq "push"} { + set prevnest $nest incr nest 1 set v($nest) [list] # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + switch -exact -- $tokenType { - barekey { - set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + barekey - squotedkey { + #set v($nest) [list KEY $tok] ;#$tok is the keyname + if {$prevstate eq "table-space"} { + set v($nest) [list DOTTEDKEY] + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 ;#re-submit token in the newly pushed space + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" } quotedkey - itablequotedkey { - set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + set v($nest) [list QKEY $tok] ;#$tok is the keyname } - squotedkey - itablesquotedkey { - set v($nest) [list SQKEYVAL $tok] ;#$tok is the keyname + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! @@ -952,7 +1251,7 @@ namespace eval tomlish::decode { # tomlish list? set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) @@ -970,17 +1269,31 @@ namespace eval tomlish::decode { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } startmultiquote { - ::tomlish::log::debug "push trigger tokenType startmultiquote" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL } default { - error "push trigger tokenType '$tokenType' not yet implemented" + error "---- push trigger tokenType '$tokenType' not yet implemented" } } } else { #no space level change switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } starttablename { #$tok is triggered by the opening bracket and sends nothing to output } @@ -988,62 +1301,69 @@ namespace eval tomlish::decode { #$tok is triggered by the double opening brackets and sends nothing to output } tablename - tablenamearray { - error "did not expect 'tablename/tablearrayname' without space level change" + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" #set v($nest) [list TABLE $tok] } endtablename - endtablearrayname { #no output into the tomlish list for this token } startinlinetable { - puts stderr "decode::toml error. did not expect startinlinetable without space level change" + puts stderr "---- decode::toml error. did not expect startinlinetable without space level change (no space level change)" } startquote { - switch -exact -- $nextstate { - string { + switch -exact -- $newstate { + string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quotedkey { + quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "quotedkey" set tok "" } - itablequotedkey { + itable-quoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "itablequotedkey" set tok "" } default { - error "startquote switch case not implemented for nextstate: $nextstate" + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" } } } startsquote { - switch -exact -- $nextstate { - stringlit { + switch -exact -- $newstate { + literal-state { set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "stringlit" + ::tomlish::parse::set_tokenType "literal" set tok "" } - squotedkey { + squoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "squotedkey" set tok "" } - itablesquotedkey { + itable-squoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "itablesquotedkey" set tok "" } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } default { - error "startsquote switch case not implemented for nextstate: $nextstate" + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" } } } startmultiquote { #review - puts stderr "no space level change - got startmultiquote" + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" set next_tokenType_known 1 ::tomlish::parse::set_tokenType "stringpart" set tok "" @@ -1060,25 +1380,45 @@ namespace eval tomlish::decode { set tok "" } string { - lappend v($nest) [list STRING $tok] + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes } - stringpart { - lappend v($nest) [list STRINGPART $tok] + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } } multistring { #review lappend v($nest) [list MULTISTRING $tok] } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } quotedkey { #lappend v($nest) [list QKEY $tok] ;#TEST } itablequotedkey { } - stringlit { - lappend v($nest) [list STRINGLIT $tok] - } - untyped-value { + untyped_value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. if {$tok in {true false}} { set tag BOOL @@ -1089,9 +1429,10 @@ namespace eval tomlish::decode { } elseif {[::tomlish::utils::is_datetime $tok]} { set tag DATETIME } else { - error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line] (no space level change)" } lappend v($nest) [list $tag $tok] + } comment { #puts stdout "----- comment token returned '$tok'------" @@ -1122,18 +1463,18 @@ namespace eval tomlish::decode { #!todo - check previous tokens are complete/valid? } default { - error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" } } } if {!$next_tokenType_known} { - ::tomlish::log::notice "tomlish::decode::toml - current tokenType:$tokenType Next token type not known" + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" ::tomlish::parse::set_tokenType "" set tok "" } - if {$state eq "end"} { + if {$state eq "end-state"} { break } @@ -1197,7 +1538,7 @@ namespace eval tomlish::utils { #basic generic quote matching for single and double quotes #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes proc tok_in_quotedpart {tok} { - set sLen [::string length $tok] + set sLen [tcl::string::length $tok] set quote_type "" set had_slash 0 for {set i 0} {$i < $sLen} {incr i} { @@ -1208,7 +1549,7 @@ namespace eval tomlish::utils { #leave slash_mode because even if current char is slash - it is escaped set had_slash 0 } else { - set ctype [string map [list {"} dq {'} sq \\ bsl] $c] + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] switch -- $ctype { dq { set quote_type dq @@ -1227,7 +1568,7 @@ namespace eval tomlish::utils { #leave slash_mode because even if current char is slash - it is escaped set had_slash 0 } else { - set ctype [string map [list {"} dq {'} sq \\ bsl] $c] + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] switch -- $ctype { dq { if {$quote_type eq "dq"} { @@ -1253,7 +1594,7 @@ namespace eval tomlish::utils { proc tablename_split {tablename {normalize false}} { #we can't just split on . because we have to handle quoted segments which may contain a dot. #eg {dog."tater.man"} - set sLen [::string length $tablename] + set sLen [tcl::string::length $tablename] set segments [list] set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax #quoted is for double-quotes, litquoted is for single-quotes (string literal) @@ -1261,12 +1602,12 @@ namespace eval tomlish::utils { for {set i 0} {$i < $sLen} {incr i} { if {$i > 0} { - set lastChar [::string index $tablename [expr {$i - 1}]] + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $tablename $i] + set c [tcl::string::index $tablename $i] if {$c eq "."} { switch -exact -- $mode { @@ -1294,7 +1635,7 @@ namespace eval tomlish::utils { } } elseif {($c eq "\"") && ($lastChar ne "\\")} { if {$mode eq "unknown"} { - if {[::string trim $seg] ne ""} { + if {[tcl::string::trim $seg] ne ""} { #we don't allow a quote in the middle of a bare key error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" } @@ -1357,13 +1698,13 @@ namespace eval tomlish::utils { if {$normalize} { lappend segments $seg } else { - lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong } } litquoted { - set trimmed_seg [::string trim $seg] - if {[::string index $trimmed_seg end] ne "\'"} { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" } lappend segments $seg @@ -1381,7 +1722,7 @@ namespace eval tomlish::utils { } } foreach seg $segments { - set trimmed [::string trim $seg " \t"] + set trimmed [tcl::string::trim $seg " \t"] #note - we explicitly allow 'empty' quoted strings '' & "" # (these are 'discouraged' but valid toml keys) #if {$trimmed in [list "''" "\"\""]} { @@ -1400,7 +1741,7 @@ namespace eval tomlish::utils { # is a valid 'unicode scalar value' # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} - if {[::string match {\\u*} $slashu]} { + if {[tcl::string::match {\\u*} $slashu]} { set exp {^\\u([0-9a-fA-F]{4}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %4x dec] != 1} { @@ -1412,7 +1753,7 @@ namespace eval tomlish::utils { } else { return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] } - } elseif {[::string match {\\U*} $slashu]} { + } elseif {[tcl::string::match {\\U*} $slashu]} { set exp {^\\U([0-9a-fA-F]{8}$)} if {[regexp $exp $slashu match hex]} { if {[scan $hex %8x dec] != 1} { @@ -1446,7 +1787,7 @@ namespace eval tomlish::utils { set buffer4 "" ;#buffer for 4 hex characters following a \u set buffer8 "" ;#buffer for 8 hex characters following a \u - set sLen [::string length $str] + set sLen [tcl::string::length $str] #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc set slash_active 0 @@ -1458,12 +1799,12 @@ namespace eval tomlish::utils { set i 0 for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [::string index $str [expr {$i - 1}]] + set lastChar [tcl::string::index $str [expr {$i - 1}]] } else { set lastChar "" } - set c [::string index $str $i] + set c [tcl::string::index $str $i] ::tomlish::log::debug "unescape_string. got char $c" scan $c %c n if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { @@ -1486,10 +1827,10 @@ namespace eval tomlish::utils { } } else { if {$unicode4_active} { - if {[::string length $buffer4] < 4} { + if {[tcl::string::length $buffer4] < 4} { append buffer4 $c } - if {[::string length $buffer4] == 4} { + if {[tcl::string::length $buffer4] == 4} { #we have a \uHHHH to test set unicode4_active 0 set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] @@ -1500,10 +1841,10 @@ namespace eval tomlish::utils { } } } elseif {$unicode8_active} { - if {[::string length $buffer8] < 8} { + if {[tcl::string::length $buffer8] < 8} { append buffer8 $c } - if {[::string length $buffer8] == 8} { + if {[tcl::string::length $buffer8] == 8} { #we have a \UHHHHHHHH to test set unicode8_active 0 set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] @@ -1515,7 +1856,7 @@ namespace eval tomlish::utils { } } elseif {$slash_active} { set slash_active 0 - set ctest [string map {{"} dq} $c] + set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { set e "\\\"" @@ -1559,15 +1900,15 @@ namespace eval tomlish::utils { } proc normalize_key {rawkey} { - set c1 [::string index $rawkey 0] - set c2 [::string index $rawkey end] + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] if {($c1 eq "'") && ($c2 eq "'")} { #single quoted segment. No escapes allowed within it. - set key [::string range $rawkey 1 end-1] + set key [tcl::string::range $rawkey 1 end-1] } elseif {($c1 eq "\"") && ($c2 eq "\"")} { #double quoted segment. Apply escapes. # - set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only set key [::tomlish::utils::unescape_string $keydata] #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. } else { @@ -1603,11 +1944,11 @@ namespace eval tomlish::utils { #check if str is valid for use as a toml bare key proc is_barekey {str} { - if {[::string length $str] == 0} { + if {[tcl::string::length $str] == 0} { return 0 } else { set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters match the regexp return 1 } else { @@ -1618,7 +1959,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'integer'. proc int_validchars1 {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { return 1 } else { @@ -1627,7 +1968,7 @@ namespace eval tomlish::utils { } #add support for hex,octal,binary 0x.. 0o.. 0b... proc int_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { return 1 } else { @@ -1644,22 +1985,22 @@ namespace eval tomlish::utils { # --------------------------------------- #check for leading zeroes in non 0x 0b 0o #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { return 0 } # --------------------------------------- #check +,- only occur in the first position. - if {[::string last - $str] > 0} { + if {[tcl::string::last - $str] > 0} { return 0 } - if {[::string last + $str] > 0} { + if {[tcl::string::last + $str] > 0} { return 0 } - set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) - if {![string is integer -strict $numeric_value]} { + if {![tcl::string::is integer -strict $numeric_value]} { return 0 } #!todo - check bounds only based on some config value @@ -1681,7 +2022,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'float'. proc float_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { return 1 } else { @@ -1701,7 +2042,7 @@ namespace eval tomlish::utils { return 1 } - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) #Toml spec also disallows leading zeros in the exponent part @@ -1709,12 +2050,12 @@ namespace eval tomlish::utils { #for now we will allow leading zeros in exponents #!todo - configure 'strict' option to disallow? #first strip any +, - or _ (just for this test) - set check [::string map {+ "" - "" _ ""} $str] + set check [tcl::string::map {+ "" - "" _ ""} $str] set r {([0-9])*} regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E set z {([0])*} regexp $z $intpart leadingzeros - if {[::string length $leadingzeros] > 1} { + if {[tcl::string::length $leadingzeros] > 1} { return 0 } #for floats, +,- may occur in multiple places @@ -1722,9 +2063,9 @@ namespace eval tomlish::utils { #!todo - check bounds ? #strip underscores for tcl double check - set check [::string map {_ ""} $str] + set check [tcl::string::map {_ ""} $str] #string is double accepts inf nan +NaN etc. - if {![::string is double $check]} { + if {![tcl::string::is double $check]} { return 0 } @@ -1737,7 +2078,7 @@ namespace eval tomlish::utils { #test only that the characters in str are valid for the toml specified type 'datetime'. proc datetime_validchars {str} { - set numchars [::string length $str] + set numchars [tcl::string::length $str] if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { return 1 } else { @@ -1753,7 +2094,7 @@ namespace eval tomlish::utils { #e.g 1979-05-27 00:32:00.999999-07:00 set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range #!todo - use full RFC 3339 parser? lassign [split $str T] datepart timepart @@ -1784,57 +2125,188 @@ namespace eval tomlish::parse { #[para] #[list_begin definitions] + #This is a very curly mix of a half-baked statemachine littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are not so trivial to parse using more standard methods either: + #a) some kind of backtracking required if using an ABNF parser + #b) Some of the rules of context-free grammars are violated by the spec. + + #I don't know what the technical name for this sort of parser is. probably something like "Dog's Breakfast" + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text variable state - # states: - # key-space, curly-space, array-space - # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # states: + # table-space, curly-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string, literal-state, multistring... # # notes: - # key-space i # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack - # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax # #stateMatrix defines for each state, actions to take for each possible token. - #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. - #dual-element actions are a push command and the name of the space to push on the stack. - # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide (todo - make implementation match!) + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. + #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but use transition as if we popped there? strange concept - review usecases - #test variable stateMatrix set stateMatrix [dict create] + #xxx-space vs xxx-syntax inadequately documented - TODO + + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# + + dict set stateMatrix\ + table-space { + whitespace "table-space"\ + newline "table-space"\ + bom "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + startsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ + } + + + dict set stateMatrix\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ + } + + # ' = ' portion of keyval + dict set stateMatrix\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + #### + dict set stateMatrix\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE curly-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ + } + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} + dict set stateMatrix\ + leading-squote-space {\ + squote_seq "POPSPACE"\ + } + #dict set stateMatrix\ + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + + dict set stateMatrix\ + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE curly-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value dict set stateMatrix\ - key-space { - whitespace "key-space"\ - newline "key-space"\ - bom "key-space"\ - barekey {pushspace "keyval-space"}\ - startquote "quotedkey"\ - startsquote "squotedkey"\ - comment "key-space"\ - starttablename "tablename"\ - starttablearrayname "tablearrayname"\ - startmultiquote "err"\ - endquote "err"\ - comma "err"\ - eof "end"\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ } + #dottedkeyend "POPSPACE" + + + #curly-space/ curly-syntax : itables dict set stateMatrix\ curly-space {\ whitespace "curly-space"\ newline "curly-space"\ - barekey {pushspace "itablekeyval-space"}\ + barekey {PUSHSPACE "itablekeyval-space"}\ itablequotedkey "itablekeyval-space"\ itablesquotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - startsquote "itablesquotedkey"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + startsquote "itable-squoted-key"\ comma "curly-space"\ - comment "err"\ - eof "err"\ + comment "err-state"\ + eof "err-state"\ } #REVIEW @@ -1844,60 +2316,48 @@ namespace eval tomlish::parse { curly-syntax {\ whitespace "curly-syntax"\ newline "curly-syntax"\ - barekey {pushspace "itablekeyval-space"}\ + barekey {PUSHSPACE "itablekeyval-space"}\ itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ comma "curly-space"\ comment "curly-space"\ - eof "err"\ + eof "err-state"\ } - #review comment "err" vs comment "curly-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #review comment "err-state" vs comment "curly-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? - dict set stateMatrix\ - value-expected {\ - whitespace "value-expected"\ - untyped-value "samespace"\ - startquote "string"\ - startsquote "stringlit"\ - startmultiquote {pushspace "multistring-space"}\ - startinlinetable {pushspace curly-space}\ - startarray {pushspace array-space}\ - comment "err"\ - comma "err"\ - newline "err"\ - eof "err"\ - } + #JMN REVIEW dict set stateMatrix\ array-space {\ whitespace "array-space"\ newline "array-space"\ - untyped-value "samespace"\ - startarray {pushspace "array-space"}\ - endarray "popspace"\ - startmultiquote {pushspace multistring-space}\ - startinlinetable {pushspace curly-space}\ - startquote "string"\ - startsquote "stringlit"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE curly-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ comma "array-space"\ comment "array-space"\ - eof "err"\ + eof "err-state-array-space-got-eof"\ } dict set stateMatrix\ array-syntax {\ whitespace "array-syntax"\ newline "array-syntax"\ - untyped-value "samespace"\ - startarray {pushspace array-space}\ - endarray "popspace"\ - startmultiquote {pushspace multistring-space}\ - startquote "string"\ - startsquote "stringlit"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ comma "array-space"\ - comment "err"\ + comment "err-state"\ } dict set stateMatrix\ @@ -1905,9 +2365,9 @@ namespace eval tomlish::parse { whitespace "itablekeyval-syntax"\ endquote "itablekeyval-syntax"\ endsquote "itablekeyval-syntax"\ - newline "err"\ + newline "err-state"\ equal "value-expected"\ - eof "err"\ + eof "err-state"\ } dict set stateMatrix\ itablekeyval-space {} @@ -1915,156 +2375,161 @@ namespace eval tomlish::parse { dict set stateMatrix\ itablevaltail {\ whitespace "itablevaltail"\ - endinlinetable "popspace"\ - comma "popspace"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ newline "itablevaltail"\ comment "itablevaltail"\ - eof "err"\ + eof "err-state"\ } dict set stateMatrix\ - itablequotedkey {\ + itable-quoted-key {\ whitespace "NA"\ - itablequotedkey {pushspace "itablekeyval-space"}\ - newline "err"\ + itablequotedkey {PUSHSPACE "itablekeyval-space"}\ + newline "err-state"\ endquote "itablekeyval-syntax"\ } dict set stateMatrix\ - itablesquotedkey {\ + itable-squoted-key {\ whitespace "NA"\ - itablesquotedkey {pushspace "itablekeyval-space"}\ - newline "err"\ + itablesquotedkey {PUSHSPACE "itablekeyval-space"}\ + newline "err-state"\ endsquote "itablekeyval-syntax"\ } - dict set stateMatrix\ - keyval-space {\ - } - dict set stateMatrix\ - keyval-syntax {\ - whitespace "keyval-syntax"\ - endquote "keyval-syntax"\ - endsquote "keyval-syntax"\ - equal "value-expected"\ - comma "err"\ - newline "err"\ - eof "err"\ - } - dict set stateMatrix\ - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - - #quotedkey & squotedkey need to pushspace from self to keyval-space + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space dict set stateMatrix\ - quotedkey {\ + quoted-key {\ whitespace "NA"\ - quotedkey {pushspace "keyval-space"}\ - newline "err"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ endquote "keyval-syntax"\ } dict set stateMatrix\ - squotedkey {\ + squoted-key {\ whitespace "NA"\ - squotedkey {pushspace "keyval-space"}\ - newline "err"\ - endsquote "keyval-syntax"\ + squotedkey "squoted-key"\ + newline "err-state"\ + endsquote {PUSHSPACE "keyval-space"}\ } dict set stateMatrix\ - string {\ + string-state {\ whitespace "NA"\ - string "string"\ - endquote "samespace"\ - newline "err"\ - eof "err"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ } dict set stateMatrix\ - stringlit {\ + literal-state {\ whitespace "NA"\ - stringlit "stringlit"\ - endsquote "samespace"\ - newline "err"\ - eof "err"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ } - dict set stateMatrix\ - stringpart {\ - continuation "samespace"\ - endmultiquote "popspace"\ - eof "err"\ - } #dict set stateMatrix\ - # multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } dict set stateMatrix\ multistring-space {\ whitespace "multistring-space"\ continuation "multistring-space"\ stringpart "multistring-space"\ newline "multistring-space"\ - endmultiquote "popspace"\ - eof "err"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ } - #multistring "multistring-space" + #only valid subparts are literalpart and newline. other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline + dict set stateMatrix\ + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes + dict set stateMatrix\ + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + dict set stateMatrix\ - tablename {\ + tablename-state {\ whitespace "NA"\ - tablename {zeropoppushspace key-space}\ - tablename2 {pushspace key-space}\ - endtablename "tablenametail"\ - comma "err"\ - newline "err"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ } dict set stateMatrix\ - tablearrayname {\ + tablearrayname-state {\ whitespace "NA"\ - tablearrayname {zeropoppushspace key-space}\ - tablearrayname2 {pushspace key-space}\ - endtablearray "tablearraynametail"\ - comma "err"\ - newline "err"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ } dict set stateMatrix\ - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } dict set stateMatrix\ - end {} - - #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push - variable stateMatrix_orig { - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} - curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} - value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} - array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} - array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} - keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - keyval-space {} - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} - tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} - tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} - end {} + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" + } + + #This seems hacky... + #see also spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + #build a list of 'push triggers' from the stateMatrix # ie tokens which can push a new space onto spacestack set push_trigger_tokens [list] tcl::dict::for {s transitions} $stateMatrix { tcl::dict::for {token transition_to} $transitions { - set action [lindex $transition_to 0] - switch -exact -- $action { - pushspace - zeropoppushspace { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { if {$token ni $push_trigger_tokens} { lappend push_trigger_tokens $token } @@ -2072,84 +2537,135 @@ namespace eval tomlish::parse { } } } - puts stdout "push_trigger_tokens: $push_trigger_tokens" + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" #!todo - hard code once stateMatrix finalised? #mainly for the -space states: #redirect to another state $c based on a state transition from $whatever to $b # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. - #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' - variable spacePopTransitions { - array-space array-syntax - curly-space curly-syntax - keyval-space keytail - itablekeyval-space itablevaltail - } + #this is useful as we often don't know state $b. e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces variable spacePushTransitions { keyval-space keyval-syntax itablekeyval-space itablekeyval-syntax array-space array-space curly-space curly-space - key-space tablename + table-space tablename-state + } + #Pop to, next + variable spacePopTransitions { + array-space array-syntax + curly-space curly-syntax + itablekeyval-space itablevaltail } + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + variable spaceSameTransitions { + array-space array-syntax + curly-space curly-syntax + itablekeyval-space itablevaltail + } - variable state_list + + variable state_list ;#reset every tomlish::decode::toml namespace export tomlish toml namespace ensemble create - proc getNextState {tokentype currentstate} { + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state variable nest variable v - + + set prevstate $currentstate + + variable spacePopTransitions variable spacePushTransitions + variable spaceSameTransitions + variable last_space_action "none" variable last_space_type "none" variable state_list set result "" + set starttok "" if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] - ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { - popspace { + POPSPACE { spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + set last_space_action "pop" set last_space_type $type - - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" } else { - set next $target + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. + # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } } set result $next } - samespace { - #note the same data as popspace (spacePopTransitions) is used here. - set parent [spacestack peek] - ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" - lassign $parent type target - if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { - set next [dict get $::tomlish::parse::spacePopTransitions $target] - ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" } else { - set next $target + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } } set result $next } zeropoppushspace { if {$nest > 0} { - #pop back down to the root level (key-space) + #pop back down to the root level (table-space) spacestack pop - set parent [spacestack peek] - lassign $parent type target + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + set last_space_action "pop" set last_space_type $type @@ -2162,36 +2678,72 @@ namespace eval tomlish::parse { } #re-entrancy - #set next [list pushspace [lindex $transition_to 1]] + #set next [list PUSHSPACE [lindex $transition_to 1]] set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 - ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" - set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] } - pushspace { - set target [lindex $transition_to 1] - spacestack push [list space $target] + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + set last_space_action "push" set last_space_type "space" - - #puts $::tomlish::parse::spacePushTransitions - if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { - set next [dict get $::tomlish::parse::spacePushTransitions $target] - ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" } else { - set next $target + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } } set result $next } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } default { - set result $transition_to + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word } } } else { - set result "nostate-err" - + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" } - lappend state_list $result - return $result + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] } proc report_line {{line ""}} { @@ -2215,7 +2767,7 @@ namespace eval tomlish::parse { foreach el $list { if { [lindex $el 0] eq "NEWLINE"} { append prettier "[list $el]\n" - } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL SQKEYVAL TABLE ARRAY})} { + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { append prettier [nest_pretty1 $el] } else { append prettier "[list $el] " @@ -2250,7 +2802,7 @@ namespace eval tomlish::parse { proc _shortcircuit_startquotesequence {} { variable tok variable i - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { set_tokenType "startquote" incr i -1 @@ -2264,8 +2816,34 @@ namespace eval tomlish::parse { } } + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set token_waiting $k $v + } + value { + dict set token_waiting tok $v + } + default { + error "set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $token_waiting complete]]} { + error "set_token_waiting error - 'complete' must be a boolean. got [dict get $token_waiting complete]" + } + return + } + #returns 0 or 1 #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) proc tok {s} { variable nest variable v @@ -2274,14 +2852,12 @@ namespace eval tomlish::parse { variable type ;#character type variable state ;#FSM - set resultlist [list] variable tokenType variable tokenType_list variable endToken - set sLen [::string length $s] variable lastChar @@ -2291,33 +2867,38 @@ namespace eval tomlish::parse { #------------------------------ #Previous run found another (presumably single-char) token + #This token_waiting mechanism only allows for a single *completed* token to be specified. variable token_waiting if {[dict size $token_waiting]} { set tokenType [dict get $token_waiting type] set tok [dict get $token_waiting tok] dict unset token_waiting type dict unset token_waiting tok + dict unset token_waiting complete return 1 } #------------------------------ + set resultlist [list] + set sLen [tcl::string::length $s] + set slash_active 0 set quote 0 set c "" set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { - set lastChar [string index $s [expr {$i - 1}]] + set lastChar [tcl::string::index $s [expr {$i - 1}]] } else { set lastChar "" } - set c [string index $s $i] + set c [tcl::string::index $s $i] tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" #puts "got char $c during tokenType '$tokenType'" incr i ;#must incr here because we do'returns'inside the loop - set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { set dquotes $multi_dquote @@ -2327,12 +2908,16 @@ namespace eval tomlish::parse { if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -2343,14 +2928,13 @@ namespace eval tomlish::parse { whitespace { # hash marks end of whitespace token #do a return for the whitespace, set token_waiting - #dict set token_waiting type comment - #dict set token_waiting tok "" + #set_token_waiting type comment value "" complete 1 incr i -1 ;#leave comment for next run return 1 } - untyped-value { + untyped_value { #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? - #we will accept a comment marker as an immediate terminator of the untyped-value. + #we will accept a comment marker as an immediate terminator of the untyped_value. incr i -1 return 1 } @@ -2363,24 +2947,24 @@ namespace eval tomlish::parse { append tok $c } default { - #quotedkey, itablequotedkey, string,stringlit, multistring + #quotedkey, itablequotedkey, string,literal, multistring append tok $c } } } else { switch -- $state { - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType stringpart - } else { - set_tokenType stringlit ;#review - } + multistring-space { + set_tokenType stringpart set tok "" if {$had_slash} { append tok "\\" } append tok "$dquotes#" } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } default { #start of token if we're not in a token set_tokenType comment @@ -2395,88 +2979,86 @@ namespace eval tomlish::parse { set multi_dquote "" set had_slash $slash_active set slash_active 0 - #test jmn2024 - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - startsquotesequence { - incr i -[tcl::string::length $tok] - set_tokenType "startsquote" - return 1 - } - stringlit - squotedkey - itablesquotedkey { - append tok $c - } - string - quotedkey - itablequotedkey { - if {$had_slash} {append tok "\\"} - append tok $c - } - stringpart - stringlit { - if {$had_slash} {append tok "\\"} - append tok $dquotes$c - } - starttablename - starttablearrayname { - #*bare* tablename can only contain letters,digits underscores - error "Invalid tablename first character \{ [tomlish::parse::report_line]" - } - tablename - tablearrayname { - #valid in quoted parts - append tok $c - } - comment { - if {$had_slash} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - switch -exact -- $state { - value-expected { - #switch last key to tablename?? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - array-space - array-syntax { - #nested anonymous inline table - set_tokenType "startinlinetable" - set tok "\{" - return 1 - } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType stringpart - } else { - set_tokenType stringlit ;#review - } - set tok "" - if {$had_slash} { - append tok "\\" - } - append tok "$dquotes\{" - } - default { - error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } @@ -2487,115 +3069,122 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - stringlit - squotedkey - itablesquotedkey { - append tok $c - } - string - quotedkey - itablequotedkey - comment { - if {$had_slash} {append tok "\\"} - append tok $c - } - stringpart { - if {$had_slash} {append tok "\\"} - append tok $dquotes$c - } - starttablename - tablename { - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endinlinetable - dict set token_waiting tok "" - return 1 - } - starttablearrayname - tablearrayname { - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" - return 1 - } - itablevaltail { - #review - error "right-curly in itablevaltail" - } - default { - #end any other token - incr i -1 - return 1 - } + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - #error "unexpected tablename problem" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 + return 1 + } + itablevaltail { + #review + error "right-curly in itablevaltail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endinlinetable" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - curly-syntax - curly-space { - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - array-syntax - array-space { - #invalid - set_tokenType "endinlinetable" - set tok "\}" - return 1 - } - itablevaltail { - set_tokenType "endinlinetable" - set tok "" - #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 - incr i -1 - return 1 - } - itablekeyval-syntax { - error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" - } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType "stringpart" - } else { - set_tokenType "stringlit" ;#review - } - set tok "" - if {$had_slash} { - append tok "\\" - } - append tok "$dquotes\}" - } - default { - #JMN2024b keytail? - error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + curly-syntax - curly-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itablevaltail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itablekeyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" } } + } } lb { @@ -2605,101 +3194,104 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - startsquotesequence { - incr i -[tcl::string::length $tok] - set_tokenType "startsquote" - return 1 - } - stringlit - squotedkey - itablesquotedkey { - append tok $c - } - string - quotedkey - itablequotedkey { - if {$had_slash} {append tok "\\"} - append tok $c - } - stringpart { - if {$had_slash} {append tok "\\"} - append tok $dquotes$c - } - starttablename { - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 - } - tablename { - #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token - if {$had_slash} { - #resultant tablename may be invalid - but leave for datastructure loading stage to catch - append tok "\\[" - } else { - if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { - #invalid at this point - state machine should disallow table -> starttablearrayname - dict set token_waiting type starttablearrayname - dict set token_waiting tok "" - return 1 - } else { - #we appear to still be in single or double quoted section - append tok "\[" - } - } - } - comment { - if {$had_slash} {append tok "\\"} - append tok "\[" - } - default { - #end any other token. - incr i -1 - return 1 - } + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - set_tokenType "startarray" - set tok "\[" - return 1 - } - key-space { - #table name - #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray - #note that a starttablearrayname token may contain whitespace between the brackets - # e.g \[ \[ - set_tokenType "starttablename" - set tok "" ;#there is no output into the tomlish list for this token - } - array-space - array-syntax { - #nested array? - set_tokenType "startarray" - set tok "\[" - return 1 - #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" - } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType "stringpart" + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 + return 1 } else { - set_tokenType "stringlit" ;#review - } - set tok "" - if {$had_slash} { - append tok "\\" + #we appear to still be in single or double quoted section + append tok "\[" } - append tok "$dquotes\[" } - default { - error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" } } + } } rb { #right square bracket @@ -2708,31 +3300,35 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit - squotedkey - itablesquotedkey { + literal - literalpart - squotedkey - itablesquotedkey { append tok $c } string - quotedkey - itablequotedkey { if {$had_slash} {append tok "\\"} append tok $c } - comment { - if {$had_slash} {append tok "\\"} - append tok $c - } stringpart { if {$had_slash} {append tok "\\"} append tok $dquotes$c } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } whitespace { if {$state eq "multistring-space"} { #???? @@ -2752,8 +3348,7 @@ namespace eval tomlish::parse { append tok "\\]" } else { if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { - dict set token_waiting type endtablename - dict set token_waiting tok "" + set_token_waiting type endtablename value "" complete 1 return 1 } else { #we appear to still be in single or double quoted section @@ -2765,8 +3360,7 @@ namespace eval tomlish::parse { #todo? if {$had_slash} {append tok "\\"} #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" + set_token_waiting type endtablearrayname value "" complete 1 return 1 } default { @@ -2783,13 +3377,13 @@ namespace eval tomlish::parse { set tok "\]" return 1 } - key-space { + table-space { #invalid - but allow parser statemachine to report it. ? set_tokenType "endarray" set tok "\]" return 1 } - tablename { + tablename-state { #e.g [] - empty tablename - allowed or not? #empty tablename/tablearrayname ? #error "unexpected tablename problem" @@ -2798,7 +3392,7 @@ namespace eval tomlish::parse { set tok "" ;#no output into the tomlish list for this token return 1 } - tablearrayname { + tablearrayname-state { error "unexpected tablearrayname problem" set_tokenType "endtablearray" set tok "" ;#no output into the tomlish list for this token @@ -2809,18 +3403,18 @@ namespace eval tomlish::parse { set tok "\]" return 1 } - multistring-space - multiliteral-space { - if {$state eq "multistring-space"} { - set_tokenType "stringpart" - } else { - set_tokenType "stringlit" ;#review - } + multistring-space { + set_tokenType "stringpart" set tok "" if {$had_slash} { append tok "\\" } append tok "$dquotes\]" } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } default { error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" } @@ -2831,12 +3425,16 @@ namespace eval tomlish::parse { set dquotes $multi_dquote set multi_dquote "" ;#!! #backslash - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -2850,8 +3448,8 @@ namespace eval tomlish::parse { error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" } } - stringlit - squotedkey - itablesquotedkey { - #never need to set slash_active true when in stringlit + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens append tok "\\" set slash_active 0 } @@ -2892,20 +3490,28 @@ namespace eval tomlish::parse { } } } else { - if {$state eq "multistring-space"} { - if {$slash_active} { - set_tokenType "stringpart" - set tok "\\\\" - set slash_active 0 - } else { - if {$dquotes ne ""} { + switch -exact -- $state { + multistring-space { + if {$slash_active} { set_tokenType "stringpart" - set tok $dquotes + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 } - set slash_active 1 } - } else { - error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } } } } @@ -2913,8 +3519,35 @@ namespace eval tomlish::parse { #single quote set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { + append tok $c + if {$existingtoklen > 2} { + error "tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { + append tok $c + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" + } + } + } whitespace { #end whitespace incr i -1 ;#reprocess sq @@ -2923,7 +3556,8 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { + #temp token creatable only during value-expected or array-space switch -- [tcl::string::length $tok] { 1 { append tok $c @@ -2931,24 +3565,36 @@ namespace eval tomlish::parse { 2 { #switch? append tok $c - set_tokenType startmultisquote + set_tokenType triple_squote return 1 } default { - error "unexpected token length [tcl::string::length $tok] in 'startsquotesequence'" + error "unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } } - stringlit { + literal { #slash_active always false - #terminate the stringlit - dict set token_waiting type endsquote - dict set token_waiting tok "'" + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 return 1 } - squotedkey - itablesquotedkey { - dict set token_waiting type endsquote - dict set token_waiting tok "'" + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 return 1 } starttablename - starttablearrayname { @@ -2966,33 +3612,54 @@ namespace eval tomlish::parse { } else { switch -exact -- $state { value-expected - array-space { - #todo - multilitstring startsquotesequence? - set_tokenType "startsquotesequence" + set_tokenType "_start_squote_sequence" set tok "'" } - key-space { - set_tokenType "startsquote" - set tok $c + keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" return 1 } + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } curly-space { set_tokenType "startsquote" set tok $c return 1 } - tablename - tablearrayname { - #first char in tablename/tablearrayname state - set_tokenType $state ;#token name matches state name for tablename/tablearrayname + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname append tok "'" } - stringlit { - tomlish::log::debug "sq during stringlit state with no tokentype - empty stringlit?" - set_tokenType stringlit + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal incr -1 return 1 } multistring-space { - + error "unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey } default { error "unhandled squote during state '$state'. [tomlish::parse::report_line]" @@ -3006,10 +3673,14 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { append tok $c } elseif {$toklen == 2} { @@ -3021,7 +3692,7 @@ namespace eval tomlish::parse { error "unexpected token length $toklen in 'startquotesequence'" } } - startsquotesequence { + _start_squote_sequence { set toklen [tcl::string::length $tok] switch -- $toklen { 1 { @@ -3035,11 +3706,11 @@ namespace eval tomlish::parse { return 1 } default { - error "unexpected startsquotesequence length $toklen" + error "unexpected _start_squote_sequence length $toklen" } } } - stringlit { + literal - literalpart { append tok $c } string { @@ -3047,8 +3718,7 @@ namespace eval tomlish::parse { append tok "\\" $c } else { #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" + set_token_waiting type endquote value "\"" complete 1 return 1 } } @@ -3060,8 +3730,7 @@ namespace eval tomlish::parse { #incr i -1 if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" + set_token_waiting type endmultiquote value "\"\"\"" complete 1 set multi_dquote "" return 1 } else { @@ -3079,8 +3748,7 @@ namespace eval tomlish::parse { } else { switch -- [tcl::string::length $multi_dquote] { 2 { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" + set_token_waiting type endmultiquote value "\"\"\"" complete 1 set multi_dquote "" return 1 } @@ -3095,21 +3763,23 @@ namespace eval tomlish::parse { } } } - value-expected { - if {$multi_dquote eq "\"\""} { - dict set token_waiting type startmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - #end whitespace token and reprocess - incr i -1 - return 1 - } + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 + return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} } default { - dict set token_waiting type startquote - dict set token_waiting tok "\"" + set_token_waiting type startquote value "\"" complete 1 return 1 } } @@ -3123,8 +3793,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - dict set token_waiting type endquote - dict set token_waiting tok "\"" + set_token_waiting type endquote value "\"" complete 1 return 1 } } @@ -3147,7 +3816,7 @@ namespace eval tomlish::parse { #$slash_active not relevant when no tokenType #token is string only if we're expecting a value at this point switch -exact -- $state { - value-expected - array-space { + keyval-value-expected - value-expected - array-space { #!? start looking for possible multistartquote #set_tokenType startquote #set tok $c @@ -3164,12 +3833,11 @@ namespace eval tomlish::parse { set multi_dquote "" } else { if {$multi_dquote eq "\"\""} { - tomlish::log::debug "---> endmultiquote" + tomlish::log::debug "- tokloop char dq ---> endmultiquote" set_tokenType "endmultiquote" set tok "\"\"\"" return 1 - #dict set token_waiting type endmultiquote - #dict set token_waiting tok "\"\"\"" + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 #set multi_dquote "" #return 1 } else { @@ -3177,7 +3845,11 @@ namespace eval tomlish::parse { } } } - key-space { + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { set_tokenType "startquote" set tok $c return 1 @@ -3187,8 +3859,16 @@ namespace eval tomlish::parse { set tok $c return 1 } - tablename - tablearrayname { - set_tokenType $state + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin set tok $c } default { @@ -3203,17 +3883,21 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit - squotedkey { + literal - literalpart - squotedkey { #assertion had_slash 0, multi_dquote "" append tok $c } @@ -3227,19 +3911,18 @@ namespace eval tomlish::parse { append tok $dquotes$c } whitespace { - if {$state in {multistring-space multiliteral-space}} { + if {$state eq "multistring-space"} { set backlen [expr {[tcl::string::length $dquotes] + 1}] incr i -$backlen return 1 } else { - dict set token_waiting type equal - dict set token_waiting tok = + set_token_waiting type equal value = complete 1 return 1 } } barekey { - dict set token_waiting type equal - dict set token_waiting tok = + #set_token_waiting type equal value = complete 1 + incr i -1 return 1 } starttablename - starttablearrayname { @@ -3263,7 +3946,15 @@ namespace eval tomlish::parse { } append tok ${dquotes}= } - + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 + } default { set_tokenType "equal" set tok = @@ -3279,19 +3970,30 @@ namespace eval tomlish::parse { # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { + literal { append tok $c } + literalpart { + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warning "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } stringpart { append tok $dquotes$c } @@ -3321,23 +4023,36 @@ namespace eval tomlish::parse { set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { - #review - append tok $c + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 + return 1 } newline { + #review #this lf is the trailing part of a crlf - append tok lf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } stringpart { @@ -3348,13 +4063,11 @@ namespace eval tomlish::parse { } else { if {$had_slash} { #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - dict set token_waiting type continuation - dict set token_waiting tok \\ + set_token_waiting type continuation value \\ complete 1 incr i -1 return 1 } else { - dict set token_waiting type newline - dict set token_waiting tok lf + set_token_waiting type newline value lf complete 1 return 1 } } @@ -3370,8 +4083,7 @@ namespace eval tomlish::parse { # ie whitespace is split into separate whitespace tokens at each newline #puts "-------------- newline lf during tokenType $tokenType" - dict set token_waiting type newline - dict set token_waiting tok lf + set_token_waiting type newline value lf complete 1 return 1 } } @@ -3396,6 +4108,12 @@ namespace eval tomlish::parse { return 1 } } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } default { #ignore slash? error? set_tokenType "newline" @@ -3421,12 +4139,22 @@ namespace eval tomlish::parse { set multi_dquote "" set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -3439,28 +4167,27 @@ namespace eval tomlish::parse { if {$had_slash} {append tok "\\"} append tok $c } - stringlit - squotedkey - itablesquotedkey { - #assert had_slash always 0, multi_dquote "" - append tok $c - } stringpart { + #stringpart can have up to 2 quotes too if {$had_slash} {append tok "\\"} append tok $dquotes$c } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } whitespace { if {$state eq "multistring-space"} { set backlen [expr {[tcl::string::length $dquotes] + 1}] incr i -$backlen return 1 } else { - dict set token_waiting type comma - dict set token_waiting tok "," + set_token_waiting type comma value "," complete 1 return 1 } } default { - dict set token_waiting type comma - dict set token_waiting tok "," + set_token_waiting type comma value "," complete 1 if {$had_slash} {append tok "\\"} return 1 } @@ -3475,7 +4202,7 @@ namespace eval tomlish::parse { } multiliteral-space { #assert had_slash 0, multi_dquote "" - set_tokenType "stringlit" + set_tokenType "literalpart" set tok "," } default { @@ -3491,17 +4218,27 @@ namespace eval tomlish::parse { set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - comment - untyped-value { + comment - untyped_value { if {$had_slash} {append tok "\\"} append tok $c } @@ -3509,24 +4246,31 @@ namespace eval tomlish::parse { if {$had_slash} {append tok "\\"} append tok $c } - stringlit - squotedkey - itablesquotedkey { - #assert had_slash always 0, multi_dquote "" - append tok $c - } stringpart { if {$had_slash} {append tok "\\"} append tok $dquotes$c } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } whitespace { - if {$state eq "multistring-space"} { - set backchars [expr {[tcl::string::length $dquotes] + 1}] - if {$had_slash} { - incr backchars 1 + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" } - incr i -$backchars - return 1 - } else { - error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" } } starttablename - starttablearrayname { @@ -3541,15 +4285,12 @@ namespace eval tomlish::parse { #e.g x.y = 1 #we need to transition the barekey to become a structured table name ??? review #x is the tablename y is the key - switch_tokenType tablenamepluskey - incr i -1 - - #error "barekey period unimplemented" + set_token_waiting type dotsep value "." complete 1 + return 1 } default { - error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" - #dict set token_waiting type period - #dict set token_waiting tok "." + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 #return 1 } } @@ -3563,12 +4304,16 @@ namespace eval tomlish::parse { } multiliteral-space { set_tokenType "literalpart" - set tok "" - if {$had_slash} {append tok "\\"} - append tok "$dquotes." + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" + set tok "." + return 1 } default { - set_tokenType "untyped-value" + set_tokenType "untyped_value" set tok "." } } @@ -3578,14 +4323,24 @@ namespace eval tomlish::parse { " " { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -3593,15 +4348,13 @@ namespace eval tomlish::parse { barekey { #todo had_slash - emit token or error #whitespace is a terminator for bare keys - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - untyped-value { + untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } @@ -3615,7 +4368,21 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } append tok $c } - stringlit - squotedkey - itablesquotedkey { + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart - squotedkey - itablesquotedkey { append tok $c } whitespace { @@ -3633,20 +4400,6 @@ namespace eval tomlish::parse { append tok $c } } - stringpart { - if {$had_slash} { - #REVIEW - #emit the stringpart - go back to the slash - incr i -2 - return 1 - } else { - #split into STRINGPART aaa WS " " - #keeping WS separate allows easier processing of CONT stripping - append tok $dquotes - incr i -1 - return 1 - } - } starttablename - starttablearrayname { incr i -1 return 1 @@ -3664,15 +4417,19 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - if {$had_slash} { - set tok "\\$c" - } else { - set tok $c - } + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c } multistring-space { if {$had_slash} { @@ -3691,6 +4448,10 @@ namespace eval tomlish::parse { append tok $c } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { if {$had_slash} { error "unexpected backslash [tomlish::parse::report_line]" @@ -3705,43 +4466,58 @@ namespace eval tomlish::parse { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { - append tok $c - } barekey { #whitespace is a terminator for bare keys incr i -1 - #set token_waiting type whitespace - #set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 return 1 } untyped_value { #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c + #set_token_waiting type whitespace value $c complete 1 incr i -1 return 1 } - quotedkey - itablequotedkey { + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { append tok $c } string - comment - whitespace { append tok $c } stringpart { - append tok $dquotes$c + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c } starttablename - starttablearrayname { incr i -1 @@ -3762,10 +4538,14 @@ namespace eval tomlish::parse { set slash_active 0 } switch -exact -- $state { - tablename - tablearrayname { + tablename-state { #tablename can have leading,trailing and interspersed whitespace! #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname set tok $c } multistring-space { @@ -3786,6 +4566,10 @@ namespace eval tomlish::parse { } } } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { set_tokenType "whitespace" append tok $c @@ -3795,41 +4579,58 @@ namespace eval tomlish::parse { } bom { #BOM (Byte Order Mark) - ignored by token consumer - if {[string length $tokenType]} { + if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - startsquotesequence { + _start_squote_sequence { + #assert - tok will be one or two squotes only incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 } - stringlit { + literal - literalpart { append tok $c } + default { + set_token_waiting type bom value "\uFEFF" complete 1 + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } default { set_tokenType "bom" set tok "\uFEFF" return 1 } } - } else { - set_tokenType "bom" - set tok "\uFEFF" - return 1 } } default { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } startquotesequence { _shortcircuit_startquotesequence } - startsquotesequence { - puts stdout "HERE $c" + _start_squote_sequence { incr i -[tcl::string::length $tok] set_tokenType "startsquote" return 1 @@ -3866,7 +4667,7 @@ namespace eval tomlish::parse { append tok $dquotes$c } default { - #e.g comment/string/stringlit/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } @@ -3874,7 +4675,7 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - key-space - curly-space - curly-syntax { + table-space - curly-space - curly-syntax { #if no currently active token - assume another key value pair if {[tomlish::utils::is_barekey $c]} { set_tokenType "barekey" @@ -3892,17 +4693,25 @@ namespace eval tomlish::parse { set tok $dquotes$c } } - tablename { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { set_tokenType "tablename" set tok $c } - tablearrayname { + tablearrayname-state { set_tokenType "tablearrayname" set tok $c } + dottedkey-space { + set_tokenType barekey + set tok $c + } default { - tomlish::log::debug "char '$c' setting to untyped-value while state:$state" - set_tokenType "untyped-value" + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" set tok $c } } @@ -3913,14 +4722,14 @@ namespace eval tomlish::parse { } #run out of characters (eof) - if {[::string length $tokenType]} { + if {[tcl::string::length $tokenType]} { #check for invalid ending tokens - #if {$state eq "err"} { + #if {$state eq "err-state"} { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} switch -exact -- $tokenType { startquotesequence { - set toklen [::string length $tok] + set toklen [tcl::string::length $tok] if {$toklen == 1} { #invalid #eof with open string @@ -3930,34 +4739,30 @@ namespace eval tomlish::parse { #we ended in a double quote, not actually a startquoteseqence - effectively an empty string switch_tokenType "startquote" incr i -1 - #dict set token_waiting type "string" - #dict set token_waiting tok "" + #set_token_waiting type string value "" complete 1 return 1 } } - startsquotesequence { - set toklen [::string length $tok] + _start_squote_sequence { + set toklen [tcl::string::length $tok] switch -- $toklen { 1 { - #invalid eof with open stringlit + #invalid eof with open literal error "eof reached without closing single quote for string literal. [tomlish::parse::report_line]" } 2 { - dict set token_waiting type endsquote - dict set token_waiting tok "'" - ### - set_tokenType "stringlit" + set_token_waiting type endsquote value "'" complete 1 + set_tokenType "literal" set tok "" return 1 } } } } - dict set token_waiting type "eof" - dict set token_waiting tok "eof" + set_token_waiting type eof value eof complete 1 return 1 } else { - ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" set tokenType "eof" set tok "eof" } @@ -4082,7 +4887,7 @@ if {$argc > 0} { puts stderr "argc: $argc args: $argv" if {($argc == 1)} { - if {[::string tolower $argv] in {help -help h -h}} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { puts stdout "Usage: -app where appname one of:[tomlish::appnames]" exit 0 } else { diff --git a/src/vfs/_vfscommon/modules/zipper-0.11.tm b/src/vfs/_vfscommon/modules/zipper-0.11.tm index b73ec5de..a8c49a50 100644 Binary files a/src/vfs/_vfscommon/modules/zipper-0.11.tm and b/src/vfs/_vfscommon/modules/zipper-0.11.tm differ diff --git a/src/vfs/punk86.vfs/main.tcl b/src/vfs/punk86.vfs/main.tcl index 9e46f08b..d7b65302 100644 --- a/src/vfs/punk86.vfs/main.tcl +++ b/src/vfs/punk86.vfs/main.tcl @@ -8,6 +8,15 @@ apply { args { + set has_zipfs [expr {[info commands tcl::zipfs::root] ne ""}] + if {$has_zipfs} { + set has_zipfs_attached [expr {[llength [tcl::zipfs::mount]]}] + } else { + set has_zipfs_attached 0 + } + set tclmajorv [lindex [split [info tclversion] .] 0] + + #here we make an attempt to avoid premature (costly) auto_path/tcl::tm::list scanning caused by our initial 'package require starkit'. #we will first look for a starkit.tcl in an expected location and try to load that, then fallback to package require. @@ -17,7 +26,7 @@ apply { args { set topdir [file dirname $normscript] set found_starkit_tcl 0 - set possible_lib_vfs_folders [glob -dir [file join $topdir lib] -type d vfs*] + set possible_lib_vfs_folders [glob -nocomplain -dir [file join $topdir lib] -type d vfs*] foreach test_folder $possible_lib_vfs_folders { #e.g /lib/vfs1.4.1 #we don't expect multiple vfs* folders - but we will process any found and load the pkgIndex.tcl from these folders. @@ -33,9 +42,9 @@ apply { args { set found_starkit_tcl 1 } if {!$found_starkit_tcl} { - #our internal search for starkit failed. - #either we are in a pure zipfs system - or the starkit package is somewhere unexpected - #for pure zipfs - it's wasteful to perform exhaustive search for starkit + #our internal 'quick' search for starkit failed. + #either we are in a pure zipfs system - or the starkit package is somewhere more devious + #for pure zipfs - it's a little wasteful to perform exhaustive search for starkit #review - only keep searching if not 'dev' first arg? #Initially we've done no scans of auto_path/tcl::tm::list - but there will already be a core set of packages known by the kit @@ -44,7 +53,6 @@ apply { args { puts stderr [join [package names] \n] set original_packages [package names] - if {![catch {package require starkit}]} { #known side-effects of starkit::startup #sets the ::starkit::mode variable to the way in which it was launched. One of: {starpack starkit unwrapped tclhttpd plugin service sourced} @@ -67,20 +75,21 @@ apply { args { - # -- --- --- - #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it + #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it. review - for what versions of Tcl does this apply? + #known to occur in old 8.6.8 kits as well as 8.7 #review - do we want $normexe or [info nameofexecutable] for $thisexe here? Presumably [info nameofexecutable] (possible symlink) ok + #we want to be able to launch a process from the interactive shell using the same name this one was launched with. set thisexe [file tail [info nameofexecutable]] ;#e.g punk86.exe set thisexeroot [file rootname $thisexe] ;#e.g punk86 set ::auto_execs($thisexeroot) [info nameofexecutable] if {$thisexe ne $thisexeroot} { + #on windows make the .exe point there too set ::auto_execs($thisexe) [info nameofexecutable] } # -- --- --- - set tclmajorv [lindex [split [info tclversion] .] 0] - if {[info exists ::tcl::kitpath]} { + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { set kp $::tcl::kitpath set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -94,9 +103,10 @@ apply { args { } } } - if {[info commands tcl::zipfs::root] ne ""} { - #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. - set zipbase [file join [tcl::zipfs::root] app] ;#zipfs root has trailing slash - but file join does the right thing + if {$has_zipfs_attached} { + #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. (why?) + #default 'zipfs root' has trailing slash (//zipfs:/) - but file join does the right thing + set zipbase [file join [tcl::zipfs::root] app] if {"$zipbase" in [tcl::zipfs::mount]} { set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -111,12 +121,15 @@ apply { args { } } } + + + set internal_paths [list] - if {[info commands tcl::zipfs::root] ne ""} { + if {$has_zipfs} { set ziproot [tcl::zipfs::root] ;#root is enough to determine internal zipkit path lappend internal_paths $ziproot } - if {[info exists ::tcl::kitpath]} { + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { lappend internal_paths $::tcl::kitpath } @@ -438,16 +451,46 @@ apply { args { } } } - #force rescan #catch {package require flobrudder666_nonexistant} set arglist $args } - if {[llength $arglist]} { + #assert arglist has had 'dev' first arg removed if it was present. + if {[llength $arglist] == 1 && [lindex $arglist 0] eq "tclsh"} { + #called as dev tclsh or tclsh + #we would like to drop through to standard tclsh repl without launching another process? how? + #tclMain.c doesn't seem to allow it. + + if {![info exists ::env(TCLSH_PIPEREPL)]} { + set is_tclsh_piperepl_env_true 0 + } else { + if {[string is boolean -strict $::env(TCLSH_PIPEREPL)]} { + set is_tclsh_piperepl_env_true $::env(TCLSH_PIPEREPL) + } else { + set is_tclsh_piperepl_env_true 0 + } + } + if {!$is_tclsh_piperepl_env_true} { + puts stderr "tcl_interactive: $::tcl_interactive" + puts stderr "stdin: [chan configure stdin]" + puts stderr "Environment variable TCLSH_PIPEREPL is not set or is false or is not a boolean" + } else { + #according to env TCLSH_PIPEREPL and our commandline argument - tclsh repl is desired + #check if tclsh/punk has had the piperepl patch applied - in which case tclsh(istty) should exist + if {![info exists ::tclsh(istty)]} { + puts stderr "error: the runtime doesn't appear to have been compiled with the piperepl patch" + } + } + set ::tcl_interactive 1 + set ::tclsh(dorepl) 1 + + } elseif {[llength $arglist]} { + #pass through to shellspy commandline processor #puts stdout "main.tcl launching app-shellspy" package require app-shellspy } else { + #punk shell puts stdout "main.tcl launching app-punk. pkg names count:[llength [package names]]" package require app-punk #app-punk starts repl diff --git a/src/vfs/punk86.vfs/main.tcl.xxx b/src/vfs/punk86.vfs/main.tcl.xxx new file mode 100644 index 00000000..9e46f08b --- /dev/null +++ b/src/vfs/punk86.vfs/main.tcl.xxx @@ -0,0 +1,456 @@ + +#main.tcl - we expect to be in the context of a zipkit or tclkit vfs attached to a tcl executable. +#review - what happens if both are somehow attached and both vfs and zipfs are available? +# - if that's even possible - we have no control here over which main.tcl was selected as we're already here +#The logic below will add appropriate package paths from starkit and zipfs vfs paths +# - and restrict package paths to those coming from a vfs (if not launched with 'dev' first arg which allows external paths to remain) + + + +apply { args { + #here we make an attempt to avoid premature (costly) auto_path/tcl::tm::list scanning caused by our initial 'package require starkit'. + #we will first look for a starkit.tcl in an expected location and try to load that, then fallback to package require. + + #standard way to avoid symlinking issues - review! + set normscript [file dirname [file normalize [file join [info script] __dummy__]]] + set normexe [file dirname [file normalize [file join [info nameofexecutable] __dummy__]]] + + set topdir [file dirname $normscript] + set found_starkit_tcl 0 + set possible_lib_vfs_folders [glob -dir [file join $topdir lib] -type d vfs*] + foreach test_folder $possible_lib_vfs_folders { + #e.g /lib/vfs1.4.1 + #we don't expect multiple vfs* folders - but we will process any found and load the pkgIndex.tcl from these folders. + #order of folder processing shouldn't matter (rely on order returned by 'package versions' - review) + if {[file exists $test_folder/starkit.tcl] && [file exists $test_folder/pkgIndex.tcl]} { + set dir $test_folder + source $test_folder/pkgIndex.tcl + } + } + if {[set starkitv [lindex [package versions starkit] end]] ne ""} { + #run the ifneeded script for the latest found (assuming package versions ordering is correct) + eval [package ifneeded starkit $starkitv] + set found_starkit_tcl 1 + } + if {!$found_starkit_tcl} { + #our internal search for starkit failed. + #either we are in a pure zipfs system - or the starkit package is somewhere unexpected + #for pure zipfs - it's wasteful to perform exhaustive search for starkit + #review - only keep searching if not 'dev' first arg? + + #Initially we've done no scans of auto_path/tcl::tm::list - but there will already be a core set of packages known by the kit + #retain it so we can 'forget' the difference after our first 'package require' forces a full scan which includes some paths we may not wish to include or at least include with different preferences + puts "main.tcl 1)--> package name count: [llength [package names]]" + puts stderr [join [package names] \n] + set original_packages [package names] + + + if {![catch {package require starkit}]} { + #known side-effects of starkit::startup + #sets the ::starkit::mode variable to the way in which it was launched. One of: {starpack starkit unwrapped tclhttpd plugin service sourced} + #set the ::starkit::topdir variable + #if mode not starpack, then: + # - adds $::starkit::topdir/lib to the auto_path if not already present + # + #In this context (vfs attached to tcl kit executable - we expect the launch mode to be 'starkit' + set starkit_startmode [starkit::startup] + puts stderr "STARKIT MODE: $starkit_startmode" + } + puts "main.tcl 2)--> package name count: [llength [package names]]" + foreach pkg [package names] { + if {$pkg ni $original_packages} { + package forget $pkg + } + } + puts "main.tcl 3)--> package name count: [llength [package names]]" + } + + + + + # -- --- --- + #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it + #review - do we want $normexe or [info nameofexecutable] for $thisexe here? Presumably [info nameofexecutable] (possible symlink) ok + set thisexe [file tail [info nameofexecutable]] ;#e.g punk86.exe + set thisexeroot [file rootname $thisexe] ;#e.g punk86 + set ::auto_execs($thisexeroot) [info nameofexecutable] + if {$thisexe ne $thisexeroot} { + set ::auto_execs($thisexe) [info nameofexecutable] + } + # -- --- --- + set tclmajorv [lindex [split [info tclversion] .] 0] + + if {[info exists ::tcl::kitpath]} { + set kp $::tcl::kitpath + set existing_module_paths [string tolower [tcl::tm::list]] + foreach p [list modules modules_tcl$tclmajorv] { + if {[string tolower [file join $kp $p]] ni $existing_module_paths} { + tcl::tm::add [file join $kp $p] + } + } + foreach l [list lib lib_tcl$tclmajorv] { + if {[string tolower [file join $kp $l]] ni [string tolower $::auto_path]} { + lappend ::auto_path [file join $kp $l] + } + } + } + if {[info commands tcl::zipfs::root] ne ""} { + #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. + set zipbase [file join [tcl::zipfs::root] app] ;#zipfs root has trailing slash - but file join does the right thing + if {"$zipbase" in [tcl::zipfs::mount]} { + set existing_module_paths [string tolower [tcl::tm::list]] + foreach p [list modules modules_tcl$tclmajorv] { + if {[string tolower [file join $zipbase $p]] ni $existing_module_paths} { + tcl::tm::add [file join $zipbase $p] + } + } + foreach l [list lib lib_tcl$tclmajorv] { + if {[string tolower [file join $zipbase $l]] ni [string tolower $::auto_path]} { + lappend ::auto_path [file join $zipbase $l] + } + } + } + } + set internal_paths [list] + if {[info commands tcl::zipfs::root] ne ""} { + set ziproot [tcl::zipfs::root] ;#root is enough to determine internal zipkit path + lappend internal_paths $ziproot + } + if {[info exists ::tcl::kitpath]} { + lappend internal_paths $::tcl::kitpath + } + + if {[lindex $args 0] in {dev devquiet}} { + set arglist [lassign $args devmode] + set ::argv $arglist + set ::argc [llength $arglist] + if {$devmode ne "devquiet"} { + puts stderr "DEV MODE - preferencing external libraries and modules" + } + #Note regarding the use of package forget and binary packages + #If the package has loaded a binary component - then a package forget and a subsequent package require can result in both binaries being present, as seen in 'info loaded' result - potentially resulting in anomalous behaviour + #In general package forget after a package has already been required may need special handling and should be avoided where possible. + #Only a limited set of package support unloading a binary component + #We limit the use of 'package forget' here to packages that have not been loaded (whether pure-tcl or not) + #ie in this context it is used only for manipulating preferences of which packages are loaded in the first place + + #Unintuitive preferencing can occur if the same package version is for example present in a tclkit and in a module or lib folder external to the kit. + #It may be desired for performance or testing reasons to preference the library outside of the kit - and raising the version number may not always be possible/practical. + + #If the executable is a kit - we don't know what packages it contains or whether it allows loading from env based external paths. + #For app-punk projects - the lib/module paths based on the project being run should take preference, even if the version number is the same. + #(these are the 'info nameofexecutable' or 'info script' or 'pwd' relative paths that are added here) + #Some kits will remove lib/module paths (from auto_path & tcl::tm::list) that have been added via TCLLIBPATH / TCLX_Y_TM_PATH environment variables + #Some kits will remove those env-provided lib paths but fail to remove the env-provided module paths + #(differences in boot.tcl in the kits) + + #------------------------------------------------------------------------------ + #Module loading + #------------------------------------------------------------------------------ + #If the current directory contains .tm files when the punk repl starts - then it will attempt to preference them + # - but first add our other known relative modules paths - as it won't make sense to use current directory as a modulepath if it's an ancestor of one of these.. + + #original tm list at this point consists of whatever the kit decided + some prepended internal kit paths that punk decided on. + #we want to bring the existing external paths to the front (probably from the kit looking at various env TCL* values) + #we want to maintain the order of the internal paths. + #we then want to add our external dev paths of the total list + + #assert [llength [package names]] should be small at this point ~ <10 ? + + set original_tm_list [tcl::tm::list] + tcl::tm::remove {*}$original_tm_list + + # -- --- --- --- --- --- --- --- + #split existing paths into internal & external + set internal_tm_dirs [list] ;# + set external_tm_dirs [list] + set lcase_internal_paths [string tolower $internal_paths] + foreach tm $original_tm_list { + set tmlower [string tolower $tm] + set is_internal 0 + foreach okprefix $lcase_internal_paths { + if {[string match "$okprefix*" $tmlower]} { + lappend internal_tm_dirs $tm + set is_internal 1 + break + } + } + if {!$is_internal} { + lappend external_tm_dirs $tm + } + } + # -- --- --- --- --- --- --- --- + set original_external_tm_dirs $external_tm_dirs ;#we check some of our additions and bring to front - so we refer to external list as provided by kit + #assert internal_tm_dirs and external_tm_dirs have their case preserved.. + + set module_folders [list] + + #review - the below statement doesn't seem to be true. + #tm list first added end up later in the list - and then override earlier ones if version the same - so add pwd-relative 1st to give higher priority + #(only if Tcl has scanned all paths - see below bogus package load) + #1 + + #2) + # .../bin/punkXX.exe look for ../modules (i.e modules folder at same level as bin folder) + #using normexe under assumption [info name] might be symlink - and more likely to be where the modules are located. + #we will try both relative to symlink and relative to underlying exe - with those at symlink location earlier in the list + #review - a user may have other expectations. + + #case differences could represent different paths on unix-like platforms. + #It's perhaps a little unwise to configure matching paths with only case differences for a cross-platform tool .. but we should support it for those who use it and have no interest in windows - todo! review + set normexe_dir [file dirname $normexe] + if {[file tail $normexe_dir] eq "bin"} { + #underlying exe in a bin dir - backtrack 1 + lappend exe_module_folders [file dirname $normexe_dir]/modules + lappend exe_module_folders [file dirname $normexe_dir]/modules_tcl$tclmajorv + } else { + lappend exe_module_folders $normexe_dir/modules + lappend exe_module_folders $normexe_dir/modules_tcl$tclmajorv + } + set nameexe_dir [file dirname [info nameofexecutable]] + #possible symlink (may resolve to same path as above - we check below to not add in twice) + if {[file tail $nameexe_dir] eq "bin"} { + lappend exe_module_folders [file dirname $nameexe_dir]/modules + lappend exe_module_folders [file dirname $nameexe_dir]/modules_tcl$tclmajorv + } else { + lappend exe_module_folders $nameexe_dir/modules + lappend exe_module_folders $nameexe_dir/modules_tcl$tclmajorv + } + + foreach modulefolder $exe_module_folders { + set lc_external_tm_dirs [string tolower $external_tm_dirs] + set lc_modulefolder [string tolower $modulefolder] + if {$lc_modulefolder in [string tolower $original_external_tm_dirs]} { + #perhaps we have an env var set pointing to one of our dev foldersl. We don't want to rely on how the kit ordered it. + #bring to front if not already there. + #assert it must be present in $lc_external_tm_dirs if it's in $original_external_tm_dirs + set posn [lsearch $lc_external_tm_dirs $lc_modulefolder] + if {$posn > 0} { + #don't rely on lremove here. Not all runtimes have it and we don't want to load our forward-compatibility packages yet. + #(still need to support tcl 8.6 - and this script used in multiple kits) + set external_tm_dirs [lreplace $external_tm_dirs $posn $posn] + #don't even add it back in if it doesn't exist in filesystem + if {[file isdirectory $modulefolder]} { + set external_tm_dirs [linsert $external_tm_dirs 0 $modulefolder] + } + } + } else { + if {$lc_modulefolder ni $lc_external_tm_dirs && [file isdirectory $modulefolder]} { + set external_tm_dirs [linsert $external_tm_dirs 0 $modulefolder] ;#linsert seems faster than 'concat [list $modulefolder] $external_tm_dirs' - review + } + } + } + if {![llength $exe_module_folders]} { + puts stderr "Warning - no 'modules' or 'modules_tcl$tclmajorv' folders found relative to executable (or it's symlink if any)" + } + + + + #add lib and lib_tcl8 lib_tcl9 etc based on tclmajorv + #libs are appended to end - so add higher priority libraries last (opposite to modules) + #auto_path - add exe-relative after exe-relative path + if {"windows" eq $::tcl_platform(platform)} { + #case differences dont matter - but can stop us finding path in auto_path + foreach libsub [list lib_tcl$tclmajorv lib] { + if {[file tail $nameexe_dir] eq "bin"} { + set libfolder [file dirname $nameexe_dir]/$libsub + } else { + set libfolder $nameexe_dir/$libsub + } + if {[string tolower $libfolder] ni [string tolower $::auto_path] && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + # ------------- + if {[file tail $normexe_dir] eq "bin"} { + set libfolder [file dirname $normexe_dir]/$libsub + } else { + set libfolder $normexe_dir/$libsub + } + if {[string tolower $libfolder] ni [string tolower $::auto_path] && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + # ------------- + set libfolder [pwd]/$libsub + if {[string tolower $libfolder] ni [string tolower $::auto_path] && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + } + } else { + #on other platforms, case differences could represent different paths + foreach libsub [list lib_tcl$tclmajorv lib] { + if {[file tail $nameexe_dir] eq "bin"} { + set libfolder [file dirname $nameexe_dir]/$libsub + } else { + set libfolder $nameexe_dir/$libsub + } + if {$libfolder ni $::auto_path && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + # ------------- + if {[file tail $normexe_dir] eq "bin"} { + set libfolder [file dirname $normexe_dir]/$libsub + } else { + set libfolder $normexe_dir/$libsub + } + if {$libfolder ni $::auto_path && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + # ------------- + set libfolder [pwd]/$libsub + if {$libfolder ni $::auto_path && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + } + } + + + #2) support developer running from a folder containing *.tm files they want to make available + # could cause problems if user happens to be in a subdirectory of a tm folder structure as namespaced modules won't work if not at a tm path root. + #The current dir could also be a subdirectory of an existing tm_dir which would fail during tcl::tm::add - we will need to wrap all additions in catch + set currentdir_modules [glob -nocomplain -dir [pwd] -type f -tail *.tm] + #we assume [pwd] will always return an external (not kit) path at this point - REVIEW + if {[llength $currentdir_modules]} { + #now add current dir (if no conflict with above) + #catch {tcl::tm::add [pwd]} + set external_tm_dirs [linsert $external_tm_dirs 0 $currentdir_modules] + if {[file exists [pwd]/modules] || [file exists [pwd]/modules_tcl$tclmajorv]} { + puts stderr "WARNING: modules or modules_tcl$tclmajorv folders not added to tcl::tm::path due to modules found in current workding dir [pwd]" + } + } else { + #modules or modules_tclX subdir relative to cwd cannot be added if [pwd] has been added + set cwd_modules_folder [file normalize [file join [pwd] modules]] + if {[file isdirectory $cwd_modules_folder]} { + if {[string tolower $cwd_modules_folder] ni [string tolower $external_tm_dirs]} { + #prepend + set external_tm_dirs [linsert $external_tm_dirs 0 $cwd_modules_folder] + } + } + set cwd_modules_folder [file normalize [file join [pwd] modules_tcl$tclmajorv]] + if {[file isdirectory $cwd_modules_folder]} { + if {[string tolower $cwd_modules_folder] ni [string tolower $external_tm_dirs]} { + #prepend + set external_tm_dirs [linsert $external_tm_dirs 0 $cwd_modules_folder] + } + } + } + + #assert tcl::tm::list still empty here + #restore module paths + #add internals first as in 'dev' mode (dev as first argument on launch) we preference external modules + #note use of lreverse to maintain same order + foreach p [lreverse $internal_tm_dirs] { + if {$p ni [tcl::tm::list]} { + #the prior tm paths go to the head of the list. + #They are processed first.. but an item of same version later in the list will override one at the head. (depending on when list was scanned) REVIEW - true statement??? + #addition can fail if one path is a prefix of another + if {[catch {tcl::tm::add $p} errM]} { + puts stderr "Failed to add internal module dir '$p' to tcl::tm::list\n$errM" + } + } + } + foreach p [lreverse $external_tm_dirs] { + if {$p ni [tcl::tm::list]} { + if {[catch {tcl::tm::add $p} errM]} { + puts stderr "Failed to add external module dir '$p' to tcl::tm::list\n$errM" + } + } + } + + #------------------------------------------------------------------------------ + #REVIEW + #package require a bogus package to ensure Tcl scans the whole auto_path/tm list - otherwise a lower-versioned module earlier in the path may be loaded + #This seems to take not insignificant time depending on size of auto_path and tcl::tm::list (e.g 2023 i9 ssd 100ms) - but seems unavoidable for now + #catch {package require flobrudder666_nonexistant} + #------------------------------------------------------------------------------ + + + } else { + #Tcl_Init will most likely have set up some external paths + #As our app has been started without the 'dev' first arg - we will prune paths that are not zipfs or tclkit + set new_auto_path [list] + #review - case insensitive ok for windows - but could cause issues on other platforms? + foreach ap $::auto_path { + set aplower [string tolower $ap] + foreach okprefix $internal_paths { + if {[string match "[string tolower $okprefix]*" $aplower]} { + lappend new_auto_path $ap + break + } + } + } + set ::auto_path $new_auto_path + + set new_tm_list [list] + foreach tm [tcl::tm::list] { + set tmlower [string tolower $tm] + foreach okprefix $internal_paths { + if {[string match "[string tolower $okprefix]*" $tmlower]} { + lappend new_tm_list $tm + break + } + } + } + tcl::tm::remove {*}[tcl::tm::list] + tcl::tm::add {*}[lreverse $new_tm_list] + + + #If it looks like we are running the vfs/xxx.vfs/main.tcl from an external tclsh - try to use vfs folders to simulate kit state + #set script_relative_lib [file normalize [file join [file dirname [info script]] lib]] + set scriptdir [file dirname [info script]] + if {![string match //zipfs:/* $scriptdir] && ![info exists ::tcl::kitpath]} { + #presumably running the vfs/xxx.vfs/main.tcl script using a non-kit tclsh that doesn't have starkit lib available.. lets see if we can move forward anyway + set vfscontainer [file normalize [file dirname $scriptdir]] + set vfscommon [file join $vfscontainer _vfscommon] + set vfsdir [file normalize $scriptdir] + set projectroot [file dirname [file dirname $vfscontainer]] ;#back below src/vfs/xxx.vfs/main.tcl + puts stdout "no starkit. projectroot?: $projectroot" + puts stdout "info lib: [info library]" + + #add back the info lib reported by the executable.. as we can't access the one built into a kit + if {[file exists [info library]]} { + lappend ::auto_path [info library] + } + + set lib_types [list lib lib_tcl$tclmajorv] + foreach l $lib_types { + set lib [file join $vfsdir $l] + if {[file exists $lib] && [string tolower $lib] ni [string tolower $::auto_path]} { + lappend ::auto_path $lib + } + } + foreach l $lib_types { + set lib [file join $vfscommon $l] + if {[file exists $lib] && [string tolower $lib] ni [string tolower $::auto_path]} { + lappend ::auto_path $lib + } + } + set mod_types [list modules modules_tcl$tclmajorv] + foreach m $mod_types { + set modpath [file join $vfsdir $m] + if {[file exists $modpath] && [string tolower $modpath] ni [string tolower [tcl::tm::list]]} { + tcl::tm::add $modpath + } + } + foreach m $mod_types { + set modpath [file join $vfscommon $m] + if {[file exists $modpath] && [string tolower $modpath] ni [string tolower [tcl::tm::list]]} { + tcl::tm::add $modpath + } + } + } + + #force rescan + #catch {package require flobrudder666_nonexistant} + set arglist $args + } + + if {[llength $arglist]} { + #puts stdout "main.tcl launching app-shellspy" + package require app-shellspy + } else { + puts stdout "main.tcl launching app-punk. pkg names count:[llength [package names]]" + package require app-punk + #app-punk starts repl + #repl::start stdin -title "main.tcl" + } +}} {*}$::argv diff --git a/src/vfs/punk86bawt.vfs/main.tcl b/src/vfs/punk86bawt.vfs/main.tcl index 9e46f08b..5a844a0e 100644 --- a/src/vfs/punk86bawt.vfs/main.tcl +++ b/src/vfs/punk86bawt.vfs/main.tcl @@ -8,6 +8,15 @@ apply { args { + set has_zipfs [expr {[info commands tcl::zipfs::root] ne ""}] + if {$has_zipfs} { + set has_zipfs_attached [expr {[llength [tcl::zipfs::mount]]}] + } else { + set has_zipfs_attached 0 + } + set tclmajorv [lindex [split [info tclversion] .] 0] + + #here we make an attempt to avoid premature (costly) auto_path/tcl::tm::list scanning caused by our initial 'package require starkit'. #we will first look for a starkit.tcl in an expected location and try to load that, then fallback to package require. @@ -17,7 +26,7 @@ apply { args { set topdir [file dirname $normscript] set found_starkit_tcl 0 - set possible_lib_vfs_folders [glob -dir [file join $topdir lib] -type d vfs*] + set possible_lib_vfs_folders [glob -nocomplain -dir [file join $topdir lib] -type d vfs*] foreach test_folder $possible_lib_vfs_folders { #e.g /lib/vfs1.4.1 #we don't expect multiple vfs* folders - but we will process any found and load the pkgIndex.tcl from these folders. @@ -33,9 +42,9 @@ apply { args { set found_starkit_tcl 1 } if {!$found_starkit_tcl} { - #our internal search for starkit failed. - #either we are in a pure zipfs system - or the starkit package is somewhere unexpected - #for pure zipfs - it's wasteful to perform exhaustive search for starkit + #our internal 'quick' search for starkit failed. + #either we are in a pure zipfs system - or the starkit package is somewhere more devious + #for pure zipfs - it's a little wasteful to perform exhaustive search for starkit #review - only keep searching if not 'dev' first arg? #Initially we've done no scans of auto_path/tcl::tm::list - but there will already be a core set of packages known by the kit @@ -44,7 +53,6 @@ apply { args { puts stderr [join [package names] \n] set original_packages [package names] - if {![catch {package require starkit}]} { #known side-effects of starkit::startup #sets the ::starkit::mode variable to the way in which it was launched. One of: {starpack starkit unwrapped tclhttpd plugin service sourced} @@ -67,20 +75,21 @@ apply { args { - # -- --- --- - #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it + #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it. review - for what versions of Tcl does this apply? + #known to occur in old 8.6.8 kits as well as 8.7 #review - do we want $normexe or [info nameofexecutable] for $thisexe here? Presumably [info nameofexecutable] (possible symlink) ok + #we want to be able to launch a process from the interactive shell using the same name this one was launched with. set thisexe [file tail [info nameofexecutable]] ;#e.g punk86.exe set thisexeroot [file rootname $thisexe] ;#e.g punk86 set ::auto_execs($thisexeroot) [info nameofexecutable] if {$thisexe ne $thisexeroot} { + #on windows make the .exe point there too set ::auto_execs($thisexe) [info nameofexecutable] } # -- --- --- - set tclmajorv [lindex [split [info tclversion] .] 0] - if {[info exists ::tcl::kitpath]} { + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { set kp $::tcl::kitpath set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -94,9 +103,10 @@ apply { args { } } } - if {[info commands tcl::zipfs::root] ne ""} { - #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. - set zipbase [file join [tcl::zipfs::root] app] ;#zipfs root has trailing slash - but file join does the right thing + if {$has_zipfs_attached} { + #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. (why?) + #default 'zipfs root' has trailing slash (//zipfs:/) - but file join does the right thing + set zipbase [file join [tcl::zipfs::root] app] if {"$zipbase" in [tcl::zipfs::mount]} { set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -111,12 +121,15 @@ apply { args { } } } + + + set internal_paths [list] - if {[info commands tcl::zipfs::root] ne ""} { + if {$has_zipfs} { set ziproot [tcl::zipfs::root] ;#root is enough to determine internal zipkit path lappend internal_paths $ziproot } - if {[info exists ::tcl::kitpath]} { + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { lappend internal_paths $::tcl::kitpath } @@ -438,16 +451,28 @@ apply { args { } } } - #force rescan #catch {package require flobrudder666_nonexistant} set arglist $args } - if {[llength $arglist]} { + #assert arglist has had 'dev' first arg removed if it was present. + if {[llength $arglist] == 1 && [lindex $arglist 0] eq "tclsh"} { + #called as dev tclsh or tclsh + #we would like to drop through to standard tclsh repl without launching another process? how? + #tclMain.c doesn't seem to allow it. + + puts "tcl_interactive: $::tcl_interactive" + set ::tcl_interactive 1 + puts "stdin: [chan configure stdin]" + set ::tclsh(dorepl) 1 + + } elseif {[llength $arglist]} { + #pass through to shellspy commandline processor #puts stdout "main.tcl launching app-shellspy" package require app-shellspy } else { + #punk shell puts stdout "main.tcl launching app-punk. pkg names count:[llength [package names]]" package require app-punk #app-punk starts repl diff --git a/src/vfs/punk8win.vfs/main.tcl b/src/vfs/punk8win.vfs/main.tcl index 9e46f08b..5a844a0e 100644 --- a/src/vfs/punk8win.vfs/main.tcl +++ b/src/vfs/punk8win.vfs/main.tcl @@ -8,6 +8,15 @@ apply { args { + set has_zipfs [expr {[info commands tcl::zipfs::root] ne ""}] + if {$has_zipfs} { + set has_zipfs_attached [expr {[llength [tcl::zipfs::mount]]}] + } else { + set has_zipfs_attached 0 + } + set tclmajorv [lindex [split [info tclversion] .] 0] + + #here we make an attempt to avoid premature (costly) auto_path/tcl::tm::list scanning caused by our initial 'package require starkit'. #we will first look for a starkit.tcl in an expected location and try to load that, then fallback to package require. @@ -17,7 +26,7 @@ apply { args { set topdir [file dirname $normscript] set found_starkit_tcl 0 - set possible_lib_vfs_folders [glob -dir [file join $topdir lib] -type d vfs*] + set possible_lib_vfs_folders [glob -nocomplain -dir [file join $topdir lib] -type d vfs*] foreach test_folder $possible_lib_vfs_folders { #e.g /lib/vfs1.4.1 #we don't expect multiple vfs* folders - but we will process any found and load the pkgIndex.tcl from these folders. @@ -33,9 +42,9 @@ apply { args { set found_starkit_tcl 1 } if {!$found_starkit_tcl} { - #our internal search for starkit failed. - #either we are in a pure zipfs system - or the starkit package is somewhere unexpected - #for pure zipfs - it's wasteful to perform exhaustive search for starkit + #our internal 'quick' search for starkit failed. + #either we are in a pure zipfs system - or the starkit package is somewhere more devious + #for pure zipfs - it's a little wasteful to perform exhaustive search for starkit #review - only keep searching if not 'dev' first arg? #Initially we've done no scans of auto_path/tcl::tm::list - but there will already be a core set of packages known by the kit @@ -44,7 +53,6 @@ apply { args { puts stderr [join [package names] \n] set original_packages [package names] - if {![catch {package require starkit}]} { #known side-effects of starkit::startup #sets the ::starkit::mode variable to the way in which it was launched. One of: {starpack starkit unwrapped tclhttpd plugin service sourced} @@ -67,20 +75,21 @@ apply { args { - # -- --- --- - #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it + #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it. review - for what versions of Tcl does this apply? + #known to occur in old 8.6.8 kits as well as 8.7 #review - do we want $normexe or [info nameofexecutable] for $thisexe here? Presumably [info nameofexecutable] (possible symlink) ok + #we want to be able to launch a process from the interactive shell using the same name this one was launched with. set thisexe [file tail [info nameofexecutable]] ;#e.g punk86.exe set thisexeroot [file rootname $thisexe] ;#e.g punk86 set ::auto_execs($thisexeroot) [info nameofexecutable] if {$thisexe ne $thisexeroot} { + #on windows make the .exe point there too set ::auto_execs($thisexe) [info nameofexecutable] } # -- --- --- - set tclmajorv [lindex [split [info tclversion] .] 0] - if {[info exists ::tcl::kitpath]} { + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { set kp $::tcl::kitpath set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -94,9 +103,10 @@ apply { args { } } } - if {[info commands tcl::zipfs::root] ne ""} { - #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. - set zipbase [file join [tcl::zipfs::root] app] ;#zipfs root has trailing slash - but file join does the right thing + if {$has_zipfs_attached} { + #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. (why?) + #default 'zipfs root' has trailing slash (//zipfs:/) - but file join does the right thing + set zipbase [file join [tcl::zipfs::root] app] if {"$zipbase" in [tcl::zipfs::mount]} { set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -111,12 +121,15 @@ apply { args { } } } + + + set internal_paths [list] - if {[info commands tcl::zipfs::root] ne ""} { + if {$has_zipfs} { set ziproot [tcl::zipfs::root] ;#root is enough to determine internal zipkit path lappend internal_paths $ziproot } - if {[info exists ::tcl::kitpath]} { + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { lappend internal_paths $::tcl::kitpath } @@ -438,16 +451,28 @@ apply { args { } } } - #force rescan #catch {package require flobrudder666_nonexistant} set arglist $args } - if {[llength $arglist]} { + #assert arglist has had 'dev' first arg removed if it was present. + if {[llength $arglist] == 1 && [lindex $arglist 0] eq "tclsh"} { + #called as dev tclsh or tclsh + #we would like to drop through to standard tclsh repl without launching another process? how? + #tclMain.c doesn't seem to allow it. + + puts "tcl_interactive: $::tcl_interactive" + set ::tcl_interactive 1 + puts "stdin: [chan configure stdin]" + set ::tclsh(dorepl) 1 + + } elseif {[llength $arglist]} { + #pass through to shellspy commandline processor #puts stdout "main.tcl launching app-shellspy" package require app-shellspy } else { + #punk shell puts stdout "main.tcl launching app-punk. pkg names count:[llength [package names]]" package require app-punk #app-punk starts repl diff --git a/src/vfs/punk9linux.vfs/main.tcl b/src/vfs/punk9linux.vfs/main.tcl index 12c240f5..5a844a0e 100644 --- a/src/vfs/punk9linux.vfs/main.tcl +++ b/src/vfs/punk9linux.vfs/main.tcl @@ -1,22 +1,95 @@ -if {![catch {package require starkit}]} { - starkit::startup -} +#main.tcl - we expect to be in the context of a zipkit or tclkit vfs attached to a tcl executable. +#review - what happens if both are somehow attached and both vfs and zipfs are available? +# - if that's even possible - we have no control here over which main.tcl was selected as we're already here +#The logic below will add appropriate package paths from starkit and zipfs vfs paths +# - and restrict package paths to those coming from a vfs (if not launched with 'dev' first arg which allows external paths to remain) + apply { args { + set has_zipfs [expr {[info commands tcl::zipfs::root] ne ""}] + if {$has_zipfs} { + set has_zipfs_attached [expr {[llength [tcl::zipfs::mount]]}] + } else { + set has_zipfs_attached 0 + } + set tclmajorv [lindex [split [info tclversion] .] 0] + + + #here we make an attempt to avoid premature (costly) auto_path/tcl::tm::list scanning caused by our initial 'package require starkit'. + #we will first look for a starkit.tcl in an expected location and try to load that, then fallback to package require. + + #standard way to avoid symlinking issues - review! + set normscript [file dirname [file normalize [file join [info script] __dummy__]]] + set normexe [file dirname [file normalize [file join [info nameofexecutable] __dummy__]]] + + set topdir [file dirname $normscript] + set found_starkit_tcl 0 + set possible_lib_vfs_folders [glob -nocomplain -dir [file join $topdir lib] -type d vfs*] + foreach test_folder $possible_lib_vfs_folders { + #e.g /lib/vfs1.4.1 + #we don't expect multiple vfs* folders - but we will process any found and load the pkgIndex.tcl from these folders. + #order of folder processing shouldn't matter (rely on order returned by 'package versions' - review) + if {[file exists $test_folder/starkit.tcl] && [file exists $test_folder/pkgIndex.tcl]} { + set dir $test_folder + source $test_folder/pkgIndex.tcl + } + } + if {[set starkitv [lindex [package versions starkit] end]] ne ""} { + #run the ifneeded script for the latest found (assuming package versions ordering is correct) + eval [package ifneeded starkit $starkitv] + set found_starkit_tcl 1 + } + if {!$found_starkit_tcl} { + #our internal 'quick' search for starkit failed. + #either we are in a pure zipfs system - or the starkit package is somewhere more devious + #for pure zipfs - it's a little wasteful to perform exhaustive search for starkit + #review - only keep searching if not 'dev' first arg? + + #Initially we've done no scans of auto_path/tcl::tm::list - but there will already be a core set of packages known by the kit + #retain it so we can 'forget' the difference after our first 'package require' forces a full scan which includes some paths we may not wish to include or at least include with different preferences + puts "main.tcl 1)--> package name count: [llength [package names]]" + puts stderr [join [package names] \n] + set original_packages [package names] + + if {![catch {package require starkit}]} { + #known side-effects of starkit::startup + #sets the ::starkit::mode variable to the way in which it was launched. One of: {starpack starkit unwrapped tclhttpd plugin service sourced} + #set the ::starkit::topdir variable + #if mode not starpack, then: + # - adds $::starkit::topdir/lib to the auto_path if not already present + # + #In this context (vfs attached to tcl kit executable - we expect the launch mode to be 'starkit' + set starkit_startmode [starkit::startup] + puts stderr "STARKIT MODE: $starkit_startmode" + } + puts "main.tcl 2)--> package name count: [llength [package names]]" + foreach pkg [package names] { + if {$pkg ni $original_packages} { + package forget $pkg + } + } + puts "main.tcl 3)--> package name count: [llength [package names]]" + } + + + # -- --- --- - #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it - set thisexe [file tail [info nameofexecutable]] - set thisexeroot [file rootname $thisexe] + #when run as a tclkit - the exe is mounted as a dir and Tcl's auto_execok doesn't find it. review - for what versions of Tcl does this apply? + #known to occur in old 8.6.8 kits as well as 8.7 + #review - do we want $normexe or [info nameofexecutable] for $thisexe here? Presumably [info nameofexecutable] (possible symlink) ok + #we want to be able to launch a process from the interactive shell using the same name this one was launched with. + set thisexe [file tail [info nameofexecutable]] ;#e.g punk86.exe + set thisexeroot [file rootname $thisexe] ;#e.g punk86 set ::auto_execs($thisexeroot) [info nameofexecutable] if {$thisexe ne $thisexeroot} { + #on windows make the .exe point there too set ::auto_execs($thisexe) [info nameofexecutable] } # -- --- --- - set tclmajorv [lindex [split [info tclversion] .] 0] - if {[info exists ::tcl::kitpath]} { + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { set kp $::tcl::kitpath set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -30,9 +103,10 @@ apply { args { } } } - if {[info commands tcl::zipfs::root] ne ""} { - #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. - set zipbase [file join [tcl::zipfs::root] app] ;#zipfs root has trailing slash - but file join does the right thing + if {$has_zipfs_attached} { + #review build option may be different - tclZipFs.c ZIPFS_APP_MOUNT defaults to ZIPFS_VOLUME/app - but it could be something else. (why?) + #default 'zipfs root' has trailing slash (//zipfs:/) - but file join does the right thing + set zipbase [file join [tcl::zipfs::root] app] if {"$zipbase" in [tcl::zipfs::mount]} { set existing_module_paths [string tolower [tcl::tm::list]] foreach p [list modules modules_tcl$tclmajorv] { @@ -48,6 +122,17 @@ apply { args { } } + + + set internal_paths [list] + if {$has_zipfs} { + set ziproot [tcl::zipfs::root] ;#root is enough to determine internal zipkit path + lappend internal_paths $ziproot + } + if {[info exists ::tcl::kitpath] && $::tcl::kitpath ne ""} { + lappend internal_paths $::tcl::kitpath + } + if {[lindex $args 0] in {dev devquiet}} { set arglist [lassign $args devmode] set ::argv $arglist @@ -78,40 +163,99 @@ apply { args { #If the current directory contains .tm files when the punk repl starts - then it will attempt to preference them # - but first add our other known relative modules paths - as it won't make sense to use current directory as a modulepath if it's an ancestor of one of these.. + #original tm list at this point consists of whatever the kit decided + some prepended internal kit paths that punk decided on. + #we want to bring the existing external paths to the front (probably from the kit looking at various env TCL* values) + #we want to maintain the order of the internal paths. + #we then want to add our external dev paths of the total list + + #assert [llength [package names]] should be small at this point ~ <10 ? + set original_tm_list [tcl::tm::list] tcl::tm::remove {*}$original_tm_list + # -- --- --- --- --- --- --- --- + #split existing paths into internal & external + set internal_tm_dirs [list] ;# + set external_tm_dirs [list] + set lcase_internal_paths [string tolower $internal_paths] + foreach tm $original_tm_list { + set tmlower [string tolower $tm] + set is_internal 0 + foreach okprefix $lcase_internal_paths { + if {[string match "$okprefix*" $tmlower]} { + lappend internal_tm_dirs $tm + set is_internal 1 + break + } + } + if {!$is_internal} { + lappend external_tm_dirs $tm + } + } + # -- --- --- --- --- --- --- --- + set original_external_tm_dirs $external_tm_dirs ;#we check some of our additions and bring to front - so we refer to external list as provided by kit + #assert internal_tm_dirs and external_tm_dirs have their case preserved.. + set module_folders [list] + #review - the below statement doesn't seem to be true. #tm list first added end up later in the list - and then override earlier ones if version the same - so add pwd-relative 1st to give higher priority #(only if Tcl has scanned all paths - see below bogus package load) #1 - if {[file isdirectory [pwd]/modules]} { - catch {tcl::tm::add [pwd]/modules} - } - #2) - if {[string match "*.vfs/*" [file normalize [info script]]]} { - #src/xxx.vfs/lib/app-punk/repl.tcl - # assume if calling directly into .vfs that the user would prefer to use src/modules - so go up 4 levels - #set srcmodulefolder [file dirname [file dirname [file dirname [file dirname [file normalize [info script]]]]]]/modules - # - the src/modules folder doesn't contain important modules such as vendormodules - so the above probably isn't that useful - set srcfolder [file dirname [file dirname [file dirname [file dirname [file normalize [info script]]]]]] - lappend module_folders [file join [file dirname $srcfolder] modules] ;#modules folder at same level as src folder - lappend module_folders [file join [file dirname $srcfolder] modules_tcl$tclmajorv] + # .../bin/punkXX.exe look for ../modules (i.e modules folder at same level as bin folder) + #using normexe under assumption [info name] might be symlink - and more likely to be where the modules are located. + #we will try both relative to symlink and relative to underlying exe - with those at symlink location earlier in the list + #review - a user may have other expectations. + + #case differences could represent different paths on unix-like platforms. + #It's perhaps a little unwise to configure matching paths with only case differences for a cross-platform tool .. but we should support it for those who use it and have no interest in windows - todo! review + set normexe_dir [file dirname $normexe] + if {[file tail $normexe_dir] eq "bin"} { + #underlying exe in a bin dir - backtrack 1 + lappend exe_module_folders [file dirname $normexe_dir]/modules + lappend exe_module_folders [file dirname $normexe_dir]/modules_tcl$tclmajorv + } else { + lappend exe_module_folders $normexe_dir/modules + lappend exe_module_folders $normexe_dir/modules_tcl$tclmajorv + } + set nameexe_dir [file dirname [info nameofexecutable]] + #possible symlink (may resolve to same path as above - we check below to not add in twice) + if {[file tail $nameexe_dir] eq "bin"} { + lappend exe_module_folders [file dirname $nameexe_dir]/modules + lappend exe_module_folders [file dirname $nameexe_dir]/modules_tcl$tclmajorv } else { - # .../bin/punkXX.exe look for ../modules (i.e modules folder at same level as bin folder) - lappend module_folders [file dirname [file dirname [info nameofexecutable]]]/modules - lappend module_folders [file dirname [file dirname [info nameofexecutable]]]/modules_tcl$tclmajorv + lappend exe_module_folders $nameexe_dir/modules + lappend exe_module_folders $nameexe_dir/modules_tcl$tclmajorv } - foreach modulefolder $module_folders { - if {[file isdirectory $modulefolder]} { - tcl::tm::add $modulefolder + + foreach modulefolder $exe_module_folders { + set lc_external_tm_dirs [string tolower $external_tm_dirs] + set lc_modulefolder [string tolower $modulefolder] + if {$lc_modulefolder in [string tolower $original_external_tm_dirs]} { + #perhaps we have an env var set pointing to one of our dev foldersl. We don't want to rely on how the kit ordered it. + #bring to front if not already there. + #assert it must be present in $lc_external_tm_dirs if it's in $original_external_tm_dirs + set posn [lsearch $lc_external_tm_dirs $lc_modulefolder] + if {$posn > 0} { + #don't rely on lremove here. Not all runtimes have it and we don't want to load our forward-compatibility packages yet. + #(still need to support tcl 8.6 - and this script used in multiple kits) + set external_tm_dirs [lreplace $external_tm_dirs $posn $posn] + #don't even add it back in if it doesn't exist in filesystem + if {[file isdirectory $modulefolder]} { + set external_tm_dirs [linsert $external_tm_dirs 0 $modulefolder] + } + } } else { - puts stderr "Warning unable to find module folder at: $modulefolder" + if {$lc_modulefolder ni $lc_external_tm_dirs && [file isdirectory $modulefolder]} { + set external_tm_dirs [linsert $external_tm_dirs 0 $modulefolder] ;#linsert seems faster than 'concat [list $modulefolder] $external_tm_dirs' - review + } } } + if {![llength $exe_module_folders]} { + puts stderr "Warning - no 'modules' or 'modules_tcl$tclmajorv' folders found relative to executable (or it's symlink if any)" + } @@ -121,72 +265,121 @@ apply { args { if {"windows" eq $::tcl_platform(platform)} { #case differences dont matter - but can stop us finding path in auto_path foreach libsub [list lib_tcl$tclmajorv lib] { - set libfolder [file dirname [file dirname [info nameofexecutable]]]/$libsub - if {[string tolower $libfolder] ni [string tolower $::auto_path]} { - if {[file isdirectory $libfolder]} { - lappend ::auto_path $libfolder - } + if {[file tail $nameexe_dir] eq "bin"} { + set libfolder [file dirname $nameexe_dir]/$libsub + } else { + set libfolder $nameexe_dir/$libsub + } + if {[string tolower $libfolder] ni [string tolower $::auto_path] && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + # ------------- + if {[file tail $normexe_dir] eq "bin"} { + set libfolder [file dirname $normexe_dir]/$libsub + } else { + set libfolder $normexe_dir/$libsub + } + if {[string tolower $libfolder] ni [string tolower $::auto_path] && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder } + # ------------- set libfolder [pwd]/$libsub - if {[string tolower $libfolder] ni [string tolower $::auto_path]} { - if {[file isdirectory $libfolder]} { - lappend ::auto_path $libfolder - } + if {[string tolower $libfolder] ni [string tolower $::auto_path] && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder } } } else { #on other platforms, case differences could represent different paths foreach libsub [list lib_tcl$tclmajorv lib] { - set libfolder [file dirname [file dirname [info nameofexecutable]]]/$libsub - if {$libfolder ni $::auto_path} { - if {[file isdirectory $libfolder]} { - lappend ::auto_path $libfolder - } + if {[file tail $nameexe_dir] eq "bin"} { + set libfolder [file dirname $nameexe_dir]/$libsub + } else { + set libfolder $nameexe_dir/$libsub + } + if {$libfolder ni $::auto_path && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + # ------------- + if {[file tail $normexe_dir] eq "bin"} { + set libfolder [file dirname $normexe_dir]/$libsub + } else { + set libfolder $normexe_dir/$libsub } + if {$libfolder ni $::auto_path && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder + } + # ------------- set libfolder [pwd]/$libsub - if {$libfolder ni $::auto_path} { - if {[file isdirectory $libfolder]} { - lappend ::auto_path $libfolder - } + if {$libfolder ni $::auto_path && [file isdirectory $libfolder]} { + lappend ::auto_path $libfolder } } } - #2) + #2) support developer running from a folder containing *.tm files they want to make available + # could cause problems if user happens to be in a subdirectory of a tm folder structure as namespaced modules won't work if not at a tm path root. + #The current dir could also be a subdirectory of an existing tm_dir which would fail during tcl::tm::add - we will need to wrap all additions in catch set currentdir_modules [glob -nocomplain -dir [pwd] -type f -tail *.tm] + #we assume [pwd] will always return an external (not kit) path at this point - REVIEW if {[llength $currentdir_modules]} { #now add current dir (if no conflict with above) - catch {tcl::tm::add [pwd]} + #catch {tcl::tm::add [pwd]} + set external_tm_dirs [linsert $external_tm_dirs 0 $currentdir_modules] + if {[file exists [pwd]/modules] || [file exists [pwd]/modules_tcl$tclmajorv]} { + puts stderr "WARNING: modules or modules_tcl$tclmajorv folders not added to tcl::tm::path due to modules found in current workding dir [pwd]" + } + } else { + #modules or modules_tclX subdir relative to cwd cannot be added if [pwd] has been added + set cwd_modules_folder [file normalize [file join [pwd] modules]] + if {[file isdirectory $cwd_modules_folder]} { + if {[string tolower $cwd_modules_folder] ni [string tolower $external_tm_dirs]} { + #prepend + set external_tm_dirs [linsert $external_tm_dirs 0 $cwd_modules_folder] + } + } + set cwd_modules_folder [file normalize [file join [pwd] modules_tcl$tclmajorv]] + if {[file isdirectory $cwd_modules_folder]} { + if {[string tolower $cwd_modules_folder] ni [string tolower $external_tm_dirs]} { + #prepend + set external_tm_dirs [linsert $external_tm_dirs 0 $cwd_modules_folder] + } + } } + #assert tcl::tm::list still empty here #restore module paths - set tm_list_now [tcl::tm::list] - foreach p [lreverse $original_tm_list] { - if {$p ni $tm_list_now} { + #add internals first as in 'dev' mode (dev as first argument on launch) we preference external modules + #note use of lreverse to maintain same order + foreach p [lreverse $internal_tm_dirs] { + if {$p ni [tcl::tm::list]} { #the prior tm paths go to the head of the list. - #They are processed first.. but an item of same version later in the list will override one at the head. (depending on when list was scanned) - tcl::tm::add $p + #They are processed first.. but an item of same version later in the list will override one at the head. (depending on when list was scanned) REVIEW - true statement??? + #addition can fail if one path is a prefix of another + if {[catch {tcl::tm::add $p} errM]} { + puts stderr "Failed to add internal module dir '$p' to tcl::tm::list\n$errM" + } } } + foreach p [lreverse $external_tm_dirs] { + if {$p ni [tcl::tm::list]} { + if {[catch {tcl::tm::add $p} errM]} { + puts stderr "Failed to add external module dir '$p' to tcl::tm::list\n$errM" + } + } + } + #------------------------------------------------------------------------------ + #REVIEW #package require a bogus package to ensure Tcl scans the whole auto_path/tm list - otherwise a lower-versioned module earlier in the path may be loaded #This seems to take not insignificant time depending on size of auto_path and tcl::tm::list (e.g 2023 i9 ssd 100ms) - but seems unavoidable for now - catch {package require flobrudder666_nonexistant} + #catch {package require flobrudder666_nonexistant} #------------------------------------------------------------------------------ } else { #Tcl_Init will most likely have set up some external paths #As our app has been started without the 'dev' first arg - we will prune paths that are not zipfs or tclkit - set internal_paths [list] - if {[info commands tcl::zipfs::root] ne ""} { - set ziproot [tcl::zipfs::root] ;#root is enough to determine internal zipkit path - lappend internal_paths $ziproot - } - if {[info exists ::tcl::kitpath]} { - lappend internal_paths $::tcl::kitpath - } set new_auto_path [list] #review - case insensitive ok for windows - but could cause issues on other platforms? foreach ap $::auto_path { @@ -258,21 +451,31 @@ apply { args { } } } - #force rescan - catch {package require flobrudder666_nonexistant} + #catch {package require flobrudder666_nonexistant} set arglist $args } - if {[llength $arglist]} { - puts stdout "main.tcl launching app-shellspy" + #assert arglist has had 'dev' first arg removed if it was present. + if {[llength $arglist] == 1 && [lindex $arglist 0] eq "tclsh"} { + #called as dev tclsh or tclsh + #we would like to drop through to standard tclsh repl without launching another process? how? + #tclMain.c doesn't seem to allow it. + + puts "tcl_interactive: $::tcl_interactive" + set ::tcl_interactive 1 + puts "stdin: [chan configure stdin]" + set ::tclsh(dorepl) 1 + + } elseif {[llength $arglist]} { + #pass through to shellspy commandline processor + #puts stdout "main.tcl launching app-shellspy" package require app-shellspy } else { - puts stdout "main.tcl launching app-punk" + #punk shell + puts stdout "main.tcl launching app-punk. pkg names count:[llength [package names]]" package require app-punk #app-punk starts repl #repl::start stdin -title "main.tcl" } - puts stderr "main.tcl done" - flush stderr }} {*}$::argv diff --git a/src/vfs/punk9win.vfs/main.tcl b/src/vfs/punk9win.vfs/main.tcl index 21813708..16c1924b 100644 --- a/src/vfs/punk9win.vfs/main.tcl +++ b/src/vfs/punk9win.vfs/main.tcl @@ -12,7 +12,7 @@ apply { args { if {$has_zipfs} { set has_zipfs_attached [expr {[llength [tcl::zipfs::mount]]}] } else { - set has_zipfs_attached + set has_zipfs_attached 0 } set tclmajorv [lindex [split [info tclversion] .] 0] @@ -26,7 +26,7 @@ apply { args { set topdir [file dirname $normscript] set found_starkit_tcl 0 - set possible_lib_vfs_folders [glob -dir [file join $topdir lib] -type d vfs*] + set possible_lib_vfs_folders [glob -nocomplain -dir [file join $topdir lib] -type d vfs*] foreach test_folder $possible_lib_vfs_folders { #e.g /lib/vfs1.4.1 #we don't expect multiple vfs* folders - but we will process any found and load the pkgIndex.tcl from these folders. @@ -461,10 +461,27 @@ apply { args { #called as dev tclsh or tclsh #we would like to drop through to standard tclsh repl without launching another process? how? #tclMain.c doesn't seem to allow it. - - puts "tcl_interactive: $::tcl_interactive" + if {![info exists ::env(TCLSH_PIPEREPL)]} { + set is_tclsh_piperepl_env_true 0 + } else { + if {[string is boolean -strict $::env(TCLSH_PIPEREPL)]} { + set is_tclsh_piperepl_env_true $::env(TCLSH_PIPEREPL) + } else { + set is_tclsh_piperepl_env_true 0 + } + } + if {!$is_tclsh_piperepl_env_true} { + puts stderr "tcl_interactive: $::tcl_interactive" + puts stderr "stdin: [chan configure stdin]" + puts stderr "Environment variable TCLSH_PIPEREPL is not set or is false or is not a boolean" + } else { + #according to env TCLSH_PIPEREPL and our commandline argument - tclsh repl is desired + #check if tclsh/punk has had the piperepl patch applied - in which case tclsh(istty) should exist + if {![info exists ::tclsh(istty)]} { + puts stderr "error: the runtime doesn't appear to have been compiled with the piperepl patch" + } + } set ::tcl_interactive 1 - puts "stdin: [chan configure stdin]" set ::tclsh(dorepl) 1 } elseif {[llength $arglist]} {