From a04a62d2e97b2f5e69cd93fbe9038c5c2f5cd2a8 Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Fri, 28 Mar 2025 12:09:26 +1100 Subject: [PATCH] tomlish and punk::netbox improvements --- src/bootsupport/modules/punk/args-0.1.0.tm | 326 +- src/bootsupport/modules/punk/path-0.1.0.tm | 153 +- src/bootsupport/modules/textblock-0.1.3.tm | 31 +- src/bootsupport/modules/tomlish-1.1.2.tm | 5566 +++++++++++++++++ src/modules/punk/args-999999.0a1.0.tm | 326 +- src/modules/punk/imap4-999999.0a1.0.tm | 1298 +++- src/modules/punk/netbox-999999.0a1.0.tm | 1228 ++++ src/modules/punk/netbox-buildversion.txt | 3 + src/modules/punk/path-999999.0a1.0.tm | 153 +- src/modules/textblock-999999.0a1.0.tm | 31 +- .../bootsupport/modules/punk/args-0.1.0.tm | 326 +- .../bootsupport/modules/punk/path-0.1.0.tm | 153 +- .../bootsupport/modules/textblock-0.1.3.tm | 31 +- .../src/bootsupport/modules/tomlish-1.1.2.tm | 5566 +++++++++++++++++ .../bootsupport/modules/punk/args-0.1.0.tm | 326 +- .../bootsupport/modules/punk/path-0.1.0.tm | 153 +- .../bootsupport/modules/textblock-0.1.3.tm | 31 +- .../src/bootsupport/modules/tomlish-1.1.2.tm | 5566 +++++++++++++++++ src/vendormodules/tomlish-1.1.2.tm | 5566 +++++++++++++++++ 19 files changed, 25823 insertions(+), 1010 deletions(-) create mode 100644 src/bootsupport/modules/tomlish-1.1.2.tm create mode 100644 src/modules/punk/netbox-999999.0a1.0.tm create mode 100644 src/modules/punk/netbox-buildversion.txt create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.2.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.2.tm create mode 100644 src/vendormodules/tomlish-1.1.2.tm diff --git a/src/bootsupport/modules/punk/args-0.1.0.tm b/src/bootsupport/modules/punk/args-0.1.0.tm index 25b01d81..91f29aa5 100644 --- a/src/bootsupport/modules/punk/args-0.1.0.tm +++ b/src/bootsupport/modules/punk/args-0.1.0.tm @@ -331,26 +331,26 @@ tcl::namespace::eval punk::args { parsing and help display. directives include: %B%@id%N% ?opt val...? - options: -id + spec-options: -id %B%@cmd%N% ?opt val...? - options: -name -help + spec-options: -name -help %B%@leaders%N% ?opt val...? - options: -min -max + spec-options: -min -max (used for leading args that come before switches/opts) %B%@opts%N% ?opt val...? - options: -any + spec-options: -any %B%@values%N% ?opt val...? - options: -min -max + spec-options: -min -max (used for trailing args that come after switches/opts) %B%@argdisplay%N% ?opt val...? - options: -header (text for header row of table) - -body (text to replace autogenerated arg info) + spec-options: -header (text for header row of table) + -body (text to replace autogenerated arg info) %B%@doc%N% ?opt val...? - options: -name -url + spec-options: -name -url %B%@seealso%N% ?opt val...? - options: -name -url (for footer - unimplemented) + spec-options: -name -url (for footer - unimplemented) - Some other options normally present on custom arguments are available + Some other spec-options normally present on custom arguments are available to use with the @leaders @opts @values directives to set defaults for subsequent lines that represent your custom arguments. These directives should occur in exactly this order - but can be @@ -361,7 +361,12 @@ tcl::namespace::eval punk::args { or using the i .. function - an @id with -id is needed. All directives can be omitted, in which case every line represents - a custom value or option. + a custom leader, value or option. 
+ All will be leaders by default if no options defined. + If options are defined (by naming with leading dash, or explicitly + specifying @opts) then the definitions prior to the options will be + categorised as leaders, and those following the options will be + categorised as values. Custom arguments are defined by using any word at the start of a line that doesn't begin with @ or - @@ -369,7 +374,7 @@ tcl::namespace::eval punk::args { that @@somearg becomes an argument named @somearg) custom leading args, switches/options (names starting with -) - and trailing values also take options: + and trailing values also take spec-options: -type defaults to string. If no other restrictions @@ -397,12 +402,22 @@ tcl::namespace::eval punk::args { -optional (defaults to true for flags/switches false otherwise) + For non flag/switch arguments - all arguments with + -optional true must sit consecutively within their group. + ie all optional leader arguments must be together, and all + optional value arguments must be together. Furthermore, + specifying both optional leaders and optional values will + often lead to ambiguous parsing results. Currently, all + optional non-flg/switch arguments should be either at the + trailing end of leaders or the trailing end of values. + Further unambiguous arrangements of optional args may be + made in future - but are currently considered 'unsupported' -default -multiple (for leaders & values defines whether - subsequent received values are stored agains the same - argument name - only applies to final leader or value) + subsequent received values are stored against the same + argument name - only applies to final leader OR final value) (for options/flags this allows the opt-val pair or solo - flag to appear multiple times - no necessarily contiguously) + flag to appear multiple times - not necessarily contiguously) -choices {} A list of allowable values for an argument. The -default value doesn't have to be in the list. @@ -438,7 +453,7 @@ tcl::namespace::eval punk::args { Max of -1 represents no upper limit. If allows more than one choice the value is a list consisting of items in the choices made available through - entries in -choices/-choicegrups. + entries in -choices/-choicegroups. -minsize (type dependant) -maxsize (type dependant) -range (type dependant) @@ -1667,6 +1682,7 @@ tcl::namespace::eval punk::args { " @leaders -min 0 -max 0 @opts + -return -default text -choices {text dict} -form -default 0 -help\ "Ordinal index or name of command form" @@ -1694,7 +1710,7 @@ tcl::namespace::eval punk::args { (directives are lines beginning with @ e.g @id, @cmd etc) - if -type is @leaders,@opts or @values matches from that type + if -type is leaders,opts or values matches from that type will be returned. if -type is another directive such as @id, @doc etc the @@ -1706,8 +1722,10 @@ tcl::namespace::eval punk::args { proc resolved_def {args} { + #not eating our own dogfood here as far as argument parsing. -id ::punk::args::resolved_def is for documentation/errors only. 
set opts [dict create\ - -types {}\ + -return text\ + -types {}\ -form 0\ -antiglobs {}\ -override {}\ @@ -1743,7 +1761,7 @@ tcl::namespace::eval punk::args { } dict for {k v} $opts { switch -- $k { - -form - -types - -antiglobs - -override {} + -return - -form - -types - -antiglobs - -override {} default { punk::args::parse $args withid ::punk::args::resolved_def return @@ -1764,163 +1782,185 @@ tcl::namespace::eval punk::args { variable id_cache_rawdef set realid [real_id $id] + if {$realid eq ""} { + return + } - if {$realid ne ""} { - set deflist [tcl::dict::get $id_cache_rawdef $realid] - set result "" - set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] + set deflist [tcl::dict::get $id_cache_rawdef $realid] + set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] - set opt_form [dict get $opts -form] - if {[string is integer -strict $opt_form]} { - set formname [lindex [dict get $specdict form_names] $opt_form] - } else { - set formname $opt_form - } - set opt_override [dict get $opts -override] - - #set arg_info [dict get $specdict ARG_INFO] - set arg_info [dict get $specdict FORMS $formname ARG_INFO] - set argtypes [dict create leaders leader opts option values value] - - set opt_antiglobs [dict get $opts -antiglobs] - set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] - set suppressed_directives [list] - set suppressed_args [list] - foreach ag $opt_antiglobs { - foreach d $directives { - if {[string match $ag $d]} { - lappend suppressed_directives $d - } + set opt_form [dict get $opts -form] + if {[string is integer -strict $opt_form]} { + set formname [lindex [dict get $specdict form_names] $opt_form] + } else { + set formname $opt_form + } + set opt_override [dict get $opts -override] + set opt_return [dict get $opts -return] + + #set arg_info [dict get $specdict ARG_INFO] + set arg_info [dict get $specdict FORMS $formname ARG_INFO] + set argtypes [dict create leaders leader opts option values value] + + set opt_antiglobs [dict get $opts -antiglobs] + set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] + set suppressed_directives [list] + set suppressed_args [list] + foreach ag $opt_antiglobs { + foreach d $directives { + if {[string match $ag $d]} { + lappend suppressed_directives $d } - foreach argname [dict keys $arg_info] { - if {[string match $ag $argname]} { - lappend suppressed_args $argname - } + } + foreach argname [dict keys $arg_info] { + if {[string match $ag $argname]} { + lappend suppressed_args $argname } } - set suppressed_directives [lsort -unique $suppressed_directives] - set suppressed_args [lsort -unique $suppressed_args] + } + set suppressed_directives [lsort -unique $suppressed_directives] + set suppressed_args [lsort -unique $suppressed_args] - set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] + set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] - set globbed [list] - foreach pat $patterns { - set matches [dict keys $arg_info $pat] - lappend globbed {*}$matches - } - set globbed [lsort -unique $globbed] - set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - - foreach type $typelist { - switch -exact -- $type { - * { - if {"@id" in $included_directives} { - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" - } else { - 
append result \n "@id -id [dict get $specdict id]" - } - } - foreach directive {@package @cmd @doc @seealso @argdisplay} { - set dshort [string range $directive 1 end] - if {"$directive" in $included_directives} { - if {[dict exists $opt_override $directive]} { - append result \n "$directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict ${dshort}_info]" - } - } - } - #output ordered by leader, option, value - foreach pseudodirective {leaders opts values} tp {leader option value} { - set directive "@$pseudodirective" - switch -- $directive { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - - if {"$directive" in $included_directives} { - if {[dict exists $opt_override "$directive"]} { - append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict $defaults_key]" - } - } + set globbed [list] + foreach pat $patterns { + set matches [dict keys $arg_info $pat] + lappend globbed {*}$matches + } + set globbed [lsort -unique $globbed] + set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - if {$pseudodirective in $included_directives} { - foreach m $included_args { - set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq $tp} { - set argspec [dict remove $argspec -ARGTYPE] - if {[dict exists $opt_override $m]} { - append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" - } else { - append result \n "$m $argspec" - } - } - } - } + set result "" + set resultdict [dict create] + foreach type $typelist { + switch -exact -- $type { + * { + if {"@id" in $included_directives} { + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] } - } - @id { - if {"@id" in $included_directives} { - #only a single id record can exist - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + foreach directive {@package @cmd @doc @seealso @argdisplay} { + set dshort [string range $directive 1 end] + if {"$directive" in $included_directives} { + if {[dict exists $opt_override $directive]} { + append result \n "$directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]] } else { - append result \n "@id -id [dict get $specdict id]" + append result \n "$directive [dict get $specdict ${dshort}_info]" + dict set resultdict $directive [dict get $specdict ${dshort}_info] } } } - @package - @cmd - @doc - @seealso - @argdisplay { - if {"$type" in $included_directives} { - set tp [string range $type 1 end] ;# @package -> package - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" - } else { - append result \n "$type [dict get $specdict ${tp}_info]" - } + #output ordered by leader, option, value + 
foreach pseudodirective {leaders opts values} tp {leader option value} { + set directive "@$pseudodirective" + switch -- $directive { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} } - } - @leaders - @opts - @values { - #these are the active defaults for further arguments - if {"$type" in $included_directives} { - switch -- $type { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + + if {"$directive" in $included_directives} { + if {[dict exists $opt_override "$directive"]} { + append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]] } else { - append result \n "$type [dict get $specdict leaderspec_defaults]" + append result \n "$directive [dict get $specdict $defaults_key]" + dict set resultdict $directive [dict get $specdict $defaults_key] } } - } - leaders - opts - values { - #pseudo-directives - if {$type in $included_directives} { + + if {$pseudodirective in $included_directives} { foreach m $included_args { set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + if {[dict get $argspec -ARGTYPE] eq $tp} { set argspec [dict remove $argspec -ARGTYPE] if {[dict exists $opt_override $m]} { append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] } else { append result \n "$m $argspec" + dict set resultdict $m $argspec } } } } } - default { + + } + @id { + if {"@id" in $included_directives} { + #only a single id record can exist + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] + } } } + @package - @cmd - @doc - @seealso - @argdisplay { + if {"$type" in $included_directives} { + set tp [string range $type 1 end] ;# @package -> package + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get $specdict ${tp}_info]" + dict set resultdict $type [dict get $specdict ${tp}_info] + } + } + } + @leaders - @opts - @values { + #these are the active defaults for further arguments + if {"$type" in $included_directives} { + switch -- $type { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} + } + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get 
$specdict leaderspec_defaults]" + dict set resultdict $type [dict get $specdict leaderspec_defaults] + } + } + } + leaders - opts - values { + #pseudo-directives + if {$type in $included_directives} { + foreach m $included_args { + set argspec [dict get $arg_info $m] + if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + set argspec [dict remove $argspec -ARGTYPE] + if {[dict exists $opt_override $m]} { + append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] + } else { + append result \n "$m $argspec" + dict set resultdict $m $argspec + } + } + } + } + } + default { + } + } + if {$opt_return eq "text"} { + return $result + } else { + return $resultdict } - - return $result } } diff --git a/src/bootsupport/modules/punk/path-0.1.0.tm b/src/bootsupport/modules/punk/path-0.1.0.tm index 51e74719..f0a4a444 100644 --- a/src/bootsupport/modules/punk/path-0.1.0.tm +++ b/src/bootsupport/modules/punk/path-0.1.0.tm @@ -20,7 +20,7 @@ #[manpage_begin punkshell_module_punk::path 0 0.1.0] #[copyright "2023"] #[titledesc {Filesystem path utilities}] [comment {-- Name section and table of contents description --}] -#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] +#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] #[require punk::path] #[description] #[keywords module path filesystem] @@ -104,21 +104,21 @@ namespace eval punk::path { #*** !doctools #[subsection {Namespace punk::path}] - #[para] Core API functions for punk::path + #[para] Core API functions for punk::path #[list_begin definitions] - # -- --- + # -- --- #punk::path::normjoin # - simplify . and .. segments as far as possible whilst respecting specific types of root. - # -- --- + # -- --- #a form of file normalize that supports //xxx to be treated as server path names #(ie regardless of unices ignoring (generally) leading double slashes, and regardless of windows volumerelative path syntax) - #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) - # -- --- + #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) + # -- --- #This is intended to be purely a string analysis - without reference to filesystem volumes or vfs or zipfs mountpoints etc # #TODO - option for caller to provide a -base below which we can't backtrack. - #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share + #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share #Our default is to allow trackback to: # :// # :/ @@ -128,7 +128,7 @@ namespace eval punk::path { # ./../ - (track back indefinitely on relpath as we are not resolving to anything physical and can't fully simplify the leading backtracks) # #The caller should do the file/vfs operations to determine this - not us. - # -- --- + # -- --- #simplify path with respect to /./ & /../ elements - independent of platform #NOTE: "anomalies" in standard tcl processing on windows: #e.g file normalize {//host} -> c:/host (or e.g d:/host if we happen to be on another volume) @@ -148,9 +148,9 @@ namespace eval punk::path { #known issues: #1) # normjoin d://a//b//c -> d://a/b/c - # This is because we don't detect specific schemes. 
ie it's treated the same as https://a/b/c -> https://a/b/c + # This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c # Not considered a problem - just potentially surprising. - # To avoid it we would have to enumerate possible schemes. + # To avoid it we would have to enumerate possible schemes. # As it stands a unix system could define a 'scheme' that happens to match windows style driveletters. Consider a 'feature' ? review. # won't fix? #2) @@ -164,16 +164,16 @@ namespace eval punk::path { # normjoin ///server/share -> ///server/share #This is effectively an empty servername in the input with 'server' being pushed one level down - and the output is consistent # possibly won't fix - review - #4) inconsistency + #4) inconsistency # we return normalized //server/share for //./UNC/server share # but other dos device paths are maintained # e.g //./c:/etc # This is because such paths could contain alternate segment names (windows shortnames) which we aren't in a position to resolve. - # caller should - # #as with 'case' below - caller will need to run a post 'file normalize' + # caller should + # #as with 'case' below - caller will need to run a post 'file normalize' #5) we don't normalize case like file normalize does on windows platform. # This is intentional. It could only be done with reference to underlying filesystem which we don't want here. - # + # # ================ # #relpaths all end up with leading . - while not always the simplest form, this is ok. (helps stop inadvertent conversions to absolutes) @@ -194,14 +194,14 @@ namespace eval punk::path { /// { #if this is effectively //$emptyservername/ #then for consistency we should trail //=3 #todo - shortcircuit that here? } } - # /// - set doubleslash1_posn [string first // $path] + # /// + set doubleslash1_posn [string first // $path] # -- --- --- temp warning on windows only - no x-platform difference in result #on windows //host is of type volumerelative @@ -221,7 +221,7 @@ namespace eval punk::path { } # -- --- --- - set is_relpath 0 + set is_relpath 0 #set path [string map [list \\ /] $path] set finalparts [list] @@ -264,11 +264,11 @@ namespace eval punk::path { #normalize by dropping leading slash before split - and then treating first 2 segments as a root #set parts [file split [string range $path 1 end]] set parts [split $path /] - #assert parts here has {} {} as first 2 entries + #assert parts here has {} {} as first 2 entries set rootindex 2 #currently prefer can backtrack to the //zipfs:/ scheme (below the mountpoint - to browse other mounts) #alternative handling for //zipfs:/path - don't go below mountpoint - #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint + #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint #review - more generally //:/path ? #todo - make an option for zipfs and others to determine the 'base' #if {"zipfs:" eq [lindex $parts 2]} { @@ -281,7 +281,7 @@ namespace eval punk::path { #set parts [file split $path] set parts [::split $path /] #e.g /a/b/c -> {} a b c - #or relative path a/b/c -> a b c + #or relative path a/b/c -> a b c #or c:/a/b/c -> c: a b c if {[string match *: [lindex $parts 0]]} { if {[lindex $parts 1] eq ""} { @@ -295,9 +295,9 @@ namespace eval punk::path { } elseif {[lindex $parts 0] ne ""} { #relpath a/b/c set parts [linsert $parts 0 .] 
- set rootindex 0 - #allow backtracking arbitrarily for leading .. entries - simplify where possible - #also need to stop possible conversion to absolute path + set rootindex 0 + #allow backtracking arbitrarily for leading .. entries - simplify where possible + #also need to stop possible conversion to absolute path set is_relpath 1 } } @@ -306,7 +306,7 @@ namespace eval punk::path { #puts stderr "-->baseparts:$baseparts" #ensure that if our rootindex already spans a dotted segment (after the first one) we remove it #must maintain initial . for relpaths to stop them converting to absolute via backtrack - # + # set finalparts [list [lindex $baseparts 0]] foreach b [lrange $baseparts 1 end] { if {$b ni {. ..}} { @@ -333,7 +333,7 @@ namespace eval punk::path { lappend finalparts $p } } - incr i + incr i } } else { foreach p [lrange $parts $rootindex+1 end] { @@ -345,7 +345,7 @@ namespace eval punk::path { switch -exact -- $p { . - "" {} .. { - lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 + lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 } default { lappend finalparts $p @@ -403,16 +403,16 @@ namespace eval punk::path { } - #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' - # - no volumerelative + #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' + # - no volumerelative # - no lookup of file volumes (volume is a windows concept - but with //zipfs:/ somewhat applicable to other platforms) # - /* as absolute (covers also //zipfs:/ (volume), //server , //./etc , //./UNC) # - xxx:// as absolute (scheme) # - xxx:/ or x:/ as absolute - # - x: xxx: -> as absolute (volume-basic or volume-extended) + # - x: xxx: -> as absolute (volume-basic or volume-extended) #note also on windows - legacy name for COM devices - # COM1 = COM1: + # COM1 = COM1: # //./COM1 ?? review proc pathtype {str} { @@ -425,7 +425,7 @@ namespace eval punk::path { return absolute } - #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review + #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review #e.g a:b:/.. or a::/.. or :/.. is not treated as volume/scheme whereas ab:/ is. 
set firstslash [string first / $str] if {$firstslash == -1} { @@ -434,9 +434,9 @@ namespace eval punk::path { set firstsegment [string range $str 0 $firstslash-1] } if {[set firstc [string first : $firstsegment]] > 0} { - set lhs_firstsegment [string range $firstsegment 0 $firstc-1] + set lhs_firstsegment [string range $firstsegment 0 $firstc-1] set rhs_firstsegment [string range $firstsegment $firstc+1 end] ;#exclude a:b/ etc - if {$rhs_firstsegment eq ""} { + if {$rhs_firstsegment eq ""} { set rhs_entire_path [string range $str $firstc+1 end] #assert lhs_firstsegment not empty since firstc > 0 #count following / sequence @@ -466,7 +466,7 @@ namespace eval punk::path { } } } - #assert first element of any return has been absolute or relative + #assert first element of any return has been absolute or relative return relative } @@ -489,7 +489,7 @@ namespace eval punk::path { } return $str } - #purely string based - no reference to filesystem knowledge + #purely string based - no reference to filesystem knowledge #unix-style forward slash only proc plainjoin {args} { set args [lmap a $args {string map "\\\\ /" $a}] @@ -499,12 +499,12 @@ namespace eval punk::path { set out "" foreach a $args { if {![string length $out]} { - append out [plain $a] + append out [plain $a] } else { set a [plain $a] if {[string map {/ ""} $out] eq ""} { set out [string range $out 0 end-1] - } + } if {[string map {/ ""} $a] eq ""} { #all / segment @@ -512,16 +512,16 @@ namespace eval punk::path { } else { if {[string length $a] > 2 && [string match "./*" $a]} { set a [string range $a 2 end] - } + } if {[string index $out end] eq "/"} { append out $a } else { - append out / $a + append out / $a } } } } - return $out + return $out } proc plainjoin1 {args} { if {[llength $args] == 1} { @@ -530,9 +530,9 @@ namespace eval punk::path { set out [trim_final_slash [lindex $args 0]] foreach a [lrange $args 1 end] { set a [trim_final_slash $a] - append out / $a + append out / $a } - return $out + return $out } #intention? @@ -554,13 +554,13 @@ namespace eval punk::path { #*** !doctools #[call [fun pathglob_as_re] [arg pathglob]] #[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure - #[para] ** matches any number of subdirectories. + #[para] ** matches any number of subdirectories. #[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc (except directly within /etc itself) #[para] e.g /etc/**.txt will match any .txt files at any depth below /etc #[para] any segment that does not contain ** must match exactly one segment in the path - #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc + #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc #[para] The pathglob doesn't have to contain glob characters, in which case the returned regex will match the pathglob exactly as specified. - #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals + #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals #todo - consider whether a way to escape the glob chars ? * is practical - to allow literals ? 
* @@ -572,9 +572,9 @@ namespace eval punk::path { } switch -- $seg { * {lappend pats {[^/]*}} - ** {lappend pats {.*}} + ** {lappend pats {.*}} default { - set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals + set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals #set seg [string map [list . {[.]}] $seg] set seg [string map {. [.]} $seg] if {[regexp {[*?]} $seg]} { @@ -614,14 +614,14 @@ namespace eval punk::path { } } } - # -- --- --- --- --- --- + # -- --- --- --- --- --- set opt_nocase [dict get $opts -nocase] - set explicit_nocase 1 ;#default to disprove + set explicit_nocase 1 ;#default to disprove if {$opt_nocase eq "\uFFFF"} { set opt_nocase 0 set explicit_nocase 0 - } - # -- --- --- --- --- --- + } + # -- --- --- --- --- --- if {$opt_nocase} { return [regexp -nocase [pathglob_as_re $pathglob] $path] } else { @@ -651,33 +651,33 @@ namespace eval punk::path { -call-depth-internal -default 0 -type integer -antiglob_paths -default {} -help\ "list of path patterns to exclude - may include * and ** path segments e.g + may include * and ** path segments e.g /usr/** (exlude subfolders based at /usr but not files within /usr itself) **/_aside (exlude files where _aside is last segment) **/_aside/* (exclude folders one below an _aside folder) **/_aside/** (exclude all folders with _aside as a segment)" - @values -min 0 -max -1 -optional 1 -type string + @values -min 0 -max -1 -optional 1 -type string tailglobs -default * -multiple 1 -help\ "Patterns to match against filename portion (last segment) of each file path within the directory tree being searched." } - #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ + #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ #then review if treefiles can replace dirfiles or if both should exist (dirfiles can have literal glob chars in path segments - but that is a rare usecase) proc treefilenames {args} { #*** !doctools #[call [fun treefilenames] [opt {option value...}] [opt {globpattern...}]] #[para]basic (glob based) list of filenames matching each pattern in tailglobs - recursive #[para] options: - #[para] [opt -dir] + #[para] [opt -dir] #[para] defaults to [lb]pwd[rb] - base path for tree to search - #[para] [opt -antiglob_paths] + #[para] [opt -antiglob_paths] #[para] list of path patterns to exclude - may include * and ** path segments e.g /usr/** #[para]no natsorting - so order is dependent on filesystem set argd [punk::args::parse $args withid ::punk::path::treefilenames] - lassign [dict values $argd] leaders opts values received + lassign [dict values $argd] leaders opts values received set tailglobs [dict get $values tailglobs] # -- --- --- --- --- --- --- set opt_antiglob_paths [dict get $opts -antiglob_paths] @@ -694,7 +694,7 @@ namespace eval punk::path { set opt_dir [dict get $opts -directory] } if {![file isdirectory $opt_dir]} { - return [list] + return [list] } } else { #assume/require to exist in any recursive call @@ -713,15 +713,26 @@ namespace eval punk::path { } #todo - account for vfs where matched path could appear to be a directory but is mounted so could be a desired match? 
- set dirfiles [lsort [glob -nocomplain -dir $opt_dir -type f {*}$tailglobs]] + if {[catch {glob -nocomplain -dir $opt_dir -type f {*}$tailglobs} matches]} { + #we can get for example a permissions error + puts stderr "treefilenames error while listing files in dir $opt_dir\n $matches" + set dirfiles [list] + } else { + set dirfiles [lsort $matches] + } + lappend files {*}$dirfiles - set dirdirs [glob -nocomplain -dir $opt_dir -type d *] + if {[catch {glob -nocomplain -dir $opt_dir -type d *} dirdirs]} { + puts stderr "treefilenames error while listing subdirs in dir $opt_dir\n $dirdirs" + set dirdirs [list] + } + foreach dir $dirdirs { set skip 0 foreach anti $opt_antiglob_paths { if {[globmatchpath $anti $dir]} { set skip 1 - break + break } } if {$skip} { @@ -743,8 +754,8 @@ namespace eval punk::path { #[item] #[para] Arguments: # [list_begin arguments] - # [arg_def string reference] The path from which the relative path to location is determined. - # [arg_def string location] The location path which may be above or below the reference path + # [arg_def string reference] The path from which the relative path to location is determined. + # [arg_def string location] The location path which may be above or below the reference path # [list_end] #[item] #[para] Results: @@ -753,7 +764,7 @@ namespace eval punk::path { #[item] #[para] Notes: #[para] Both paths must be the same type - ie both absolute or both relative - #[para] Case sensitive. ie punk::path::relative /etc /etC + #[para] Case sensitive. ie punk::path::relative /etc /etC # will return ../etC #[para] On windows, the drive-letter component (only) is not case sensitive #[example_begin] @@ -774,7 +785,7 @@ namespace eval punk::path { #[example_begin] # P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below # - somewhere/below - # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here # - ../../lib/here #[example_end] #[list_end] @@ -791,7 +802,7 @@ namespace eval punk::path { #avoid normalizing if possible (file normalize *very* expensive on windows) set do_normalize 0 if {[file pathtype $reference] eq "relative"} { - #if reference is relative so is location + #if reference is relative so is location if {[regexp {[.]{2}} [list $reference $location]]} { set do_normalize 1 } @@ -857,7 +868,7 @@ namespace eval punk::path::lib { namespace path [namespace parent] #*** !doctools #[subsection {Namespace punk::path::lib}] - #[para] Secondary functions that are part of the API + #[para] Secondary functions that are part of the API #[list_begin definitions] @@ -877,17 +888,17 @@ namespace eval punk::path::lib { namespace eval punk::path::system { #*** !doctools #[subsection {Namespace punk::path::system}] - #[para] Internal functions that are not part of the API + #[para] Internal functions that are not part of the API } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -## Ready +## Ready package provide punk::path [namespace eval punk::path { variable pkg punk::path variable version - set version 0.1.0 + set version 0.1.0 }] return diff --git a/src/bootsupport/modules/textblock-0.1.3.tm b/src/bootsupport/modules/textblock-0.1.3.tm index 2d185f01..c102ca29 100644 --- a/src/bootsupport/modules/textblock-0.1.3.tm +++ b/src/bootsupport/modules/textblock-0.1.3.tm @@ -5974,13 +5974,40 @@ tcl::namespace::eval textblock { [>punk . 
rhs]\ [punk::lib::list_as_lines -- [lrepeat 8 " | "]] } + punk::args::define [punk::lib::tstr -return string { + @id -id ::textblock::table + @cmd -name "textblock::table" -help\ + "A wrapper for creating a textblock::class::table + + NOTE: more options available - argument definition + is incomplete" + @opts + -return -choices {table tableobject} + -rows -type list -default "" -help\ + "A list of lists. + Each toplevel element represents a row. + The number of elements in each row must + be the same. + e.g for 2 rows and 3 columns: + table -rows {{r0c0 r0c1 r0c2} {r1c0 r1c1 r1c2}} + " + -headers -type list -default "" -help\ + "This is a simplified form where each column + has a single header row. + Each element in this list goes into the top + header row for a column. + More complex header arrangements where each + column has multiple headers can be made + by using -return tableobject and calling + $tableobj configure_column -headers" + }] proc table {args} { #todo - use punk::args upvar ::textblock::class::opts_table_defaults toptdefaults set defaults [tcl::dict::create\ -rows [list]\ -headers [list]\ - -return string\ + -return table\ ] @@ -6017,7 +6044,7 @@ tcl::namespace::eval textblock { - if {$opt_return eq "string"} { + if {$opt_return eq "table"} { set result [$t print] $t destroy return $result diff --git a/src/bootsupport/modules/tomlish-1.1.2.tm b/src/bootsupport/modules/tomlish-1.1.2.tm new file mode 100644 index 00000000..9270ca9c --- /dev/null +++ b/src/bootsupport/modules/tomlish-1.1.2.tm @@ -0,0 +1,5566 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application tomlish 1.1.2 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin tomlish_module_tomlish 0 1.1.2] +#[copyright "2024"] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] +#[require tomlish] +#[keywords module parsing toml configuration] +#[description] +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) 
may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of tomlish +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by tomlish +#[list_begin itemized] + +package require Tcl 8.6- +package require struct::stack +package require logger + +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {struct::stack}] + +#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') + + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish { + namespace export {[a-z]*}; # Convention: export all lowercase + variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + + #ARRAY is analogous to a Tcl list + #TABLE is analogous to a Tcl dict + #WS = inline whitespace + #KEY = bare key and value + #QKEY = double quoted key and value ;#todo - rename to DQKEY? + #SQKEY = single quoted key and value + #ITABLE = inline table (*can* be anonymous table) + # inline table values immediately create a table with the opening brace + # inline tables are fully defined between their braces, as are dotted-key subtables defined within + # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained + + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] + #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) + #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) 
+ set min_int -9223372036854775808 ;#-2^63 + set max_int +9223372036854775807 ;#2^63-1 + + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" + puts stderr $msg + } + logger::initNamespace ::tomlish + foreach lvl [logger::levels] { + interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl + log::logproc $lvl tomlish_log_$lvl + } + + #*** !doctools + #[subsection {Namespace tomlish}] + #[para] Core API functions for tomlish + #[list_begin definitions] + + proc tags {} { + return $::tomlish::tags + } + + #helper function for to_dict + proc _get_keyval_value {keyval_element} { + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" + set found_value 0 + #find the value + # 3 is the earliest index at which the value could occur (depending on whitespace) + set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error "tomlish _get_keyval_value keyval_element doesn't seem to be a properly structured { = } list" + } + foreach sub [lrange $keyval_element 2 end] { + #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey + switch -exact -- [lindex $sub 0] { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + set type [lindex $sub 0] + set value [lindex $sub 1] + set found_sub $sub + incr found_value 1 + } + default {} + } + } + if {!$found_value} { + error "tomlish Failed to find value element in KEY. '$keyval_element'" + } + if {$found_value > 1} { + error "tomlish Found multiple value elements in KEY, expected exactly one. '$keyval_element'" + } + + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + #simple (non-container, no-substitution) datatype + set result [list type $type value $value] + } + STRING - STRINGPART { + set result [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL - LITERALPART { + #REVIEW + set result [list type $type value $value] + } + TABLE { + #invalid? + error "tomlish _get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::to_dict [list $found_sub]] + } + ARRAY { + #we need to recurse to get the corresponding dict for the contained item(s) + #pass in the whole $found_sub - not just the $value! + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + default { + error "tomlish Unexpected value type '$type' found in keyval '$keyval_element'" + } + } + return $result + } + + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "tomlish _get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. 
'$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } + + + #to_dict is a *basic* programmatic datastructure for accessing the data. + # produce a dictionary of keys and values from a tomlish tagged list. + # to_dict is primarily for reading toml data. + #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, + # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. + # creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. + #A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. + # + #Note, currently we get a plain sub dictionary when an inline table is a direct value for a key, but an ITABLE when it's in an ARRAY - REVIEW + #Namespacing? + #ie note the difference: + #[Data] + #temp = { cpu = 79.5, case = 72.0} + # versus + #[Data] + #temps = [{cpu = 79.5, case = 72.0}] + proc to_dict {tomlish} { + + #keep track of which tablenames have already been directly defined, + # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' + #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. + #we don't error out just because a previous tablename segment has already appeared. + variable tablenames_seen [list] + + + log::info ">>> processing '$tomlish'<<<" + set items $tomlish + + foreach lst $items { + if {[lindex $lst 0] ni $::tomlish::tags} { + error "supplied string does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + } + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + #ignore TOMLISH tag at beginning + set items [lrange $tomlish 1 end] + } + + set datastructure [dict create] + foreach item $items { + set tag [lindex $item 0] + #puts "...> item:'$item' tag:'$tag'" + switch -exact -- $tag { + KEY - QKEY - SQKEY { + log::debug "--> processing $tag: $item" + set key [lindex $item 1] + if {$tag eq "QKEY"} { + set key [::tomlish::utils::unescape_string $key] + } + #!todo - normalize key. (may be quoted/doublequoted) + + if {[dict exists $datastructure $key]} { + error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." 
+ } + + #lassign [_get_keyval_value $item] type val + set keyval_dict [_get_keyval_value $item] + dict set datastructure $key $keyval_dict + } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "to_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } + TABLE { + set tablename [lindex $item 1] + set tablename [::tomlish::utils::tablename_trim $tablename] + + if {$tablename in $tablenames_seen} { + error "Table name '$tablename' has already been directly defined in the toml data. Invalid." + } + + log::debug "--> processing $tag (name: $tablename): $item" + set name_segments [::tomlish::utils::tablename_split $tablename] + set last_seg "" + #toml spec rule - all segments mst be non-empty + #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. + + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] + + foreach rawseg $name_segments { + + set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes are processed within it. + set seg [tcl::string::range $rawseg 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] + } else { + set seg $rawseg + } + + #no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. + #if {$rawseg eq ""} { + # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" + #} + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg + + if {[dict exists $datastructure {*}$table_key_hierarchy]} { + #It's ok for this key to already exist *if* it was defined by a previous tablename, + # but not if it was defined as a key/qkey/skey ? + + set testkey [join $table_key_hierarchy_raw .] 
+ + set testkey_length [llength $table_key_hierarchy_raw] + set found_testkey 0 + if {$testkey in $tablenames_seen} { + set found_testkey 1 + } else { + #see if it was defined by a longer entry + foreach seen $tablenames_seen { + set seen_segments [::tomlish::utils::tablename_split $seen] + #these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, + # and strip the quotes from both single-quoted and double-quoted entries. + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + + #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" + if {$testkey eq $seen_match} { + set found_testkey 1 + } + } + } + + if {$found_testkey == 0} { + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg + } + } + + } + + #ensure empty tables are still represented in the datastructure + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" + } + } + + #We must do this after the key-collision test above! + lappend tablenames_seen $tablename + + + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" + + #now add the contained elements + foreach element [lrange $item 2 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? 
+ set keyval_key [lindex $element 1] + if {$type eq "QKEY"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + #now make sure we add an empty value if there were no contained elements! + #!todo. + } + ITABLE { + #SEP??? + set datastructure [list] + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in ITABLE context. 
Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + } + ARRAY { + #arrays in toml are allowed to contain mixtures of types + set datastructure [list] + log::debug "--> processing array: $item" + + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::to_dict [list $element]]] + } + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments + } + default { + error "Unexpected value type '$type' found in array" + } + } + } + } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted + switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "double quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? 
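+ #(hedged illustration of the trimming implemented below:
+ # a line-ending backslash in a """ string, e.g. val = """(backslash)(newline)   abc""",
+ # drops the newline and the following whitespace so the value becomes "abc")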
+ set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + WS - COMMENT - NEWLINE { + #ignore + } + default { + error "Unexpected tag '$tag' in Tomlish list '$tomlish'" + } + } + } + return $datastructure + } + + + proc _from_dictval_tomltype {parents tablestack keys typeval} { + set type [dict get $typeval type] + set val [dict get $typeval value] + switch -- $type { + ARRAY { + set subitems [list] + foreach item $val { + lappend subitems [_from_dictval [list {*}$parents ARRAY] $tablestack $keys $item] SEP + } + if {[lindex $subitems end] eq "SEP"} { + set subitems [lrange $subitems 0 end-1] + } + return [list ARRAY {*}$subitems] + } + ITABLE { + if {$val eq ""} { + return ITABLE + } else { + return [_from_dictval [list {*}$parents ITABLE] $tablestack $keys $val] + } + } + MULTISTRING { + #value is a raw string that isn't encoded as tomlish + #create a valid toml snippet with the raw value and decode it to the proper tomlish MULTISTRING format + set tomlpart "x=\"\"\"\\\n" + append tomlpart $val "\"\"\"" + set tomlish [tomlish::decode::toml $tomlpart] + #e.g if val = " etc\nblah" + #TOMLISH {DOTTEDKEY {{KEY x}} = {MULTISTRING CONT {NEWLINE LF} {WS { }} {STRINGPART etc} {NEWLINE lf} {STRINGPART blah} } } + #lindex 1 3 is the MULTISTRING tomlish list + return [lindex $tomlish 1 3] + } + MULTILITERAL { + set tomlpart "x='''\n" + append tomlpart $val ''' + set tomlish [tomlish::decode::toml $tomlpart] + return [lindex $tomlish 1 3] + } + default { + return [list $type $val] + } + } + } + + proc _from_dictval {parents tablestack keys vinfo} { + set k [lindex $keys end] + if {[regexp {\s} $k] || [string first . $k] >= 0} { + #Any dot in the key would have been split by to_dict - so if it's present here it's part of this key - not a level separator! + #todo - QKEY? 
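+ #(hedged example: a key such as {tater.man} or {big key} arriving here contains a literal
+ # dot or space - it is one key, not a path - so it is emitted as a single-quoted (SQKEY)
+ # segment rather than being split into levels)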
+ set K_PART [list SQKEY $k] + } else { + set K_PART [list KEY $k] + } + puts stderr "---parents:'$parents' keys:'$keys' vinfo: $vinfo---" + puts stderr "---tablestack: $tablestack---" + set result [list] + set lastparent [lindex $parents end] + if {$lastparent in [list "" do_inline]} { + if {[tomlish::dict::is_tomltype $vinfo]} { + set type [dict get $vinfo type] + #treat ITABLE differently? + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result DOTTEDKEY [list $K_PART {WS { }}] = {WS { }} $sublist {NEWLINE lf} + } else { + #set result [list TABLE $k {NEWLINE lf}] + if {$vinfo ne ""} { + + #set result [list DOTTEDKEY [list [list KEY $k]] = ] + #set records [list ITABLE] + + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + + if {$lastparent eq "do_inline"} { + set result [list DOTTEDKEY [list $K_PART] =] + set records [list ITABLE] + } else { + #review - quoted k ?? + set result [list TABLE $k {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $k]] + set records [list] + } + + + + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >= 0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART {WS { }}] = {WS { }} $sublist] + } else { + if {$vv eq ""} { + #experimental + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline KEY $vk (tablestack:$tablestack)" + #set tname [tomlish::dict::name_from_tablestack [list {*}$tablestack [list T $vk]]] + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + } else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + set tablestack [list {*}$tablestack [list I $vk]] + } + } else { + if { 0 } { + #experiment.. sort of getting there. + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline2 KEYS [list {*}$keys $vk] (tablestack:$tablestack)" + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + + #review - todo? 
+ set dottedkey_value [_from_dictval [list {*}$parents TABLE] $tablestack [list {*}$keys $vk] $vv] + lappend record {*}$dottedkey_value + + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + } + if {$dictidx != $lastidx} { + #lappend record SEP + if {$lastparent eq "do_inline"} { + lappend record SEP + } else { + lappend record {NEWLINE lf} + } + } + lappend records $record + incr dictidx + } + if {$lastparent eq "do_inline"} { + lappend result $records {NEWLINE lf} + } else { + lappend result {*}$records {NEWLINE lf} + } + } else { + if {$lastparent eq "do_inline"} { + lappend result DOTTEDKEY [list [list KEY $k]] = ITABLE {NEWLINE lf} + } else { + lappend result TABLE $k {NEWLINE lf} + } + } + } + } else { + #lastparent is not toplevel "" or "do_inline" + if {[tomlish::dict::is_tomltype $vinfo]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result {*}$sublist + } else { + if {$lastparent eq "TABLE"} { + #review + dict for {vk vv} $vinfo { + set dottedkey_value [_from_dictval [list {*}$parents DOTTEDKEY] $tablestack [list {*}$keys $vk] $vv] + lappend result [list DOTTEDKEY [list [list KEY $vk]] = $dottedkey_value {NEWLINE lf}] + } + } else { + if {$vinfo ne ""} { + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + set sub [list] + set result $lastparent ;#e.g sets ITABLE + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >=0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART] = $sublist] + } else { + if {$vv eq ""} { + #can't just uninline at this level + #we need a better method to query main dict for uninlinability at each level + # (including what's been inlined already) + #if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + # puts stderr "_from_dictval uninline2 KEY $keys" + # set tname [join [list {*}$keys $vk] .] + # set record [list TABLE $tname {NEWLINE lf}] + # set tablestack [list {*}$tablestack [list T $vk]] + #} else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + #} + } else { + #set sub [_from_dictval ITABLE $vk $vv] + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + #set record [list DOTTEDKEY [list $VK_PART] = ITABLE $dottedkey_value] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + if {$dictidx != $lastidx} { + lappend record SEP + } + lappend result $record + incr dictidx + } + } else { + puts stderr "table x-1" + lappend result DOTTEDKEY [list $K_PART] = ITABLE + } + } + } + } + return $result + } + + + proc from_dict {d} { + #consider: + # t1={a=1,b=2} + # x = 1 + #If we represent t1 as an expanded table we get + # [t1] + # a=1 + # b=2 + # x=1 + # --- which is incorrect - as x was a toplevel key like t1! 
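+ # (hedged illustration: keeping t1 inline avoids the problem -
+ #   t1 = {a = 1, b = 2}
+ #   x = 1
+ # - both keys remain at the top level without reordering)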
+ #This issue doesn't occur if x is itself an inline table + # t1={a=1,b=2} + # x= {no="problem"} + # + # (or if we were to reorder x to come before t1) + + #As the dictionary form doesn't distinguish the structure used to create tables {[table1]\nk=v} vs inline {table1={k=v}} + #Without a solution, from_dict would have to always produce the inline form for toplevel tables unless we allowed re-ordering, + #which is unpreferred here. + + #A possible solution: + #scan the top level to see if all (trailing) elements are themselves dicts + # (ie not of form {type XXX value yyy}) + # + # A further point is that if all root level values are at the 'top' - we can treat lower table-like structures as {[table]} elements + #ie we don't need to force do_inline if all the 'simple' keys are before any compound keys + + #set root_has_values 0 + #approach 1) - the naive approach - forces inline when not always necessary + #dict for {k v} $d { + # if {[llength $v] == 4 && [lindex $v 0] eq "type"} { + # set root_has_values 1 + # break + # } + #} + + + #approach 2) - track the position of last {type x value y} in the dictionary built by to_dict + # - still not perfect. Inlines dotted tables unnecessarily + #This means from_dict doesn't produce output optimal for human editing. + set last_simple [tomlish::dict::last_tomltype_posn $d] + + + ## set parent "do_inline" ;#a value used in _from_dictval to distinguish from "" or other context based parent values + #Any keys that are themselves tables - will need to be represented inline + #to avoid reordering, or incorrect assignment of plain values to the wrong table. + + ## set parent "" + #all toplevel keys in the dict structure can represent subtables. + #we are free to use {[tablename]\n} syntax for toplevel elements. + + + set tomlish [list TOMLISH] + set dictposn 0 + set tablestack [list [list T root]] ;#todo + dict for {t tinfo} $d { + if {$last_simple > $dictposn} { + set parents [list do_inline] + } else { + set parents [list ""] + } + set keys [list $t] + set trecord [_from_dictval $parents $tablestack $keys $tinfo] + lappend tomlish $trecord + incr dictposn + } + return $tomlish + } + + proc json_to_toml {json} { + #*** !doctools + #[call [fun json_to_toml] [arg json]] + #[para] + + set tomlish [::tomlish::from_json $json] + set toml [::tomlish::to_toml $tomlish] + } + + #TODO use huddle? 
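+ 
+ # Illustrative usage sketch (wrapped in 'if 0' so it never runs at package load).
+ # It shows the intended flow between the entry points visible in this file
+ # (tomlish::from_toml / to_dict / from_dict / to_toml); the sample document and the
+ # notes on value shapes are illustrative assumptions, not a normative example.
+ if 0 {
+ set toml_src {
+ [server]
+ host = "localhost"
+ port = 8080
+ }
+ set tomlish [tomlish::from_toml $toml_src] ;#whitespace/comment preserving tagged list
+ set d [tomlish::to_dict $tomlish] ;#nested dict; leaf values appear as {type X value y}
+ set regenerated [tomlish::to_toml [tomlish::from_dict $d]] ;#layout may differ from $toml_src
+ }
+ 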
+ proc from_json {json} { + set jstruct [::tomlish::json_struct $json] + return [::tomlish::from_json_struct $jstruct] + } + + proc from_json_struct {jstruct} { + package require fish::json_toml + return [fish::json_toml::jsonstruct2tomlish $jstruct] + } + + proc toml_to_json {toml} { + set tomlish [::tomlish::from_toml $toml] + return [::tomlish::get_json $tomlish] + } + + proc get_json {tomlish} { + package require fish::json + set d [::tomlish::to_dict $tomlish] + #return [::tomlish::dict_to_json $d] + return [fish::json::from "struct" $d] + } + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +namespace eval tomlish::encode { + #*** !doctools + #[subsection {Namespace tomlish::encode}] + #[para] + #[list_begin definitions] + + #STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness + # take a value of the appropriate type and wrap as a tomlish tagged item + proc string {s} { + return [list STRING $s] + } + + proc int {i} { + #whole numbers, may be prefixed with a + or - + #Leading zeros are not allowed + #Hex,octal binary forms are allowed (toml 1.0) + #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) + #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. + # - We should probably raise an error for number larger than this and suggest the user supply it as a string? + if {[tcl::string::last , $i] > -1} { + error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" + } + if {![::tomlish::utils::int_validchars $i]} { + error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" + } + + if {[::tomlish::utils::is_int $i]} { + return [list INT $i] + } else { + error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" + } + + } + + proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] + } + if {[::tomlish::utils::is_float $f]} { + return [list FLOAT $f] + } else { + error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" + } + } + + proc datetime {str} { + if {[::tomlish::utils::is_datetime $str]} { + return [list DATETIME $str] + } else { + error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" + } + } + + proc boolean {b} { + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![tcl::string::is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. 
[::tomlish::parse::report_line]" + } else { + if {[expr {$b && 1}]} { + return [::list BOOL true] + } else { + return [::list BOOL false] + } + } + } + + + #TODO + #Take tablename followed by + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) + # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types + proc table {name args} { + set pairs [list] + foreach t $args { + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" + } + lappend pairs [list KEY $keystr = $valuepart] + } elseif {[llength $t] == 2} { + #!todo - type heuristics + lassign $t n v + lappend pairs [list KEY $n = [list STRING $v]] + } else { + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" + foreach part [lrange $item 1 end] { + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml '''$literal''' + } + INT - + BOOL - + FLOAT - + DATETIME { + append toml [lindex $item 1] + } + INCOMPLETE { + error "cannot process tomlish term tagged as INCOMPLETE" + } + COMMENT { + append toml "#[lindex $item 1]" + } + BOM { + #Byte Order Mark may appear at beginning of a file. Needs to be preserved. + append toml "\uFEFF" + } + default { + error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." + } + } + + } + return $toml + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::encode ---}] +} +#fish toml from tomlish + +#(encode tomlish as toml) +interp alias {} tomlish::to_toml {} tomlish::encode::tomlish + +# + + +namespace eval tomlish::decode { + #*** !doctools + #[subsection {Namespace tomlish::decode}] + #[para] + #[list_begin definitions] + + #return a Tcl list of tomlish tokens + #i.e get a standard list of all the toml terms in string $s + #where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. + #(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) + # ---------------------------------------------------------------------------------------------- + # NOTE: the production of tomlish from toml source doesn't indicate the toml source was valid!!! + # e.g we deliberately don't check certain things such as duplicate table declarations here. + # ---------------------------------------------------------------------------------------------- + #Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. + # (e.g perhaps a toml editor to highlight violations for fixing) + # A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. 
+ # e.g dicts or an object oriented structure + #Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage + #e.g to_dict will substitute \r \n \uHHHH \UHHHHHHH etc + #This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. + # (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) + #If we were to unescape a tab character for example + # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. + # For this reason, we also do absolutely no line-ending transformations based on platform. + # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + + proc toml {args} { + #*** !doctools + #[call [fun toml] [arg arg...]] + #[para] return a Tcl list of tomlish tokens + + set s [join $args \n] + + namespace upvar ::tomlish::parse is_parsing is_parsing + set is_parsing 1 + + + if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { + tomlish::parse::spacestack destroy + } + struct::stack ::tomlish::parse::spacestack + + namespace upvar ::tomlish::parse last_space_action last_space_action + namespace upvar ::tomlish::parse last_space_type last_space_type + + + namespace upvar ::tomlish::parse tok tok + set tok "" + + namespace upvar ::tomlish::parse type type + namespace upvar ::tomlish::parse tokenType tokenType + ::tomlish::parse::set_tokenType "" + namespace upvar ::tomlish::parse tokenType_list tokenType_list + set tokenType [list] ;#Flat (un-nested) list of tokentypes found + + namespace upvar ::tomlish::parse lastChar lastChar + set lastChar "" + + + set result "" + namespace upvar ::tomlish::parse nest nest + set nest 0 + + namespace upvar ::tomlish::parse v v ;#array keyed on nest level + + + set v(0) {TOMLISH} + array set s0 [list] ;#whitespace data to go in {SPACE {}} element. + set parentlevel 0 + + namespace upvar ::tomlish::parse i i + set i 0 + + namespace upvar ::tomlish::parse state state + + namespace upvar ::tomlish::parse braceCount braceCount + set barceCount 0 + namespace upvar ::tomlish::parse bracketCount bracketCount + set bracketCount 0 + + set sep 0 + set r 1 + namespace upvar ::tomlish::parse token_waiting token_waiting + set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. + + + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} + namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) + set linenum 1 + + set ::tomlish::parse::state_list [list] + try { + while {$r} { + set r [::tomlish::parse::tok $s] + #puts stdout "got tok: '$tok' while parsing string '$s' " + set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' + + + + #puts "got token: '$tok' tokenType='$tokenType'. 
while v($nest) = [set v($nest)]" + #puts "-->tok: $tok tokenType='$tokenType'" + set prevstate $state + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below + + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) + } + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { + #pop_trigger_tokens: newline tablename endarray endinlinetable + #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? 
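+ #(descriptive note: downstream, a double_squote token in value position becomes an empty
+ # LITERAL "", and within multiliteral-space it becomes a LITERALPART of two quote chars -
+ # see the double_squote handling in the 'no space level change' branch below)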
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] + } + ''' { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. 
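+ #(hedged example: x = '''(newline)abc''' keeps that leading {NEWLINE lf} inside the
+ # MULTILITERAL for lossless round-tripping; to_dict later drops it so the value is "abc")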
+ lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + tablename { + #note: a tablename only 'pops' if we are greater than zero + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" + } + tablearrayname { + #!review - tablearrayname different to tablename regarding push/pop? + #note: a tablename only 'pops' if we are greater than zero + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" + } + endarray { + #nothing to do here. + } + comma { + #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" + } + endinlinetable { + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" + } + endmultiquote { + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" + } + default { + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" + } + } + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + + incr nest -1 + + } elseif {$last_space_action eq "push"} { + set prevnest $nest + incr nest 1 + set v($nest) [list] + # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + barekey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + quotedkey - itablequotedkey { + set v($nest) [list QKEY $tok] ;#$tok is the keyname + } + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname + } + tablename { + #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! 
+ #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish + # back to toml file will be identical. + #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. + # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, + # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. + + #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, + # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the + # tomlish list? + + set test_only [::tomlish::utils::tablename_trim $tok] + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name + #note also that equivalent tablenames may have different toml representations even after being trimmed! + #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) + #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. + } + tablearrayname { + set test_only [::tomlish::utils::tablename_trim $tok] + puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name + } + startarray { + set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. + } + startinlinetable { + set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. + } + startmultiquote { + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL + } + default { + error "---- push trigger tokenType '$tokenType' not yet implemented" + } + } + + } else { + #no space level change + switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } + starttablename { + #$tok is triggered by the opening bracket and sends nothing to output + } + starttablearrayname { + #$tok is triggered by the double opening brackets and sends nothing to output + } + tablename - tablenamearray { + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" + #set v($nest) [list TABLE $tok] + } + endtablename - endtablearrayname { + #no output into the tomlish list for this token + } + startinlinetable { + puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" + } + startquote { + switch -exact -- $newstate { + string-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itable-quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startsquote { + switch -exact -- $newstate { + literal-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "literal" + set tok "" + } + squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itable-squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } + default { + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startmultiquote { + #review + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringpart" + set tok "" + } + endquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + endmultiquote { + #JMN!! + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + multistring { + #review + lappend v($nest) [list MULTISTRING $tok] + } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } + quotedkey { + #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + + } + untyped_value { + #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. + if {$tok in {true false}} { + set tag BOOL + } elseif {[::tomlish::utils::is_int $tok]} { + set tag INT + } elseif {[::tomlish::utils::is_float $tok]} { + set tag FLOAT + } elseif {[::tomlish::utils::is_datetime $tok]} { + set tag DATETIME + } else { + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. 
[tomlish::parse::report_line] (no space level change)" + } + lappend v($nest) [list $tag $tok] + + } + comment { + #puts stdout "----- comment token returned '$tok'------" + lappend v($nest) [list COMMENT "$tok"] + } + equal { + #we append '=' to the nest so that any surrounding whitespace is retained. + lappend v($nest) = + } + comma { + lappend v($nest) SEP + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + whitespace { + lappend v($nest) [list WS $tok] + } + continuation { + lappend v($nest) CONT + } + bom { + lappend v($nest) BOM + } + eof { + #ok - nothing more to add to the tomlish list. + #!todo - check previous tokens are complete/valid? + } + default { + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + + if {!$next_tokenType_known} { + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" + ::tomlish::parse::set_tokenType "" + set tok "" + } + + if {$state eq "end-state"} { + break + } + + + } + + #while {$nest > 0} { + # lappend v([expr {$nest -1}]) [set v($nest)] + # incr nest -1 + #} + while {[::tomlish::parse::spacestack size] > 1} { + ::tomlish::parse::spacestack pop + lappend v([expr {$nest -1}]) [set v($nest)] + incr nest -1 + + #set parent [spacestack peek] ;#the level being appended to + #lassign $parent type state + #if {$type eq "space"} { + # + #} elseif {$type eq "buffer"} { + # lappend v([expr {$nest -1}]) {*}[set v($nest)] + #} else { + # error "invalid spacestack item: $parent" + #} + } + + } finally { + set is_parsing 0 + } + return $v(0) + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::decode ---}] +} +#decode toml to tomlish +interp alias {} tomlish::from_toml {} tomlish::decode::toml + +namespace eval tomlish::utils { + #*** !doctools + #[subsection {Namespace tomlish::utils}] + #[para] + #[list_begin definitions] + + + #tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace + # tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] + #trimmed, the tablename becomes {a.b.c} + # A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] + #ie whitespace is only irrelevant if it's outside a quoted segment + #trimmed, the tablename becomes {a.b."c etc "} + proc tablename_trim {tablename} { + set segments [tablename_split $tablename false] + set trimmed_segments [list] + foreach seg $segments { + lappend trimmed_segments [::string trim $seg " \t"] + } + return [join $trimmed_segments .] 
+ } + + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [tcl::string::length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + + #utils::tablename_split + proc tablename_split {tablename {normalize false}} { + #we can't just split on . because we have to handle quoted segments which may contain a dot. + #eg {dog."tater.man"} + set sLen [tcl::string::length $tablename] + set segments [list] + set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + #quoted is for double-quotes, litquoted is for single-quotes (string literal) + set seg "" + for {set i 0} {$i < $sLen} {incr i} { + + if {$i > 0} { + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $tablename $i] + + if {$c eq "."} { + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } + } + } elseif {($c eq "\"") && ($lastChar ne "\\")} { + if {$mode eq "unknown"} { + if {[tcl::string::trim $seg] ne ""} { + #we don't allow a quote in the middle of a bare key + error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" + } + set mode "quoted" + set seg "\"" + } elseif {$mode eq "unquoted"} { + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" ;#make sure we only accept a dot or end-of-data now. + } elseif {$mode eq "litquoted"} { + append seg $c + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } + } elseif {($c eq "\'")} { + if {$mode eq "unknown"} { + append seg $c + set mode "litquoted" + } elseif {$mode eq "unquoted"} { + #single quote inside e.g o'neill + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + + } elseif {$mode eq "litquoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + + } elseif {$c in [list " " \t]} { + if {$mode eq "syntax"} { + #ignore + } else { + append seg $c + } + } else { + if {$mode eq "syntax"} { + error "tablename_split. 
Expected a dot separator. got '$c'. tablename: '$tablename'" + } + if {$mode eq "unknown"} { + set mode "unquoted" + } + append seg $c + } + if {$i == $sLen-1} { + #end of data + ::tomlish::log::debug "End of data: mode='$mode'" + switch -exact -- $mode { + quoted { + if {$c ne "\""} { + error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" + } + if {$normalize} { + lappend segments $seg + } else { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong + } + } + litquoted { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { + error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" + } + lappend segments $seg + } + unquoted - unknown { + lappend segments $seg + } + syntax { + #ok - segment already lappended + } + default { + lappend segments $seg + } + } + } + } + foreach seg $segments { + set trimmed [tcl::string::trim $seg " \t"] + #note - we explicitly allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + #if {$trimmed in [list "''" "\"\""]} { + # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" + #} + if {$trimmed eq "" } { + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" + } + } + return $segments + } + + proc unicode_escape_info {slashu} { + #!todo + # validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and + # is a valid 'unicode scalar value' + # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive + #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} + if {[tcl::string::match {\\u*} $slashu]} { + set exp {^\\u([0-9a-fA-F]{4}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %4x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } + } else { + return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] + } + } elseif {[tcl::string::match {\\U*} $slashu]} { + set exp {^\\U([0-9a-fA-F]{8}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %8x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } else { + return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] + } + } + } else { + return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] + } + } else { + return [list err [list reason "Supplied string did not start with \\u or \\U" ]] + } + + } + + proc unescape_string {str} { + #note we can't just use Tcl subst because: + # it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. 
+ # it would strip out backslashes inappropriately: e.g "\j" becomes just j + # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn + # it replaces\ with a single whitespace + #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + + set buffer "" + set buffer4 "" ;#buffer for 4 hex characters following a \u + set buffer8 "" ;#buffer for 8 hex characters following a \u + + set sLen [tcl::string::length $str] + + #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc + set slash_active 0 + set unicode4_active 0 + set unicode8_active 0 + + + #!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? + set i 0 + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $str [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $str $i] + ::tomlish::log::debug "unescape_string. got char $c" + scan $c %c n + if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { + #we don't expect unescaped unicode characters from 0000 to 001F - + #*except* for raw tab (which is whitespace) and newlines + error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" + } + incr i ;#must incr here because we do'returns'inside the loop + if {$c eq "\\"} { + if {$slash_active} { + append buffer "\\" + set slash_active 0 + } elseif {$unicode4_active} { + error "unescape_string. unexpected case slash during unicode4 not yet handled" + } elseif {$unicode8_active} { + error "unescape_string. unexpected case slash during unicode8 not yet handled" + } else { + # don't output anything (yet) + set slash_active 1 + } + } else { + if {$unicode4_active} { + if {[tcl::string::length $buffer4] < 4} { + append buffer4 $c + } + if {[tcl::string::length $buffer4] == 4} { + #we have a \uHHHH to test + set unicode4_active 0 + set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$unicode8_active} { + if {[tcl::string::length $buffer8] < 8} { + append buffer8 $c + } + if {[tcl::string::length $buffer8] == 8} { + #we have a \UHHHHHHHH to test + set unicode8_active 0 + set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$slash_active} { + set slash_active 0 + set ctest [tcl::string::map {{"} dq} $c] + switch -exact -- $ctest { + dq { + set e "\\\"" + append buffer [subst -nocommand -novariable $e] + } + b - t - n - f - r { + set e "\\$c" + append buffer [subst -nocommand -novariable $e] + } + u { + set unicode4_active 1 + set buffer4 "" + } + U { + set unicode8_active 1 + set buffer8 "" + } + default { + set slash_active 0 + + append buffer "\\" + append buffer $c + } + } + } else { + append buffer $c + } + } + } + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + if {$unicode4_active} { + error "End of string reached before complete unicode escape sequence \uHHHH" + } + if {$unicode8_active} { + error "End of string reached before complete unicode escape sequence \UHHHHHHHH" + } + if {$slash_active} { + append buffer "\\" + } + return $buffer + } + + proc 
normalize_key {rawkey} { + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes allowed within it. + set key [tcl::string::range $rawkey 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + # + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only + set key [::tomlish::utils::unescape_string $keydata] + #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. + } else { + set key $rawkey + } + return $key + } + + proc string_to_slashu {string} { + set rv {} + foreach c [split $string {}] { + scan $c %c c + append rv {\u} + append rv [format %.4X $c] + } + return $rv + } + + #'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. + proc nonprintable_to_slashu {s} { + set res "" + foreach i [split $s ""] { + scan $i %c c + + set printable 0 + if {($c>31) && ($c<127)} { + set printable 1 + } + if {$printable} {append res $i} else {append res \\u[format %.4X $c]} + } + set res + } ;#RS + + #check if str is valid for use as a toml bare key + proc is_barekey {str} { + if {[tcl::string::length $str] == 0} { + return 0 + } else { + set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters match the regexp + return 1 + } else { + return 0 + } + } + } + + #test only that the characters in str are valid for the toml specified type 'integer'. + proc int_validchars1 {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_int {str} { + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { + return 0 + } + # --------------------------------------- + + #check +,- only occur in the first position. + if {[tcl::string::last - $str] > 0} { + return 0 + } + if {[tcl::string::last + $str] > 0} { + return 0 + } + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![tcl::string::is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. + #presumably very large numbers would have to be supplied in a toml file as strings. + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max + if {$numeric_value > $::tomlish::max_int} { + return 0 + } + if {$numeric_value < $::tomlish::min_int} { + return 0 + } + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. 
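+ #(illustrative, assumed cases: "1_000", "+42", "0xDEADBEEF", "0o755", "0b1101" pass the
+ # checks above; "007" (leading zero), "1-" (sign not leading), "3e4" (a float form) and
+ # values outside $::tomlish::min_int..$::tomlish::max_int are rejected by the checks above)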
+ return 1 + } + + #test only that the characters in str are valid for the toml specified type 'float'. + proc float_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { + return 1 + } else { + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } + } + } + + proc is_float {str} { + set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) + #Toml spec also disallows leading zeros in the exponent part + #... but this seems less interoperable (some libraries generate leading zeroes in exponents) + #for now we will allow leading zeros in exponents + #!todo - configure 'strict' option to disallow? + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + set r {([0-9])*} + regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E + set z {([0])*} + regexp $z $intpart leadingzeros + if {[tcl::string::length $leadingzeros] > 1} { + return 0 + } + #for floats, +,- may occur in multiple places + #e.g -2E-22 +3e34 + #!todo - check bounds ? + + #strip underscores for tcl double check + set check [tcl::string::map {_ ""} $str] + #string is double accepts inf nan +NaN etc. + if {![tcl::string::is double $check]} { + return 0 + } + + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'datetime'. + proc datetime_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + #review - we + proc is_datetime {str} { + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #!todo - use full RFC 3339 parser? + lassign [split $str T] datepart timepart + #!todo - what if the value is 'time only'? + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + + #!todo - verify time part is reasonable + } else { + return 0 + } + return 1 + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::utils ---}] +} + +namespace eval tomlish::parse { + #*** !doctools + #[subsection {Namespace tomlish::parse}] + #[para] + #[list_begin definitions] + + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? 
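+ # (illustrative only) e.g x = '''ends with two quotes'''''
+ # the trailing ''''' must be split into two data squotes plus the closing ''' delimiter
+ # (the same issue arises with trailing " characters in """ multiline basic strings)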
#I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast"
+ #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser'
+
+ #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder?
+
+ #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar)
+
+
+ variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text
+
+ variable state
+ # states:
+ # table-space, itable-space, array-space
+ # value-expected, keyval-syntax,
+ # quoted-key, squoted-key
+ # string-state, literal-state, multistring...
+ #
+ # notes:
+ # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack
+
+ #
+ # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax
+ #
+ #stateMatrix defines for each state, actions to take for each possible token.
+ #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container.
+ #dual-element actions are a push instruction and the name of the space to push on the stack.
+ # - PUSHSPACE is a simple push onto the spacestack; zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie only if not already in root table-space)
+
+ # -- --- --- --- --- ---
+ #token/state naming guide
+ # -- --- --- --- --- ---
+ #tokens : underscore separated or bare name e.g newline, start_quote, start_squote
+ #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence)
+ #states : always contain at least one dash e.g err-state, table-space
+ #instructions : uppercase e.g POPSPACE, PUSHSPACE, SAMESPACE, TOSTATE (zeropoppushspace being a lowercase exception)
+ # -- --- --- --- --- ---
+
+
+ #stateMatrix dict of elements mapping current state to next state based on returned tokens
+ # current-state {token-encountered next-state ... }
+ # where next-state can be a 1 or 2 element list.
+ #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases + + variable stateMatrix + set stateMatrix [dict create] + + #xxx-space vs xxx-syntax inadequately documented - TODO + + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# + + dict set stateMatrix\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ + } + + #itable-space/ curly-syntax : itables + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-testing"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ + } + + + dict set stateMatrix\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ + } + + # ' = ' portion of keyval + dict set stateMatrix\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + #### + dict set stateMatrix\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ + } + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} + dict set stateMatrix\ + 
leading-squote-space {\ + squote_seq "POPSPACE"\ + } + #dict set stateMatrix\ + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + + dict set stateMatrix\ + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + + dict set stateMatrix\ + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value + dict set stateMatrix\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ + } + #dottedkeyend "POPSPACE" + + + + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 + dict set stateMatrix\ + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey 
"itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ + } + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? + + + #JMN REVIEW + dict set stateMatrix\ + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ + comma "array-space"\ + comment "array-space"\ + eof "err-state-array-space-got-eof"\ + } + dict set stateMatrix\ + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + comma "array-space"\ + comment "err-state"\ + } + + + + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space + dict set stateMatrix\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ + } + dict set stateMatrix\ + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ + } + # endsquote {PUSHSPACE "keyval-space"} + + dict set stateMatrix\ + string-state {\ + whitespace "NA"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + literal-state {\ + whitespace "NA"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + + + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } + dict set stateMatrix\ + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ + } + + + #only valid subparts are literalpart and newline. 
other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline + dict set stateMatrix\ + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes + dict set stateMatrix\ + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + + + dict set stateMatrix\ + tablename-state {\ + whitespace "NA"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ + } + dict set stateMatrix\ + tablearrayname-state {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ + } + + dict set stateMatrix\ + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } + } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" + } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #build a list of 'push triggers' from the stateMatrix + # ie tokens which can push a new space onto spacestack + set push_trigger_tokens [list] + tcl::dict::for {s transitions} $stateMatrix { + tcl::dict::for {token transition_to} $transitions { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { + if {$token ni $push_trigger_tokens} { + lappend push_trigger_tokens $token + } + } + } + } + } + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + + #mainly for the -space states: + #redirect to another state $c based on a state transition from $whatever to $b + # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. + #this is useful as we often don't know state $b. 
e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next + variable spacePopTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + + + variable state_list ;#reset every tomlish::decode::toml + + namespace export tomlish toml + namespace ensemble create + + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state + variable nest + variable v + + set prevstate $currentstate + + + variable spacePopTransitions + variable spacePushTransitions + variable spaceSameTransitions + + variable last_space_action "none" + variable last_space_type "none" + variable state_list + + set result "" + set starttok "" + + if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { + set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" + switch -exact -- [lindex $transition_to 0] { + POPSPACE { + spacestack pop + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + + set last_space_action "pop" + set last_space_type $type + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + } else { + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. 
+ # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } + } + set result $next + } + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" + } else { + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } + } + set result $next + } + zeropoppushspace { + if {$nest > 0} { + #pop back down to the root level (table-space) + spacestack pop + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + + set last_space_action "pop" + set last_space_type $type + + #----- + #standard pop + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + #----- + } + #re-entrancy + + #set next [list PUSHSPACE [lindex $transition_to 1]] + set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] + } + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. 
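+ #descriptive note (illustrative values): the record just re-pushed now carries a returnstate key,
+ #e.g {type space state keyval-space returnstate keyval-tail}
+ #which the matching POPSPACE consumes to choose the continuation state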
+ } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + + set last_space_action "push" + set last_space_type "space" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" + } else { + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } + } + set result $next + } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } + default { + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word + } + } + } else { + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" + } + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] + } + + proc report_line {{line ""}} { + variable linenum + variable is_parsing + if {$is_parsing} { + if {$line eq ""} { + set line $linenum + } + return "Line Number: $line" + } else { + #not in the middle of parsing tomlish text - return nothing. + return "" + } + } + + #produce a *slightly* more readable string rep of the nest for puts etc. 
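+ # e.g (illustrative only) for a nest fragment such as {{KEY x {INT 1}} {NEWLINE lf}}
+ # each NEWLINE element is emitted on its own line and KEY/QKEY/SQKEY/TABLE/ARRAY elements are recursed into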
+ proc nest_pretty1 {list} { + set prettier "{" + + foreach el $list { + if { [lindex $el 0] eq "NEWLINE"} { + append prettier "[list $el]\n" + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { + append prettier [nest_pretty1 $el] + } else { + append prettier "[list $el] " + } + } + append prettier "}" + return $prettier + } + + proc set_tokenType {t} { + variable tokenType + variable tokenType_list + if {![info exists tokenType]} { + set tokenType "" + } + lappend tokenType_list $t + set tokenType $t + } + + proc switch_tokenType {t} { + variable tokenType + variable tokenType_list + lset tokenType_list end $t + set tokenType $t + } + + proc get_tokenType {} { + variable tokenType + return $tokenType + } + + proc _shortcircuit_startquotesequence {} { + variable tok + variable i + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + set_tokenType "startquote" + incr i -1 + return -level 2 1 + } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" + set_tokenType "startquote" + set tok "\"" + incr i -2 + return -level 2 1 + } + } + + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "tomlish set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "tomlish set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "tomlish set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "tomlish set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "tomlish Unexpected. 
Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + + #returns 0 or 1 + #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + + proc tok {s} { + variable nest + variable v + variable i + variable tok + variable type ;#character type + variable state ;#FSM + + + variable tokenType + variable tokenType_list + + + variable endToken + + variable lastChar + + variable braceCount + variable bracketCount + + + #------------------------------ + #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof + variable token_waiting + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] + return 1 + } + #------------------------------ + + set resultlist [list] + set sLen [tcl::string::length $s] + + set slash_active 0 + set quote 0 + set c "" + set multi_dquote "" + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $s [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $s $i] + set cindex $i + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" + #puts "got char $c during tokenType '$tokenType'" + incr i ;#must incr here because we do returns inside the loop + + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + switch -exact -- $ctest { + # { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" + } + whitespace { + # hash marks end of whitespace token + #do a return for the whitespace, set token_waiting + #set_token_waiting type comment value "" complete 1 + incr i -1 ;#leave comment for next run + return 1 + } + untyped_value { + #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? + #we will accept a comment marker as an immediate terminator of the untyped_value. + incr i -1 + return 1 + } + starttablename - starttablearrayname { + #fix! + error "tomlish Character '#' is invalid first character for $tokenType. 
[tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } + default { + #quotedkey, itablequotedkey, string,literal, multistring + append tok $c + } + } + } else { + switch -- $state { + multistring-space { + set_tokenType stringpart + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } + } + } + lc { + #left curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "tomlish Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "tomlish state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + } + } + } + + } + rc { + #right curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! 
- but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "tomlish right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "tomlish endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "tomlish state '$state'. 
endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + + } + lb { + #left square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + itable-space { + #handle state just to give specific error msg + error "tomlish state: '$state'. Left square bracket invalid. Cannot start array in inline table without key. Use key=\[\] syntax. [tomlish::parse::report_line]" + } + default { + error "tomlish state: '$state'. 
startarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + rb { + #right square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } else { + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + set_token_waiting type endtablename value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } + } + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\]" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } + default { + error "tomlish state '$state'. endarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
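+ #descriptive note: slash_active records a pending backslash so the handler for the *next*
+ #character can reassemble escape pairs such as \" or \\ in basic strings/comments.
+ #literal (single-quoted) contexts never set it - a backslash is plain data there.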
+ #backslash + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "tomlish Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + starttablename - starttablearrayname { + error "tomlish backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + barekey { + error "tomlish Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "tomlish Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tomlish tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } + } + } + } + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { + append tok $c + if {$existingtoklen > 2} { + error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { + append tok $c + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" + } + } + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + #temp token creatable only during value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + append tok $c + } + 2 { + #switch? 
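+ #third consecutive squote - completes the ''' opener for a multiline literal,
+ #emitted below as a triple_squote token (descriptive note)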
+ append tok $c + set_tokenType triple_squote + return 1 + } + default { + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" + } + } + } + literal { + #slash_active always false + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 + return 1 + } + starttablename - starttablearrayname { + #!!! + incr i -1 + return 1 + } + tablename - tablearrayname { + append tok $c + } + default { + append tok $c + } + } + } else { + switch -exact -- $state { + value-expected - array-space { + set_tokenType "_start_squote_sequence" + set tok "'" + } + itable-keyval-value-expected - keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname + append tok "'" + } + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal + incr -1 + return 1 + } + multistring-space { + error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey + } + default { + error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" + } + } + } + + } + dq { + #double quote + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + #switch vs set? 
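+ #third consecutive dquote - completes the """ opener for a multiline basic string,
+ #emitted below as a startmultiquote token (descriptive note)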
+ set_tokenType "startmultiquote" + return 1 + } else { + error "tomlish unexpected token length $toklen in 'startquotesequence'" + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 + return 1 + } + 2 { + set_tokenType "startsquote" + incr i -2 + return 1 + } + default { + error "tomlish unexpected _start_squote_sequence length $toklen" + } + } + } + literal - literalpart { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } + } + } + } + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 + return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} + } + default { + set_token_waiting type startquote value "\"" complete 1 startindex $cindex + return 1 + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" + append tok $c + } else { + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "tomlish got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + keyval-value-expected - value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + itable-keyval-value-expected { + #JMN 2025 - review + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! 
+ #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { + if {$multi_dquote eq "\"\""} { + tomlish::log::debug "- tokloop char dq ---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" + return 1 + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 + #set multi_dquote "" + #return 1 + } else { + append multi_dquote "\"" + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { + set_tokenType "startquote" + set tok $c + return 1 + } + itable-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin + set tok $c + } + default { + error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + } + } + } + } + = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { + #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type equal value = complete 1 startindex $cindex + return 1 + } + } + barekey { + #set_token_waiting type equal value = complete 1 + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } + default { + error "tomlish unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= + } + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 + } + default { + set_tokenType "equal" + set tok = + return 1 + } + } + } + } + cr { + #REVIEW! + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \r carriage return + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
+ set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + append tok $c + } + literalpart { + #part of MLL string (multi-line literal string) + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warn "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } + stringpart { + #part of MLB string (multi-line basic string) + #jmn2025 - review + #append tok $dquotes$c + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } + } + } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) + set_tokenType "newline" + set tok cr + } + } + lf { + # \n newline + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + newline { + #review + #this lf is the trailing part of a crlf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] + incr i -1 + return 1 + } else { + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } + starttablename - tablename - tablearrayname - starttablearrayname { + error "tomlish Character is invalid in $tokenType. [tomlish::parse::report_line]" + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. 
(must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } + } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} + } + } + , { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + #stringpart can have up to 2 quotes too + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type comma value "," complete 1 startindex $cindex + return 1 + } + } + default { + set_token_waiting type comma value "," complete 1 startindex $cindex + if {$had_slash} {append tok "\\"} + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "literalpart" + set tok "," + } + default { + set_tokenType "comma" + set tok "," + return 1 + } + } + } + } + . { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
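+ #Descriptive note: '.' only becomes a dotsep token in dottedkey-space
+ #(or when it terminates a barekey); within table names it is appended
+ #and split out later; inside untyped values (floats, dates) and strings
+ #it is ordinary data.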
+ set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped_value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "tomlish Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "tomlish Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #subtable - split later - review + append tok $c + } + barekey { + #e.g x.y = 1 + #we need to transition the barekey to become a structured table name ??? review + #x is the tablename y is the key + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 + } + default { + error "tomlish Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 + #return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." + } + multiliteral-space { + set_tokenType "literalpart" + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" + set tok "." + return 1 + } + default { + set_tokenType "untyped_value" + set tok "." + } + } + } + + } + " " { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
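+ #Summary of space handling (descriptive only - behaviour is in the branches below):
+ #a space terminates barekey and untyped_value tokens (the char is re-processed),
+ #is appended as data inside comments, strings and literals, and in
+ #multistring-space whitespace is emitted as its own token to ease continuation
+ #(CONT) stripping; table names keep embedded whitespace for later normalisation.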
+ if {[tcl::string::length $tokenType]} { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #todo had_slash - emit token or error + #whitespace is a terminator for bare keys + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + comment { + if {$had_slash} { + append tok "\\" + } + append tok $dquotes$c + } + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearrayname { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "whitespace" + append tok $c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + if {$had_slash} { + error "tomlish unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } + } + } + } + tab { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
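+ #The tab arm below closely mirrors the space arm above
+ #(terminates barekeys/untyped values, appended as data in strings and comments,
+ #kept as separate whitespace tokens within multistring-space).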
+ + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set_token_waiting type whitespace value $c complete 1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "whitespace" + append tok $c + } + } + } + } + bom { + #BOM (Byte Order Mark) - ignored by token consumer + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + _start_squote_sequence { + #assert - tok will be one or two squotes only + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart { + append tok $c + } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } + } + default { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
+ set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } + } + barekey { + if {[tomlish::utils::is_barekey $c]} { + append tok $c + } else { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + incr i -1 + #allow statemachine to set context for subsequent chars + return 1 + } + stringpart { + append tok $dquotes$c + } + default { + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname + append tok $c + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + table-space - itable-space { + #if no currently active token - assume another key value pair + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { + set_tokenType "tablename" + set tok $c + } + tablearrayname-state { + set_tokenType "tablearrayname" + set tok $c + } + dottedkey-space { + set_tokenType barekey + set tok $c + } + default { + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" + set tok $c + } + } + } + } + } + + } + + #run out of characters (eof) + if {[tcl::string::length $tokenType]} { + #check for invalid ending tokens + #if {$state eq "err-state"} { + # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" + #} + switch -exact -- $tokenType { + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #set_token_waiting type string value "" complete 1 + return 1 + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open literal + error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" + } + 2 { + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" + set tok "" + return 1 + } + } + } + } + set_token_waiting type eof value eof complete 1 startindex $i ;#review + return 1 + } else { + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + set tokenType "eof" + set tok "eof" + } + return 0 + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::parse ---}] +} + +namespace eval tomlish::dict { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + + proc is_tomltype {d} { + expr {[dict size $d] == 2 && [dict exists $d type] && [dict exists $d value]} + } + proc is_tomltype2 {d} { + upvar ::tomlish::tags tags + expr {[lindex $d 0] eq "type" && [lindex $d 1] in $tags} + } + proc last_tomltype_posn {d} { + set last_simple -1 + set dictposn [expr {[dict size $d] -1}] + foreach k [lreverse [dict keys $d]] { + set dval [dict get $d $k] + if {[is_tomltype $dval]} { + set last_simple $dictposn + break + } + incr dictposn -1 + } + return $last_simple + } + + + #review + proc name_from_tablestack {tablestack} { + set name "" + foreach tinfo [lrange $tablestack 1 end] { + lassign $tinfo type namepart + switch -- $type { + T { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + I { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + default { + #end at first break in the leading sequence of T & I tablenames + break + } + } + } + return $name + } + +} + +tcl::namespace::eval tomlish::app { + variable applist [list encoder decoder test] + + #*** !doctools + #[subsection {Namespace tomlish::app}] + #[para] + #[list_begin definitions] + + proc decoder {args} { + #*** !doctools + #[call app::[fun decoder] [arg args]] + #[para] read toml on stdin until EOF + #[para] on error - returns non-zero exit code and writes error on stderr + #[para] on success - returns zero exit code and writes JSON encoding of the data on stdout + #[para] This decoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + #fconfigure stdin -encoding utf-8 + fconfigure stdin -translation binary + #Just slurp it all - presumably we are not handling massive amounts of data on stdin. + # - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. + if {[catch { + set toml [read stdin] + }]} { + exit 2 ;#read error + } + try { + set j [::tomlish::toml_to_json $toml] + } on error {em} { + puts stderr "decoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $j + exit 0 + } + + proc encoder {args} { + #*** !doctools + #[call app::[fun encoder] [arg args]] + #[para] read JSON on stdin until EOF + #[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation + #[para] return zero exitcode and TOML data on stdout if JSON data can be converted. 
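+        #[para] Illustrative invocation (an assumption for demonstration, not taken from the test suite), given a toml-test style tagged-JSON document on stdin:
+        #[para] echo '{"a": {"type": "integer", "value": "1"}}' | tclsh tomlish-1.1.2.tm -app encoder
+        #[para] which would be expected to emit something like: a = 1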
+ #[para] This encoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + fconfigure stdin -translation binary + if {[catch { + set json [read stdin] + }]} { + exit 2 ;#read error + } + try { + set toml [::tomlish::json_to_toml $json] + } on error {em} { + puts stderr "encoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $toml + exit 0 + } + + proc test {args} { + set opts [dict merge [dict create] $args] + + package require test::tomlish + if {[dict exists $opts -suite]} { + test::tomlish::suite [dict get $opts -suite] + } + test::tomlish::run + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::app ---}] +} + +proc ::tomlish::appnames {} { + set applist [list] + foreach cmd [info commands ::tomlish::app::*] { + lappend applist [namespace tail $cmd] + } + return $applist +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace tomlish::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +if {$argc > 0} { + puts stderr "argc: $argc args: $argv" + + if {($argc == 1)} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { + puts stdout "Usage: -app where appname one of:[tomlish::appnames]" + exit 0 + } else { + puts stderr "Argument '$argv' not understood. Try -help" + exit 1 + } + } + set opts [dict create] + set opts [dict merge $opts $argv] + + set opts_understood [list -app ] + if {"-app" in [dict keys $opts]} { + #Don't vet the remaining opts - as they are interpreted by each app + } else { + foreach key [dict keys $opts] { + if {$key ni $opts_understood} { + puts stderr "Option '$key' not understood" + exit 1 + } + } + } + if {[dict exists $opts -app]} { + set app [dict get $opts -app] + if {$app ni [tomlish::appnames]} { + puts stderr "app '[dict get $opts -app]' not found" + exit 1 + } + tomlish::app::$app {*}$opts + } +} + +## Ready +package provide tomlish [namespace eval tomlish { + variable pkg tomlish + variable version + set version 1.1.2 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/modules/punk/args-999999.0a1.0.tm b/src/modules/punk/args-999999.0a1.0.tm index 25524b2b..f7730214 100644 --- a/src/modules/punk/args-999999.0a1.0.tm +++ b/src/modules/punk/args-999999.0a1.0.tm @@ -331,26 +331,26 @@ tcl::namespace::eval punk::args { parsing and help display. directives include: %B%@id%N% ?opt val...? - options: -id + spec-options: -id %B%@cmd%N% ?opt val...? - options: -name -help + spec-options: -name -help %B%@leaders%N% ?opt val...? - options: -min -max + spec-options: -min -max (used for leading args that come before switches/opts) %B%@opts%N% ?opt val...? - options: -any + spec-options: -any %B%@values%N% ?opt val...? - options: -min -max + spec-options: -min -max (used for trailing args that come after switches/opts) %B%@argdisplay%N% ?opt val...? 
- options: -header (text for header row of table) - -body (text to replace autogenerated arg info) + spec-options: -header (text for header row of table) + -body (text to replace autogenerated arg info) %B%@doc%N% ?opt val...? - options: -name -url + spec-options: -name -url %B%@seealso%N% ?opt val...? - options: -name -url (for footer - unimplemented) + spec-options: -name -url (for footer - unimplemented) - Some other options normally present on custom arguments are available + Some other spec-options normally present on custom arguments are available to use with the @leaders @opts @values directives to set defaults for subsequent lines that represent your custom arguments. These directives should occur in exactly this order - but can be @@ -361,7 +361,12 @@ tcl::namespace::eval punk::args { or using the i .. function - an @id with -id is needed. All directives can be omitted, in which case every line represents - a custom value or option. + a custom leader, value or option. + All will be leaders by default if no options defined. + If options are defined (by naming with leading dash, or explicitly + specifying @opts) then the definitions prior to the options will be + categorised as leaders, and those following the options will be + categorised as values. Custom arguments are defined by using any word at the start of a line that doesn't begin with @ or - @@ -369,7 +374,7 @@ tcl::namespace::eval punk::args { that @@somearg becomes an argument named @somearg) custom leading args, switches/options (names starting with -) - and trailing values also take options: + and trailing values also take spec-options: -type defaults to string. If no other restrictions @@ -397,12 +402,22 @@ tcl::namespace::eval punk::args { -optional (defaults to true for flags/switches false otherwise) + For non flag/switch arguments - all arguments with + -optional true must sit consecutively within their group. + ie all optional leader arguments must be together, and all + optional value arguments must be together. Furthermore, + specifying both optional leaders and optional values will + often lead to ambiguous parsing results. Currently, all + optional non-flg/switch arguments should be either at the + trailing end of leaders or the trailing end of values. + Further unambiguous arrangements of optional args may be + made in future - but are currently considered 'unsupported' -default -multiple (for leaders & values defines whether - subsequent received values are stored agains the same - argument name - only applies to final leader or value) + subsequent received values are stored against the same + argument name - only applies to final leader OR final value) (for options/flags this allows the opt-val pair or solo - flag to appear multiple times - no necessarily contiguously) + flag to appear multiple times - not necessarily contiguously) -choices {} A list of allowable values for an argument. The -default value doesn't have to be in the list. @@ -438,7 +453,7 @@ tcl::namespace::eval punk::args { Max of -1 represents no upper limit. If allows more than one choice the value is a list consisting of items in the choices made available through - entries in -choices/-choicegrups. + entries in -choices/-choicegroups. 
-minsize (type dependant) -maxsize (type dependant) -range (type dependant) @@ -1667,6 +1682,7 @@ tcl::namespace::eval punk::args { " @leaders -min 0 -max 0 @opts + -return -default text -choices {text dict} -form -default 0 -help\ "Ordinal index or name of command form" @@ -1694,7 +1710,7 @@ tcl::namespace::eval punk::args { (directives are lines beginning with @ e.g @id, @cmd etc) - if -type is @leaders,@opts or @values matches from that type + if -type is leaders,opts or values matches from that type will be returned. if -type is another directive such as @id, @doc etc the @@ -1706,8 +1722,10 @@ tcl::namespace::eval punk::args { proc resolved_def {args} { + #not eating our own dogfood here as far as argument parsing. -id ::punk::args::resolved_def is for documentation/errors only. set opts [dict create\ - -types {}\ + -return text\ + -types {}\ -form 0\ -antiglobs {}\ -override {}\ @@ -1743,7 +1761,7 @@ tcl::namespace::eval punk::args { } dict for {k v} $opts { switch -- $k { - -form - -types - -antiglobs - -override {} + -return - -form - -types - -antiglobs - -override {} default { punk::args::parse $args withid ::punk::args::resolved_def return @@ -1764,163 +1782,185 @@ tcl::namespace::eval punk::args { variable id_cache_rawdef set realid [real_id $id] + if {$realid eq ""} { + return + } - if {$realid ne ""} { - set deflist [tcl::dict::get $id_cache_rawdef $realid] - set result "" - set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] + set deflist [tcl::dict::get $id_cache_rawdef $realid] + set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] - set opt_form [dict get $opts -form] - if {[string is integer -strict $opt_form]} { - set formname [lindex [dict get $specdict form_names] $opt_form] - } else { - set formname $opt_form - } - set opt_override [dict get $opts -override] - - #set arg_info [dict get $specdict ARG_INFO] - set arg_info [dict get $specdict FORMS $formname ARG_INFO] - set argtypes [dict create leaders leader opts option values value] - - set opt_antiglobs [dict get $opts -antiglobs] - set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] - set suppressed_directives [list] - set suppressed_args [list] - foreach ag $opt_antiglobs { - foreach d $directives { - if {[string match $ag $d]} { - lappend suppressed_directives $d - } + set opt_form [dict get $opts -form] + if {[string is integer -strict $opt_form]} { + set formname [lindex [dict get $specdict form_names] $opt_form] + } else { + set formname $opt_form + } + set opt_override [dict get $opts -override] + set opt_return [dict get $opts -return] + + #set arg_info [dict get $specdict ARG_INFO] + set arg_info [dict get $specdict FORMS $formname ARG_INFO] + set argtypes [dict create leaders leader opts option values value] + + set opt_antiglobs [dict get $opts -antiglobs] + set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] + set suppressed_directives [list] + set suppressed_args [list] + foreach ag $opt_antiglobs { + foreach d $directives { + if {[string match $ag $d]} { + lappend suppressed_directives $d } - foreach argname [dict keys $arg_info] { - if {[string match $ag $argname]} { - lappend suppressed_args $argname - } + } + foreach argname [dict keys $arg_info] { + if {[string match $ag $argname]} { + lappend suppressed_args $argname } } - set suppressed_directives [lsort -unique $suppressed_directives] - set suppressed_args [lsort -unique $suppressed_args] + } + set suppressed_directives 
[lsort -unique $suppressed_directives] + set suppressed_args [lsort -unique $suppressed_args] - set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] + set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] - set globbed [list] - foreach pat $patterns { - set matches [dict keys $arg_info $pat] - lappend globbed {*}$matches - } - set globbed [lsort -unique $globbed] - set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - - foreach type $typelist { - switch -exact -- $type { - * { - if {"@id" in $included_directives} { - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" - } else { - append result \n "@id -id [dict get $specdict id]" - } - } - foreach directive {@package @cmd @doc @seealso @argdisplay} { - set dshort [string range $directive 1 end] - if {"$directive" in $included_directives} { - if {[dict exists $opt_override $directive]} { - append result \n "$directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict ${dshort}_info]" - } - } - } - #output ordered by leader, option, value - foreach pseudodirective {leaders opts values} tp {leader option value} { - set directive "@$pseudodirective" - switch -- $directive { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - - if {"$directive" in $included_directives} { - if {[dict exists $opt_override "$directive"]} { - append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict $defaults_key]" - } - } + set globbed [list] + foreach pat $patterns { + set matches [dict keys $arg_info $pat] + lappend globbed {*}$matches + } + set globbed [lsort -unique $globbed] + set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - if {$pseudodirective in $included_directives} { - foreach m $included_args { - set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq $tp} { - set argspec [dict remove $argspec -ARGTYPE] - if {[dict exists $opt_override $m]} { - append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" - } else { - append result \n "$m $argspec" - } - } - } - } + set result "" + set resultdict [dict create] + foreach type $typelist { + switch -exact -- $type { + * { + if {"@id" in $included_directives} { + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] } - } - @id { - if {"@id" in $included_directives} { - #only a single id record can exist - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + foreach directive {@package @cmd @doc @seealso @argdisplay} { + set dshort [string range $directive 1 end] + if {"$directive" in $included_directives} { + if {[dict exists $opt_override $directive]} { + append result \n "$directive [dict merge [dict get $specdict 
${dshort}_info] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]] } else { - append result \n "@id -id [dict get $specdict id]" + append result \n "$directive [dict get $specdict ${dshort}_info]" + dict set resultdict $directive [dict get $specdict ${dshort}_info] } } } - @package - @cmd - @doc - @seealso - @argdisplay { - if {"$type" in $included_directives} { - set tp [string range $type 1 end] ;# @package -> package - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" - } else { - append result \n "$type [dict get $specdict ${tp}_info]" - } + #output ordered by leader, option, value + foreach pseudodirective {leaders opts values} tp {leader option value} { + set directive "@$pseudodirective" + switch -- $directive { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} } - } - @leaders - @opts - @values { - #these are the active defaults for further arguments - if {"$type" in $included_directives} { - switch -- $type { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + + if {"$directive" in $included_directives} { + if {[dict exists $opt_override "$directive"]} { + append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]] } else { - append result \n "$type [dict get $specdict leaderspec_defaults]" + append result \n "$directive [dict get $specdict $defaults_key]" + dict set resultdict $directive [dict get $specdict $defaults_key] } } - } - leaders - opts - values { - #pseudo-directives - if {$type in $included_directives} { + + if {$pseudodirective in $included_directives} { foreach m $included_args { set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + if {[dict get $argspec -ARGTYPE] eq $tp} { set argspec [dict remove $argspec -ARGTYPE] if {[dict exists $opt_override $m]} { append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] } else { append result \n "$m $argspec" + dict set resultdict $m $argspec } } } } } - default { + + } + @id { + if {"@id" in $included_directives} { + #only a single id record can exist + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] + } } } + @package - @cmd - @doc - @seealso - @argdisplay { + if {"$type" in $included_directives} { + set tp [string range $type 1 end] ;# @package -> package + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict 
${tp}_info] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get $specdict ${tp}_info]" + dict set resultdict $type [dict get $specdict ${tp}_info] + } + } + } + @leaders - @opts - @values { + #these are the active defaults for further arguments + if {"$type" in $included_directives} { + switch -- $type { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} + } + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get $specdict leaderspec_defaults]" + dict set resultdict $type [dict get $specdict leaderspec_defaults] + } + } + } + leaders - opts - values { + #pseudo-directives + if {$type in $included_directives} { + foreach m $included_args { + set argspec [dict get $arg_info $m] + if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + set argspec [dict remove $argspec -ARGTYPE] + if {[dict exists $opt_override $m]} { + append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] + } else { + append result \n "$m $argspec" + dict set resultdict $m $argspec + } + } + } + } + } + default { + } + } + if {$opt_return eq "text"} { + return $result + } else { + return $resultdict } - - return $result } } diff --git a/src/modules/punk/imap4-999999.0a1.0.tm b/src/modules/punk/imap4-999999.0a1.0.tm index cbcfe26f..411cbff9 100644 --- a/src/modules/punk/imap4-999999.0a1.0.tm +++ b/src/modules/punk/imap4-999999.0a1.0.tm @@ -75,6 +75,17 @@ # showlog command to see cli/svr conversation - todo! - disable by default and limit storage. # Addition of AUTH_PLAIN SASL authentication mechanism # change isableto -> has_capability (to better reflect capabilities such as LOGINDISABLED) +# 202503 J. Noble - API changes, add more argument parsing/documentation +# Change IMAP API commands that take msgid or range to accept IMAP protocol style sequence-sets +# composed of seq-ranges. +# ie - no longer accept tcllib IMAP4 style range consisting of incomplete colon based ranges such as : :x x: +# Instead we accept the full comma delimited sequence sets and require use of the special * operator in ranges +# e.g 1:* 3,4,10:* etc +# The equivalent of tcllib IMAP's : would be 1:* +# Added GETACL,SETACL,MYRIGHTS,LISTRIGHTS commands. 
+# Added initial RETURN handling for SEARCH (not yet handling ESEARCH responses) +# Changed OPEN to CONNECT +# (slightly better clarity for API because the IMAP CLOSE command is not the opposite of OPEN) # # @@ Meta Begin @@ -253,13 +264,95 @@ tcl::namespace::eval punk::imap4::system { } } +tcl::namespace::eval punk::imap4::stringprep { + #https://core.tcl-lang.org/tcllib/doc/tcllib-1-18/embedded/www/tcllib/files/modules/stringprep/stringprep.html#3 + + #RFC3454 - table definitions + + #IMAP stringprep Profiles for Usernames RFC4314 RFC5738 + #IMAP stringprep Profiles for Passwords RFC5738 + + #RFC4013 SASLprep: Stringprep Profile for User Names and Passwords + #Prohibited Output + # - Non-ASCII space characters [StringPrep, C.1.2] + # - ASCII control characters [StringPrep, C.2.1] + # - Non-ASCII control characters [StringPrep, C.2.2] + # - Private Use characters [StringPrep, C.3] + # - Non-character code points [StringPrep, C.4] + # - Surrogate code points [StringPrep, C.5] + # - Inappropriate for plain text characters [StringPrep, C.6] + # - Inappropriate for canonical representation characters + # [StringPrep, C.7] + # - Change display properties or deprecated characters + # [StringPrep, C.8] + # - Tagging characters [StringPrep, C.9] + set prohibited_sets {A.1 C.1.2 C.2.1 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9} + + #This profile specifies: + # - non-ASCII space characters [StringPrep, C.1.2] that can be + # mapped to SPACE (U+0020), and + # - the "commonly mapped to nothing" characters [StringPrep, B.1] + # that can be mapped to nothing. + + #Unassigned Code points - [STRINGPREP, A.1] + + package require stringprep + #Mapping C.1.2 ?? + #we only have it in -prohibited - but it seems to be mapped to space, which is what we want - but why? + ::stringprep::register saslprep -mapping {B.1} -prohibited $prohibited_sets -normalization KC -prohibitedBidi 1 + + proc normal_userpass {input} { + #set input [map_to_space $input] ;#C.1.2 non-ascii spaces mapped to space + set normalised [::stringprep::stringprep saslprep $input] + } + + #probably unneeded - see command above re mapping C.1.2 + proc map_to_space {input} { + #C.1.2 Non-ASCII space characters + #----- Start Table C.1.2 ----- + #00A0; NO-BREAK SPACE + #1680; OGHAM SPACE MARK + #2000; EN QUAD + #2001; EM QUAD + #2002; EN SPACE + #2003; EM SPACE + #2004; THREE-PER-EM SPACE + #2005; FOUR-PER-EM SPACE + #2006; SIX-PER-EM SPACE + #2007; FIGURE SPACE + #2008; PUNCTUATION SPACE + #2009; THIN SPACE + #200A; HAIR SPACE + #200B; ZERO WIDTH SPACE + #202F; NARROW NO-BREAK SPACE + #205F; MEDIUM MATHEMATICAL SPACE + #3000; IDEOGRAPHIC SPACE + #----- End Table C.1.2 ----- + set map [list \u00A0 " " \u1680 " " \u2000 " " \u2001 " " \u2002 " " \u2003 " " \u2004 " " \u2005 " " \u2006 " " \u2007 " "\ + \u2007 " " \u2008 " " \u2009 " " \u200A " " \u200b " " \u202F " " \u205F " " \u3000 " "\ + ] + return [string map $map $input] + } +} tcl::namespace::eval punk::imap4::proto { variable PUNKARGS - variable info variable coninfo namespace export {[a-z]*} + proc is_imap_number {n} { + return [expr {[string is integer -strict $n] && $n >= 0 && $n <= 4294967296}] + } + proc is_imap_number64 {n} { + return [expr {[string is integer -strict $n] && $n >= 0 && $n <= 9223372036854775807}] + } + proc is_imap_nznumber {n} { + return [expr {[string is integer -strict $n] && $n > 0 && $n <= 4294967296}] + } + proc is_imap_nznumber64 {n} { + return [expr {[string is integer -strict $n] && $n > 0 && $n <= 9223372036854775807}] + } + #JMN 2025 - rename to pop0 to make clear 
distinction between this and tcl9 builtin lpop # Pop an element from the list inside the named variable and return it. # If a list is empty, raise an error. The error is specific for the @@ -345,6 +438,49 @@ tcl::namespace::eval punk::imap4::proto { #we aren't assuming all request formats are valid Tcl lists return [punk::imap4::lib::firstword $lastrequest] } + + #experimental + proc resync_tag {chan} { + set last_request_tag [lastrequesttag $chan] + set last_line [lastline $chan] + #word0 + set last_response_tag [punk::imap4::lib::firstword $last_line] + puts stderr "last request tag: $last_request_tag" + puts stderr "last response tag: $last_response_tag" + if {$last_response_tag < $last_request_tag} { + set diff [expr {$last_request_tag - $last_response_tag}] + puts stderr "Reading $diff responses to catch up.." + set servertag $last_response_tag + for {set i 0} {$i < $diff} {incr i} { + #JMN + set is_err [catch {getresponse $chan [incr servertag]} getresponse_result] + if {!$is_err} { + if {$getresponse_result == 0} { + puts stderr "READ read number: $i result: $getresponse_result" + } else { + puts stderr "READPROBLEM read number: $i result: $getresponse_result" + } + } else { + puts stderr "READERROR read number: $i" + puts stderr " error: $getresponse_result" + } + } + #todo retest? + puts stderr "Done - view log using 'showlog $chan'" + } elseif {$last_response_tag > $last_request_tag} { + set synctag [expr {$last_response_tag + 1}] + puts stderr "Updating client curtag to $synctag" + upvar ::punk::imap4::proto::info info + set info($chan,curtag) $synctag + puts stderr "calling NOOP" + punk::imap4::NOOP $chan + #todo - retest? + puts stderr "Done" + } else { + puts stderr "resync_tag - OK No difference detected" + } + } + # Get the current state proc state {chan} { variable info @@ -430,11 +566,54 @@ tcl::namespace::eval punk::imap4::proto { # This a general implementation for a simple implementation # of an IMAP command that just requires to call ::imap4::request # and ::imap4::getresponse. - proc simplecmd {chan command validstates args} { + lappend PUNKARGS [list { + @id -id ::punk::imap4::proto::simplecmd + @cmd -name punk::imap4::proto::simplecmd -help\ + "This is a general implementation for a simple + implementation of an IMAP command that is + composed of a a ::punk::imap4::request followed + by a punk::imap4::response" + + @leaders -min 1 -max 1 + chan -optional 0 -help\ + "existing channel for an open IMAP connection" + @opts + -validstates -default * -help\ + "A list of valid states from which this + command can be called" + @values -min 1 -max -1 + command -type string + arg -multiple 1 -optional 1 -help\ + {Each argument for the command must be + supplied in a way that preserved the form + expected by an IMAP server. + For example, if an argument has spaces it + may need to be in double quotes and so need + to be explicitly specified with quotes and a + protecting set of braces. + e.g + simplecmd EXAMINE {"mailbox name with spaces"} + If Tcl variable substitution is required, escapes + within a quoted string could be used, or string map. 
+ e.g + simplecmd $ch SETMETADATA $b "($ann \"$val\")" + } + }] + proc simplecmd {args} { + set argd [punk::args::parse $args withid ::punk::imap4::proto::simplecmd] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set validstates [dict get $opts -validstates] + set command [dict get $values command] + set arglist [list] + if {[dict exists $received arg]} { + set arglist [dict get $values arg] + } + requirestate $chan $validstates set req "$command" - foreach arg $args { + foreach arg $arglist { append req " $arg" } @@ -516,7 +695,7 @@ tcl::namespace::eval punk::imap4::proto { } - # Process an IMAP response line. + # Process an IMAP response 'logical' line. # This function trades simplicity in IMAP commands # implementation with monolithic handling of responses. # However note that the IMAP server can reply to a command @@ -543,7 +722,7 @@ tcl::namespace::eval punk::imap4::proto { set literals {} set line "" while {1} { - # Read a line + # Read a physical line - which may be only part of the logical line if there is a 'literal' specifier if {[gets $chan buf] == -1} { error "([dict get $coninfo $chan hostname])IMAP unexpected EOF from server." } @@ -571,7 +750,7 @@ tcl::namespace::eval punk::imap4::proto { } } else { #We are at the end of a single line, - #or a sequence of 1 or more lines which had trailing literal specifiers {nnn} followed by data we have read. + #or a sequence of 1 or more physical lines which had trailing literal specifiers {nnn} followed by data we have read. break } } @@ -602,8 +781,11 @@ tcl::namespace::eval punk::imap4::proto { set info($chan,lastcode) $code } + set dirty 0 ;#review - naming as 'dirty' seems odd + #This seems to just indicate we've already matched a result as the implementation + #splits the scanning into two switch statements. + # Extract information from the line - set dirty 0 switch -glob -- $line { {*\[READ-ONLY\]*} {::punk::imap4::_set_mboxinfo $chan perm READ-ONLY; incr dirty} {*\[READ-WRITE\]*} {::punk::imap4::_set_mboxinfo $chan perm READ-WRITE; incr dirty} @@ -655,7 +837,12 @@ tcl::namespace::eval punk::imap4::proto { #cli> 1 LOGIN user pass #svr> 1 OK [CAPABILITY IMAP4rev1 ... ] User logged in SESSIONID= regexp {.*\[CAPABILITY\s+(.*)\]\s*(.*)$} $line => capstring tailstring - set info($chan,capability) [split [string toupper $capstring]] + #consider the capability: RIGHTS=kxten + #Probably inappropriate to convert to uppercase, standard rights are defined as lowercase. + #(no uppercase rights currently allowed - but perhaps that may change?) + # Unknown if there are other capabilities with lowercase values. + #set info($chan,capability) [split [string toupper $capstring]] + set info($chan,capability) [split $capstring] incr dirty if {$tailstring ne ""} { if {[dict get $coninfo $chan debug]} { @@ -671,26 +858,37 @@ tcl::namespace::eval punk::imap4::proto { if {!$dirty && $tag eq {*}} { switch -regexp -nocase -- $line { {^[0-9]+\s+EXISTS} { - regexp {^([0-9]+)\s+EXISTS} $line => mboxinfo($chan,exists) + #regexp {^([0-9]+)\s+EXISTS} $line => mboxinfo($chan,exists) + regexp {^([0-9]+)\s+EXISTS} $line => val + punk::imap4::_set_mboxinfo $chan exists $val incr dirty } {^[0-9]+\s+RECENT} { - regexp {^([0-9]+)\s+RECENT} $line => mboxinfo($chan,recent) + #DEPRECATED response for imaprev2 - should ignore? 
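+                        #(RECENT and the \Recent flag were removed in IMAP4rev2/RFC 9051;
+                        # servers advertising only IMAP4rev2 will not send this untagged response)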
+ #regexp {^([0-9]+)\s+RECENT} $line => mboxinfo($chan,recent) + regexp {^([0-9]+)\s+RECENT} $line => val + punk::imap4::_set_mboxinfo $chan recent $val incr dirty } {.*?\[UIDVALIDITY\s+[0-9]+?\]} { - regexp {.*?\[UIDVALIDITY\s+([0-9]+?)\]} $line => \ - mboxinfo($chan,uidval) + #regexp {.*?\[UIDVALIDITY\s+([0-9]+?)\]} $line => \ + # mboxinfo($chan,uidval) + regexp {.*?\[UIDVALIDITY\s+([0-9]+?)\]} $line => val + punk::imap4::_set_mboxinfo $chan uidval $val incr dirty } {.*?\[UNSEEN\s+[0-9]+?\]} { - regexp {.*?\[UNSEEN\s+([0-9]+?)\]} $line => \ - mboxinfo($chan,unseen) + #regexp {.*?\[UNSEEN\s+([0-9]+?)\]} $line => \ + # mboxinfo($chan,unseen) + regexp {.*?\[UNSEEN\s+([0-9]+?)\]} $line => val + punk::imap4::_set_mboxinfo $chan unseen $val incr dirty } {.*?\[UIDNEXT\s+[0-9]+?\]} { - regexp {.*?\[UIDNEXT\s+([0-9]+?)\]} $line => \ - mboxinfo($chan,uidnext) + #regexp {.*?\[UIDNEXT\s+([0-9]+?)\]} $line => \ + # mboxinfo($chan,uidnext) + regexp {.*?\[UIDNEXT\s+([0-9]+?)\]} $line => val + punk::imap4::_set_mboxinfo $chan uidnext $val incr dirty } {^[0-9]+\s+FETCH} { @@ -707,6 +905,19 @@ tcl::namespace::eval punk::imap4::proto { processmetadataline $chan $request_tag $line $literals #incr dirty ;#??? review } + {^MYRIGHTS\s+} { + #line eg: MYRIGHTS INBOX lrswipkxtecdan + #puts stderr "line: $line" + set words [punk::imap4::lib::imapwords $line 3] + if {[dict size $words] == 3} { + set mbox [dict get $words 1 value] + set myrights [dict get $words 2 value] + #set folderinfo($chan,myrights) + } else { + puts stderr "processline unable to make sense of MYRIGHTS response: $line" + puts stderr "words:$words" + } + } {^CAPABILITY\s+.*} { #direct response to a CAPABILITY request #e.g @@ -731,7 +942,9 @@ tcl::namespace::eval punk::imap4::proto { } } {^LIST\s*$} { - regexp {^([0-9]+)\s+EXISTS} $line => mboxinfo($chan,exists) + #regexp {^([0-9]+)\s+EXISTS} $line => mboxinfo($chan,exists) + regexp {^([0-9]+)\s+EXISTS} $line => val + punk::imap4::_set_mboxinfo $chan exists $val incr dirty } {^SEARCH\s*$} { @@ -766,7 +979,7 @@ tcl::namespace::eval punk::imap4::proto { #our lines here have had the literals separated out #so we get complete lines where the literal acts as a placeholder #e.g METADATA Junk ("/private/specialuse" {5}) - puts stderr "processmetadataline: $line" + #puts stderr "processmetadataline: $line" set words [punk::imap4::lib::imapwords $line] set msgbox [dict get $words 1 value] set resultlist [dict get $words 2 value] @@ -775,6 +988,7 @@ tcl::namespace::eval punk::imap4::proto { } set itemwords [punk::imap4::lib::imapwords [string range $resultlist 1 end-1]] ;#strip () and process contents set items [list] + #use lib::imapwords_resolved? dict for {w wordinfo} $itemwords { if {[dict get $wordinfo type] eq "literal"} { set lit [dict get $wordinfo value] @@ -888,7 +1102,7 @@ tcl::namespace::eval punk::imap4::proto { # Write a multiline request. The 'request' list must contain - # parts of command and literals interleaved. Literals are ad odd + # parts of command and literals interleaved. Literals are at odd # list positions (1, 3, ...). 
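+    # Illustrative shape only (hedged sketch - the real lists are built by the
+    # multiline append helpers in this namespace, and the calling code is
+    # responsible for prefixing the request tag to element 0):
+    #   [list {SEARCH SUBJECT {5}} hello {}]
+    # even positions are written as lines (ending in the {n} literal length
+    # specifier), odd positions are the raw literal payloads, and a trailing
+    # (possibly empty) command part follows the last literal.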
proc multiline_request {chan request} { variable info @@ -903,6 +1117,10 @@ tcl::namespace::eval punk::imap4::proto { } puts -nonewline $chan "$line\r\n" flush $chan + + set info($chan,lastrequest) "$line" + ::punk::imap4::system::add_conlog $chan c $request_tag line [list $line] + incr items -1 if {!$items} break @@ -918,8 +1136,10 @@ tcl::namespace::eval punk::imap4::proto { puts -nonewline $chan $literal flush $chan incr items -1 + + #REVIEW + ::punk::imap4::system::add_conlog $chan c $request_tag chunk [list [list length [string length $literal] chunk $literal]] } - set info($chan,lastrequest) $request } @@ -1064,7 +1284,9 @@ tcl::namespace::eval punk::imap4::proto { lappend req {} } - lset req 0 " $cmd[lindex $req 0]" + #Extra space between tag and command can cause NULL command error on at least some servers (cyrus) + #lset req 0 " $cmd[lindex $req 0]" + lset req 0 "$cmd[lindex $req 0]" } # Concat an already created search expression to a multiline request. @@ -1087,8 +1309,10 @@ tcl::namespace::eval punk::imap4::proto { ANSWERED - DELETED - DRAFT - FLAGGED - RECENT - SEEN - NEW - OLD - UNANSWERED - UNDELETED - - UNDRAFT - UNFLAGGED - UNSEEN - - ALL {multiline_append_command result [string toupper $token]} + UNDRAFT - UNFLAGGED - + UNSEEN { + multiline_append_command result [string toupper $token] + } BODY - CC - FROM - SUBJECT - TEXT - KEYWORD - BCC { @@ -1104,7 +1328,25 @@ tcl::namespace::eval punk::imap4::proto { multiline_concat_expr result $first multiline_concat_expr result $second } - + ALL { + #ALL messages in the mailbox: the default inital key for ANDing + #also RETURN ALL - trigger ESEARCH response code? + multiline_append_command result [string toupper $token] + } + FUZZY { + #RFC6203 + set argset [convert_search_expr [pop0 expr]] + multiline_append_command result "FUZZY" + multiline_concat_expr result $argset + } + RETURN { + set options [convert_search_expr [pop0 expr]] + multiline_append_command result "RETURN" + multiline_concat_expr result $options + } + COUNT - MIN - MAX - SAVE { + multiline_append_command result [string toupper $token] + } NOT { set e [convert_search_expr [pop0 expr]] multiline_append_command result "NOT" @@ -1114,7 +1356,6 @@ tcl::namespace::eval punk::imap4::proto { SMALLER - LARGER { set len [pop0 expr] - ##nagelfar ignore if {![string is integer $len]} { error "Invalid integer follows '$token' in IMAP search" } @@ -1142,6 +1383,64 @@ tcl::namespace::eval punk::imap4::proto { } + # ------------------------------------------------------------------------------------------------------ + #RFC2086 + set rights_2086 [dict create\ + l "lookup (mailbox i- s visible to LIST/LSUB commands)"\ + r "read (SELECT the mailbox, perform CHECK, FETCH, PARTIAL, SEARCH, COPY from mailbox)"\ + s "keep seen/unseen information across sessions (STORE SEEN flag)"\ + w "write (STORE flags other than SEEN and DELETED)"\ + i "insert (perform APPEND, COPY into mailbox)"\ + p "post (send mail to submission address for mailbox, not enforced by IMAP4 itself)"\ + c "create (CREATE new sub-mailboxes in any implementation-defined hierarchy)"\ + d "delete (STORE DELETED flag, perform EXPUNGE)"\ + a "administer (perform SETACL)"\ + ] + #c and d in 2086 have ambiguity + #RFC4314 'obsoleted' them but reclassified them as 'virtual rights' + #For backwards compatibility with clients - more modern servers MUST still include c and d in ACL/MYRIGHTS responses when appropriate. 
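+    #Illustrative sketch only (hypothetical helper, not part of this module) -
+    #checking a returned rights string against the letters tabulated in
+    #rights_2086/rights_4314:
+    #  proc has_right {rights letter} { expr {[string first $letter $rights] >= 0} }
+    #  has_right lrswipkxtecdan x   ;# 1 - may DELETE the mailbox (RFC4314 "x")
+    #  has_right lrs w              ;# 0 - flags other than \SEEN cannot be stored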
+ + + #RFC4314 + set rights_4314 [dict create\ + l {lookup (mailbox is visible to LIST/LSUB commands, SUBSCRIBE mailbox)}\ + r {read (SELECT the mailbox, perform STATUS)}\ + s {keep seen/unseen information across sessions (set or clear \SEEN flag via STORE, also set \SEEN during APPEND/COPY/FETCH BODY[...])}\ + w {write (set or clear flags other than \SEEN and \DELETED via STORE, also set them during APPEND/COPY)}\ + i {insert (perform APPEND, COPY into mailbox)}\ + p {post (send mail to submission address for mailbox, not enforced by IMAP4 itself)}\ + k {create mailboxes (CREATE new sub-mailboxes in any implementation-defined hierarchy, parent mailbox for the new mailbox name in RENAME)}\ + x {delete mailbox (DELETE mailbox, old mailbox name in RENAME)}\ + t {delete messages (set or clear \DELETED flag via STORE, set \DELETED flag during APPEND/COPY)}\ + e {perform EXPUNGE and expunge as a part of CLOSE}\ + a {administer (perform SETACL/DELETEACL/GETACL/LISTRIGHTS)}\ + ] + + #some servers chose 2086 "c" to control the DELETE command + set rights_1 [dict create\ + create {k x}\ + delete {e t}\ + ] + #some servers chose 2086 "d" to control the DELETE command + set rights_2 [dict create\ + create {k}\ + delete {e t x}\ + ] + + # "n" right? RFC? + + set virtual_rights [dict create\ + d delete\ + c create\ + ] + + #TODO + proc rights_info {} { + } + + # ------------------------------------------------------------------------------------------------------ + + # Protocol error! Enter the debug mode if ::imap4::debug is true. # Otherwise just raise the error. @@ -1207,13 +1506,24 @@ tcl::namespace::eval punk::imap4 { lappend PUNKARGS [list { - @id -id ::punk::imap4::OPEN - @cmd -name punk::imap4::OPEN -help\ + @id -id ::punk::imap4::CONNECT + @cmd -name punk::imap4::CONNECT -help\ "Open a new IMAP connection and initialise the handler. Returns the Tcl channel to use in subsequent calls to - the API." + the API. Other API commands will return zero on success. + e.g + % set chan [CONNECT mail.example.com] + sock123aaa456789 + % AUTH_PLAIN $chan user pass + 0 + ... EXAMINE/CLOSE mailboxes, SEARCH, FETCH etc ... + % LOGOUT $chan + 0" @leaders -min 0 -max 0 - -debug -type boolean -default 0 + -debug -type boolean -default 0 -help\ + "Display some of the cli/server interaction on stdout + during commands. This can be set or queried using + the 'debugchan $chan ?bool?' command." -security -nocase 1 -choices {None TLS/SSL STARTTLS} -help\ "Connection security. TLS/SSL is recommended (implicit TLS). @@ -1228,8 +1538,8 @@ tcl::namespace::eval punk::imap4 { hostname -optional 0 -help\ "Host/IP Address of server. port may optionally be specified at tail of hostname - after a colon, but not if the following port argument - is also supplied and is non-zero. + after a colon, but not if the following optional port + argument to the command is also supplied and is non-zero. e.g server.example.com:143 [::1]::993 @@ -1240,8 +1550,8 @@ tcl::namespace::eval punk::imap4 { defaults to 143 when -security None or STARTTLS defaults to 993 when -security TLS/SSL or -security is omitted." 
}] - proc OPEN {args} { - set argd [punk::args::parse $args withid ::punk::imap4::OPEN] + proc CONNECT {args} { + set argd [punk::args::parse $args withid ::punk::imap4::CONNECT] lassign [dict values $argd] leaders opts values received set hostname [dict get $values hostname] if {[dict exists $received -security]} { @@ -1255,7 +1565,7 @@ tcl::namespace::eval punk::imap4 { } if {$arg_port != 0 && $addrport != 0} { puts stderr "Cannot specify port both in port argument as well as in hostname" - puts stderr [punk::args::usage -scheme error ::punk::imap4::OPEN] + puts stderr [punk::args::usage -scheme error ::punk::imap4::CONNECT] return } if {$addrport != 0} { @@ -1517,8 +1827,9 @@ tcl::namespace::eval punk::imap4 { #protocol callbacks to api cache namespace #msginfo #we need request_tag to determine when we have multiple values for a field - versus subsequent requests which will overwrite - #msgnum is sequence. todo UIDs separate variable? - #some headers have multipl values (SMTP traces) + #msgnum is sequence-set? + # todo UIDs separate variable? + #some headers have multiple values (SMTP traces) #also consider the somewhat contrived use of partials: # FETCH (BODY[]<0.100> BODY[]<0.10>) #These are returned in the FETCH response as "BODY[]<0> {100}" and "BODY[]<0> {10}" @@ -1625,7 +1936,9 @@ tcl::namespace::eval punk::imap4 { set opt_ignorestate [dict exists $received -ignorestate] set opt_ignorelogindisabled [dict exists $received -ignorelogindisabled] set username [dict get $values username] + set username [punk::imap4::stringprep::normal_userpass $username] set password [dict get $values password] + set password [punk::imap4::stringprep::normal_userpass $password] if {!$opt_ignorelogindisabled} { if {[punk::imap4::proto::has_capability $chan LOGINDISABLED]} { @@ -1635,7 +1948,7 @@ tcl::namespace::eval punk::imap4 { if {!$opt_ignorestate} { punk::imap4::proto::requirestate $chan NOAUTH } - set rtag [punk::imap4::proto::request $chan "LOGIN $username $password"] + set rtag [punk::imap4::proto::request $chan [list LOGIN $username $password]] if {[punk::imap4::proto::getresponse $chan $rtag] != 0} { return 1 } @@ -1686,34 +1999,178 @@ tcl::namespace::eval punk::imap4 { set allowstates NOAUTH } set username [dict get $values username] + set username [punk::imap4::stringprep::normal_userpass $username] set password [dict get $values password] + set password [punk::imap4::stringprep::normal_userpass $password] package require base64 set b64_creds [base64::encode $opt_authorization\0$username\0$password] - if {[punk::imap4::proto::simplecmd $chan "AUTHENTICATE PLAIN" {*}$allowstates $b64_creds]} { + if {[punk::imap4::proto::simplecmd $chan -validstates $allowstates AUTHENTICATE PLAIN $b64_creds]} { return 1 } set info($chan,state) AUTH return 0 } + lappend PUNKARGS [list { + @id -id ::punk::imap4::MYRIGHTS + @cmd -name punk::imap4::MYRIGHTS -help\ + "Get the set of rights that the current user + has to the mailbox. + + incomplete + Currently need debug mode or showlog + to see results" + @leaders -min 1 -max 1 + chan + @values -min 0 -max 1 + mailbox -default INBOX + }] + proc MYRIGHTS {args} { + set argd [punk::args::parse $args withid ::punk::imap4::MYRIGHTS] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $values mailbox] + if {[punk::imap4::proto::simplecmd $chan MYRIGHTS $mailbox] != 0} { + return 1 + } + #todo - store in appropriate cache - retrieve if -inline specified? 
+ return 0 + } + lappend PUNKARGS [list { + @id -id ::punk::imap4::GETACL + @cmd -name punk::imap4::GETACL -help\ + "Get ACL for a mailbox. + The current user must have permission to administer + the mailbox (the \"a\" right) to perform ACL commands + ie SETACL/GETACL/DELETEACL/LISTRIGHTS + + As opposed to MYRIGHTS, GETACL will return info + about other users' rights on the mailbox + (including current user) + + incomplete + Currently need debug mode or showlog + to see results" + @leaders -min 1 -max 1 + chan + @values -min 0 -max 1 + mailbox -default INBOX + }] + proc GETACL {args} { + set argd [punk::args::parse $args withid ::punk::imap4::GETACL] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $values mailbox] + if {[punk::imap4::proto::simplecmd $chan GETACL $mailbox] != 0} { + return 1 + } + #todo - store in appropriate cache - retrieve if -inline specified? + return 0 + } + lappend PUNKARGS [list { + @id -id ::punk::imap4::SETACL + @cmd -name punk::imap4::SETACL -help\ + "Set ACL for a specified user on a mailbox. + The current user must have permission to administer + the mailbox (the \"a\" right) to perform ACL commands + ie SETACL/GETACL/DELETEACL/LISTRIGHTS" + @leaders -min 1 -max 1 + chan + @values -min 3 -max 3 + mailbox + user + rights -help\ + "A rights string consisting of zero or more rights + characters (lowercase) optionally beginning with a + \"+\" or \"-\" + e.g SETACL projectfolder other.user +cda + If the string starts with a plus, the following + rights are added to any existing rights for the + specified user. + If the string starts with a minus, the following + rights are removed from any existing rights for + the specified user. + If the string does not start with a plus or minus, + the rights replace any existing rights for the + specified user. + " + }] + proc SETACL {args} { + set argd [punk::args::parse $args withid ::punk::imap4::SETACL] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $values mailbox] + set user [dict get $values user] + set rights [dict get $values rights] + if {[punk::imap4::proto::simplecmd $chan SETACL $mailbox $user $rights] != 0} { + return 1 + } + #todo - update appropriate cache? + return 0 + } + + lappend PUNKARGS [list { + @id -id ::punk::imap4::LISTRIGHTS + @cmd -name punk::imap4::LISTRIGHTS -help\ + "Get information about the required rights + and the optional rights for a specified user + on this mailbox. + The required rights (a possibly empty string) + are the rights that will always be granted to that + user in the mailbox. + The optional rights are rights that CAN be granted. + + incomplete + Currently need debug mode or showlog + to see results" + @leaders -min 1 -max 1 + chan + @values -min 0 -max 2 + mailbox -default INBOX + user -default anyone + }] + proc LISTRIGHTS {args} { + set argd [punk::args::parse $args withid ::punk::imap4::LISTRIGHTS] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $values mailbox] + set user [dict get $values user] + if {[punk::imap4::proto::simplecmd $chan LISTRIGHTS $mailbox $user] != 0} { + return 1 + } + #todo - store in appropriate cache - retrieve if -inline specified? 
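+        #Illustrative only (assumption - the exact grammar is in RFC 4314): the untagged
+        #reply has the shape
+        #  * LISTRIGHTS <mailbox> <identifier> <required-rights> <optional-rights groups...>
+        #A future -inline option could parse this into a dict of required/optional rights.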
+ return 0 + } + + + lappend PUNKARGS [list { @id -id ::punk::imap4::SELECT @cmd -name punk::imap4::SELECT -help\ - "Selects a mailbox so that messages in the mailbox can be + {Selects a mailbox so that messages in the mailbox can be accessed. - Only one mailbox can be selected at a time in a connection; - simultaneous access to multiple mailboxes requires multiple + Only one mailbox can be selected at a time in a connection. + This is termed a "session". + Simultaneous access to multiple mailboxes requires multiple connections. The SELECT command automatically deselects any currently selected mailbox before attempting the new selection. Consequently, if a mailbox is selected and a SELECT command that fails is attempted, no mailbox is selected. - " + } @leaders -min 1 -max 1 chan @values -min 0 -max 1 - mailbox -default INBOX + mailbox -default INBOX -help\ + {To supply a mailbox name with spaces + The value will need to be enclosed with + double quotes - and these quotes need to + be sent to the server. Enclose in curly + braces to ensure this. + e.g + SELECT $ch {"Deleted Items"} + } }] proc SELECT {args} { set argd [punk::args::parse $args withid ::punk::imap4::SELECT] @@ -1724,10 +2181,40 @@ tcl::namespace::eval punk::imap4 { selectmbox $chan SELECT $mailbox } + lappend PUNKARGS [list { + @id -id ::punk::imap4::EXAMINE + @cmd -name punk::imap4::EXAMINE -help\ + {The EXAMINE command is identical to SELECT and returns the + same output; however, the selected mailbox is identified as + read-only. No changes to the permanent state of the mailbox, + including per-user state, are permitted.} + @leaders -min 1 -max 1 + chan + @values -min 0 -max 1 + #todo - share argdefs more! + mailbox -default INBOX -help\ + {To supply a mailbox name with spaces + The value will need to be enclosed with + double quotes - and these quotes need to + be sent to the server. Enclose in curly + braces to ensure this. + e.g + SELECT $ch {"Deleted Items"} + } + }] + proc EXAMINE {args} { + set argd [punk::args::parse $args withid ::punk::imap4::EXAMINE] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $values mailbox] + + selectmbox $chan EXAMINE $mailbox + } # General function for selection. proc selectmbox {chan cmd mailbox} { upvar ::punk::imap4::proto::info info variable mboxinfo + variable msginfo punk::imap4::proto::requirestate $chan {AUTH SELECT} # Clean info about the previous mailbox if any, @@ -1735,6 +2222,10 @@ tcl::namespace::eval punk::imap4 { #set savedmboxinfo [array get mboxinfo $chan,*] #array unset mboxinfo $chan,* dict unset mboxinfo $chan + #msginfo is based on seq-number - which is per mailbox, so we have to clear it for now. + #todo - keep cache of per mailbox msginfo even when based on seq-number? + dict unset msginfo $chan + #review - keep cache of uid based msginfo - where? set rtag [punk::imap4::proto::request $chan "$cmd $mailbox"] if {[punk::imap4::proto::getresponse $chan $rtag] != 0} { #array set mboxinfo $savedmboxinfo @@ -1742,30 +2233,81 @@ tcl::namespace::eval punk::imap4 { return 1 } + #TODO - state SELECT vs EXAMINE? set info($chan,state) SELECT + # Set the new name as mbox->current. #set mboxinfo($chan,current) $mailbox _set_mboxinfo $chan current $mailbox return 0 } - # Read-only equivalent of SELECT. 
- proc EXAMINE {chan {mailbox INBOX}} { - selectmbox $chan EXAMINE $mailbox + + #parse_seq-range - parse a seq-range from a sequence-set + #sequence-set + #Example: a message sequence number set of + # ; 2,4:7,9,12:* for a mailbox with 15 messages is + # ; equivalent to 2,4,5,6,7,9,12,13,14,15 + + + #parse_seq-range should be used primarily for examining sequence-set members + #when we want to determine the applicable ranges e.g to lookup cached info for each message + #When sending a sequence-set to the server, we can use parse_seq-range to check for errors, + #but we shouldn't be 'expanding' a valid sequence-set being sent to the server. + #We don't accept the : or :n or n: syntax accepted by the tcllib imap4 library + # - because the more explicit syntax specified in the IMAP RFCs is preferred + #(with possible * special value) + proc parse_seq-range {chan range} { + if {[string first , $range] >=0} { + error "parse_seq_range supplied value '$range' appears to be a sequence-set, not a seq-range or seq-number" + } + set rangelist [split $range :] + switch -- [llength $rangelist] { + 1 { + if {$range eq "*"} { + set start [mboxinfo $chan exists] + set end $start + } else { + set start $range + set end $range + } + if {![punk::imap4::proto::is_imap_nznumber $start] || ![punk::imap4::proto::is_imap_nznumber $end]} { + error "parse_seq-range Invalid range '$range'" + } + } + 2 { + lassign $rangelist start end + if {$start eq "*" && $end eq "*"} { + set end [mboxinfo $chan exists] + set start $end + } elseif {$start eq "*"} { + set start [mboxinfo $chan exists] + } elseif {$end eq "*"} { + set end [mboxinfo $chan exists] + } + if {![punk::imap4::proto::is_imap_nznumber $start] || ![punk::imap4::proto::is_imap_nznumber $end]} { + error "parse_seq-range Invalid range '$range'" + } + } + default { + error "parse_seq-range Invalid range '$range'" + } + } + return [list $start $end] } - # Parse an IMAP range, store 'start' and 'end' in the + #old_parse_seq-range + # Parse an IMAP seq-range, store 'start' and 'end' in the # named vars. If the first number of the range is omitted, # 1 is assumed. If the second number of the range is omitted, # the value of "exists" of the current mailbox is assumed. # # So : means all the messages. - proc parserange {chan range startvar endvar} { + proc old_parse_seq-range {chan range startvar endvar} { upvar $startvar start $endvar end set rangelist [split $range :] switch -- [llength $rangelist] { 1 { - ##nagelfar ignore if {![string is integer $range]} { error "Invalid range" } @@ -1780,7 +2322,6 @@ tcl::namespace::eval punk::imap4 { if {![string length $end]} { set end [mboxinfo $chan exists] } - ##nagelfar ignore if {![string is integer $start] || ![string is integer $end]} { error "Invalid range" } @@ -1796,28 +2337,35 @@ tcl::namespace::eval punk::imap4 { @cmd -name punk::imap4::FETCH -help\ "Fetch a number of attributes from messages. A mailbox must be SELECTed first and an appropriate - range supplied for the message(s) of interest." + sequence-set supplied for the message(s) of interest." @leaders -min 1 -max 1 chan @opts -inline -type none - @values -min 1 -max -1 - range -help\ - "Message sequence number set. + @values -min 2 -max -1 + #todo - use same sequence-set definition across argdefs + sequence-set -help\ + "Message sequence set. + 1 is the lowest valid sequence number. + * represents the maximum message sequence number + in the mailbox. 
e.g 1 - 1:3 2:2 - :3 + 1:3 + 3,5,9:10 + 1,10:* + *:5 + * " queryitems -default {} -help\ "Some common FETCH queries are shown here, but this list isn't exhaustive."\ - -multiple 1 -choiceprefix 0 -choicerestricted 0 -choicecolumns 2 -choices { + -multiple 1 -optional 0 -choiceprefix 0 -choicerestricted 0 -choicecolumns 2 -choices { ALL FAST FULL BODY BODYSTRUCTURE ENVELOPE FLAGS INTERNALDATE SIZE RFC822.SIZE UID - TEXT HEADER BODY[] + TEXT HEADER BODY[] BINARY[] BINARY.SIZE[] } -choicelabels { ALL\ " Macro equivalent to: @@ -1840,8 +2388,45 @@ tcl::namespace::eval punk::imap4 { " A parenthesized list that describes the MIME-IMB body structure of a message." {BODY[]}\ - "This retrieves the entire body including - headers" + { This retrieves the entire body including headers. + (RFC5322 expression of the entire message) + This implicitly sets the \Seen flag, as do other + FETCH BODY[...] operations. Ensure the mailbox is + opened using EXAMINE, or use BODY.PEEK[...] to avoid + this.} + {BINARY[]}\ + { Requests that the specified section be transmitted + after performing decoding of the section's + Content-Transfer-Encoding. + Like BODY[...] it will set the \Seen flag and also + has a BINARY.PEEK[...] alternate form. + Can only be requested for leaf body parts: those that + have media types other than multipart/*, + message/rfc822, or message/global.} + {BINARY.SIZE[]}\ + { Requests the decoded size fo the section (i.e , the + size to expect in response to the corresponding + FETCH BINARY request). + Only available for leaf body parts. + Can be an expensive operation on some servers. + } + RFC822.SIZE\ + { Number of octets in the message when the message + is expressed in RFC5322 format. SHOULD match the + result of a "FETCH BODY[]" command. Some servers + may store with different internal format and store + the size to avoid recalculation.} + SIZE\ + { Client-side alias for RFC822.SIZE for consistency + with tcllib IMAP4. Consider deprecating.} + ENVELOPE\ + " The envelope structure of the message. + Computed by the server by parsing the RFC5322 + header defaulting various fields as necessary" + INTERNALDATE\ + " The internal date of the message. + (Suitable as date arg for APPEND if copying a msg + from one server to another)" } }] proc FETCH {args} { @@ -1851,11 +2436,15 @@ tcl::namespace::eval punk::imap4 { set chan [dict get $leaders chan] set opt_inline [dict exists $received -inline] - set range [dict get $values range] + set sequenceset [dict get $values sequence-set] set query_items [dict get $values queryitems] punk::imap4::proto::requirestate $chan SELECT - parserange $chan $range start end + + #parse each seqrange to give it a chance to raise error for bad values + foreach seqrange [split $sequenceset ,] { + parse_seq-range $chan $seqrange + } set items {} set hdrfields {} @@ -1948,9 +2537,9 @@ tcl::namespace::eval punk::imap4 { # #don't wrap a single element in brackets - it may already be bracketed by the caller #for ALL FAST FULL - which can only occur on their own, bracketing is not allowed anyway. 
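+        #Illustrative wire commands produced by the lines below (items shown are
+        #examples only - the actual items depend on the query mapping above):
+        #  multiple items:     FETCH 1:3 (FLAGS UID RFC822.SIZE)
+        #  single/macro item:  FETCH 2,4:7 ALL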
- set request_tag [punk::imap4::proto::request $chan "FETCH $start:$end [lindex $items 0]"] + set request_tag [punk::imap4::proto::request $chan "FETCH $sequenceset [lindex $items 0]"] } else { - set request_tag [punk::imap4::proto::request $chan "FETCH $start:$end ([join $items])"] + set request_tag [punk::imap4::proto::request $chan "FETCH $sequenceset ([join $items])"] } if {[punk::imap4::proto::getresponse $chan $request_tag] != 0} { if {$opt_inline} { @@ -2183,21 +2772,67 @@ tcl::namespace::eval punk::imap4 { proc NOOP {args} { set argd [punk::args::parse $args withid ::punk::imap4::NOOP] set chan [dict get $argd leaders chan] - punk::imap4::proto::simplecmd $chan NOOP * {} + punk::imap4::proto::simplecmd $chan NOOP } # CHECK. Flush to disk. - proc CHECK {chan} { - punk::imap4::proto::simplecmd $chan CHECK SELECT {} + lappend PUNKARGS [list { + @id -id ::punk::imap4::CHECK + @cmd -name punk::imap4::CHECK -help\ + "OBSOLETED in RFC9051. + NOOP should generally be used instead. + + The CHECK command requests a checkpoint of the currently + selected mailbox. + This was for implementation dependent housekeeping associated + with the mailbox. + " + @leaders -min 1 -max 1 + chan -optional 0 + @opts + @values -min 0 -max 0 + }] + proc CHECK {args} { + set argd [punk::args::parse $args withid ::punk::imap4::CHECK] + set chan [dict get $argd leaders chan] + punk::imap4::proto::simplecmd $chan -validstates {SELECT} CHECK } # Close the mailbox. Permanently removes \Deleted messages and return to # the AUTH state. - proc CLOSE {chan} { + lappend PUNKARGS [list { + @id -id ::punk::imap4::CLOSE + @cmd -name punk::imap4::CLOSE -help\ + {The CLOSE command permanently removes all messages that have the + \Deleted flag set from the currently selected mailbox, and it returns + to the authenticated state from the selected state. No untagged + EXPUNGE responses are sent. + + No messages are removed, and no error is given, if the mailbox is + selected by an EXAMINE command or is otherwise selected as read-only. + + Even if a mailbox is selected, a SELECT, EXAMINE, or LOGOUT command + MAY be issued without previously issuing a CLOSE command. The + SELECT, EXAMINE, and LOGOUT commands implicitly close the currently + selected mailbox without doing an expunge. However, when many + messages are deleted, a CLOSE-LOGOUT or CLOSE-SELECT sequence is + considerably faster than an EXPUNGE-LOGOUT or EXPUNGE-SELECT because + no untagged EXPUNGE responses (which the client would probably + ignore) are sent.} + @leaders -min 1 -max 1 + chan -optional 0 + @opts + @values -min 0 -max 0 + }] + proc CLOSE {args} { + set argd [punk::args::parse $args withid ::punk::imap4::CLOSE] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + upvar ::punk::imap4::proto::info info variable mboxinfo - if {[punk::imap4::proto::simplecmd $chan CLOSE SELECT {}]} { + if {[punk::imap4::proto::simplecmd $chan -validstates {SELECT} CLOSE]} { return 1 } @@ -2257,7 +2892,7 @@ tcl::namespace::eval punk::imap4 { } #todo - limit to imap4 rev2+? - if {[punk::imap4::proto::simplecmd $chan UNSELECT {*}$allowstates {}]} { + if {[punk::imap4::proto::simplecmd $chan -validstates $allowstates UNSELECT]} { return 1 } #array set mboxinfo {} ;#JMN @@ -2267,22 +2902,65 @@ tcl::namespace::eval punk::imap4 { } proc NAMESPACE {chan} { - punk::imap4::proto::simplecmd $chan NAMESPACE * + punk::imap4::proto::simplecmd $chan NAMESPACE } # Create a new mailbox. 
#todo - allow creation with specialuse metadata if # CREATE-SPECIAL-USE capability is present - proc CREATE {chan mailbox} { - punk::imap4::proto::simplecmd $chan CREATE {AUTH SELECT} $mailbox + lappend PUNKARGS [list { + @id -id "::punk::imap4::CREATE" + @cmd -name "punk::imap4::CREATE" -help\ + "Create a mailbox with the given name. + It is an error to attempt to create INBOX + or a name that refers to an existing mailbox. + Servers will generally allow creation of a + hierarchy of mailboxes if the mailbox separator + is within the name." + @leaders -min 1 -max 1 + chan + @opts + @values -min 1 -max 1 + mailbox + }] + proc CREATE {args} { + set argd [punk::args::parse $args withid ::punk::imap4::CREATE] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $values mailbox] + + punk::imap4::proto::simplecmd $chan -validstates {AUTH SELECT} CREATE $mailbox } + + # RFC 5464 The IMAP METADATA Extension # ------------------------------------------------------------ # - RFC6154 IMAP LIST Extension for Special-use Mailboxes # - other mailbox 'annotations' ? # - relevant CAPS: SPECIAL-USE CREATE-SPECIAL-USE LIST-EXTENDED # ------------------------------------------------------------ - proc GETMETADATA {chan mailbox annotation} { + lappend PUNKARGS [list { + @id -id "::punk::imap4::GETMETADATA" + @cmd -name "punk::imap4::GETMETDATA" -help\ + "Get metadata on named mailbox, or server annotations + if empty-string provided instead of mailbox name." + @leaders -min 1 -max 1 + chan + @opts + @values -min 2 -max 2 + mailbox -help\ + {Mailbox name or empty string {""} for server annotations} + annotation -choicerestricted 0 -help\ + "May include glob character *"\ + -choices { + /private/specialuse /private/squat /private/sieve /private/sharedseen /private/comment + /private/expire /private/news2mail /private/pop3showafter + } -help\ + "Annotation is a string beginning with /private/ or /shared/ + Check specific server for supported mailbox annotations. + " + }] + proc GETMETADATA {args} { #on cyrus at least, annotation must begin with /shared or /private #e.g /private/specialuse #C: GETMETDATA "Foldername" /private/specialuse @@ -2294,20 +2972,35 @@ tcl::namespace::eval punk::imap4 { #S: \Junk #S: ) #S: OK Completed + set argd [punk::args::parse $args withid ::punk::imap4::GETMETADATA] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $values mailbox] + set annotation [dict get $values annotation] + set annotation [string trim $annotation] if {![string match "/private/?*" $annotation] && ![string match "/shared/?*" $annotation]} { + #cyrus IMAP enforces this anyway.. others? can we ever send just the following? GETMETADATA name * error "GETMETADATA annotation must begin with /shared/ or /private/" } - punk::imap4::proto::simplecmd $chan GETMETADATA {AUTH SELECT} $mailbox $annotation + punk::imap4::proto::simplecmd $chan -validstates {AUTH SELECT EXAMINE} GETMETADATA $mailbox $annotation } lappend PUNKARGS [list { @id -id "::punk::imap4::SETMETADATA" @cmd -name "punk::imap4::SETMETDATA" -help\ - "Set metadata on mailbox" + "Set metadata on mailbox name. + + If an empty string is provided instead of the + mailbox name - the annotation is applied at + the server level. Users may be able to set + /private or /shared annotations at the server + level depending on how the server restricts + them." 
         @leaders -min 1 -max 1
         chan
         @opts
+        -ignorestate -type none
         @values -min 3 -max 3
         mailbox
         annotation -choicerestricted 0 -choices {
@@ -2324,6 +3017,11 @@ tcl::namespace::eval punk::imap4 {
         set argd [punk::args::parse $args withid ::punk::imap4::SETMETADATA]
         lassign [dict values $argd] leaders opts values received
         set chan [dict get $leaders chan]
+        if {[dict exists $received -ignorestate]} {
+            set ignorestate 1
+        } else {
+            set ignorestate 0
+        }
         set mailbox [dict get $values mailbox]
         set annotation [dict get $values annotation]
         set value [dict get $values value]
@@ -2332,38 +3030,121 @@ tcl::namespace::eval punk::imap4 {
         if {![string match /private/?* $annotation] && ![string match /shared/?* $annotation]} {
             error "SETMETADATA annotation must begin with /shared/ or /private/"
         }
+        if {$ignorestate} {
+            set validstates *
+        } else {
+            set validstates {AUTH SELECT EXAMINE}
+        }
         if {$value in [list "" NIL]} {
-            punk::imap4::proto::simplecmd $chan SETMETADATA {AUTH SELECT} $mailbox "($annotation NIL)"
+            punk::imap4::proto::simplecmd $chan -validstates $validstates SETMETADATA $mailbox "($annotation NIL)"
        } else {
-            punk::imap4::proto::simplecmd $chan SETMETADATA {AUTH SELECT} $mailbox "($annotation \"$value\")"
+            punk::imap4::proto::simplecmd $chan -validstates $validstates SETMETADATA $mailbox "($annotation \"$value\")"
        }
    }

    # ------------------------------------------------------------
-    # Delete a mailbox
+    lappend PUNKARGS [list {
+        @id -id "::punk::imap4::DELETE"
+        @cmd -name "punk::imap4::DELETE" -help\
+            "Permanently delete the mailbox with the
+            given name.
+            Server behaviour may vary with regard
+            to when/if mailboxes with sub-boxes can
+            be deleted.
+            If the mailbox is successfully deleted,
+            all messages in that mailbox are removed.
+            Todo - document more."
+        @leaders -min 1 -max 1
+        chan
+        @opts
+        @values -min 1 -max 1
+        mailbox
+    }]
-    proc DELETE {chan mailbox} {
-        punk::imap4::proto::simplecmd $chan DELETE {AUTH SELECT} $mailbox
+    proc DELETE {args} {
+        set argd [punk::args::parse $args withid ::punk::imap4::DELETE]
+        lassign [dict values $argd] leaders opts values received
+        set chan [dict get $leaders chan]
+        set mailbox [dict get $values mailbox]
+
+        punk::imap4::proto::simplecmd $chan -validstates {AUTH SELECT EXAMINE} DELETE $mailbox
    }

-    # Rename a mailbox
-    proc RENAME {chan oldname newname} {
-        punk::imap4::proto::simplecmd $chan RENAME {AUTH SELECT} $oldname $newname
+    lappend PUNKARGS [list {
+        @id -id "::punk::imap4::RENAME"
+        @cmd -name "punk::imap4::RENAME" -help\
+            "Rename a mailbox.
+            It is an error to attempt to rename from a mailbox
+            name that does not exist or to a mailbox name that
+            already exists.
+            Some servers will allow renaming INBOX - but with
+            special behaviour - moving all messages in INBOX
+            to a folder with the given name, leaving INBOX
+            empty - except that submailboxes of INBOX (if any)
+            are not moved."
+        @leaders -min 1 -max 1
+        chan
+        @opts
+        @values -min 2 -max 2
+        oldname
+        newname
+    }]
+    proc RENAME {args} {
+        set argd [punk::args::parse $args withid ::punk::imap4::RENAME]
+        lassign [dict values $argd] leaders opts values received
+        set chan [dict get $leaders chan]
+        set oldname [dict get $values oldname]
+        set newname [dict get $values newname]
+        punk::imap4::proto::simplecmd $chan -validstates {AUTH SELECT EXAMINE} RENAME $oldname $newname
     }

-    # Subscribe to a mailbox
-    proc SUBSCRIBE {chan mailbox} {
-        punk::imap4::proto::simplecmd $chan SUBSCRIBE {AUTH SELECT} $mailbox
+    lappend PUNKARGS [list {
+        @id -id "::punk::imap4::SUBSCRIBE"
+        @cmd -name "punk::imap4::SUBSCRIBE" -help\
+            "Add the specified mailbox name to the server's set
+            of \"active\" or \"subscribed\" mailboxes as returned
+            by the LIST (SUBSCRIBED) command.
+
+            Some servers may maintain a mailbox name in its
+            subscribed list even if the mailbox doesn't always
+            exist. e.g a system-alerts mailbox that is created
+            and removed as necessary.
+            "
+        @leaders -min 1 -max 1
+        chan
+        @opts
+        @values -min 1 -max 1
+        mailbox
+    }]
+    proc SUBSCRIBE {args} {
+        set argd [punk::args::parse $args withid ::punk::imap4::SUBSCRIBE]
+        lassign [dict values $argd] leaders opts values received
+        set chan [dict get $leaders chan]
+        set mailbox [dict get $values mailbox]
+        punk::imap4::proto::simplecmd $chan -validstates {AUTH SELECT EXAMINE} SUBSCRIBE $mailbox
     }

-    # Unsubscribe to a mailbox
-    proc UNSUBSCRIBE {chan mailbox} {
-        punk::imap4::proto::simplecmd $chan UNSUBSCRIBE {AUTH SELECT} $mailbox
+    lappend PUNKARGS [list {
+        @id -id "::punk::imap4::UNSUBSCRIBE"
+        @cmd -name "punk::imap4::UNSUBSCRIBE" -help\
+            "Unsubscribe from a mailbox"
+        @leaders -min 1 -max 1
+        chan
+        @opts
+        @values -min 1 -max 1
+        mailbox
+    }]
+    proc UNSUBSCRIBE {args} {
+        set argd [punk::args::parse $args withid ::punk::imap4::UNSUBSCRIBE]
+        lassign [dict values $argd] leaders opts values received
+        set chan [dict get $leaders chan]
+        set mailbox [dict get $values mailbox]
+        punk::imap4::proto::simplecmd $chan -validstates {AUTH SELECT EXAMINE} UNSUBSCRIBE $mailbox
     }

     #TODO
     proc IDLE {chan} {
-        if {[punk::imap4::prot::has_capability $chan IDLE]} {
-            punk::imap4::proto::simplecmd $chan IDLE {AUTH SELECT}
+        if {[punk::imap4::proto::has_capability $chan IDLE]} {
+            punk::imap4::proto::simplecmd $chan -validstates {AUTH SELECT EXAMINE} IDLE
         } else {
             error "IMAP SERVER has NOT advertised the capability IDLE."
} @@ -2396,7 +3177,7 @@ tcl::namespace::eval punk::imap4 { -inline -type none @values -min 0 -max 2 ref -default "" - mbox -default "*" + mailboxpattern -default "*" }] # List of folders proc FOLDERS {args} { @@ -2408,7 +3189,7 @@ tcl::namespace::eval punk::imap4 { set opt_inline [dict exists $received -inline] set opt_ignorestate [dict exists $received -ignorestate] set ref [dict get $values ref] - set mbox [dict get $values mbox] + set mbox [dict get $values mailboxpattern] array unset folderinfo $chan,* @@ -2422,29 +3203,30 @@ tcl::namespace::eval punk::imap4 { # parray folderinfo #set rv [punk::imap4::proto::simplecmd $chan LIST $allowstates \"$ref\" \"$mbox\"] if {[has_capability $chan SPECIAL-USE]} { - set rv [punk::imap4::proto::simplecmd $chan LIST $allowstates \"$ref\" \"$mbox\" RETURN (SPECIAL-USE SUBSCRIBED)] + set rv [punk::imap4::proto::simplecmd $chan -validstates $allowstates LIST \"$ref\" \"$mbox\" RETURN {(SPECIAL-USE SUBSCRIBED)}] } else { - set rv [punk::imap4::proto::simplecmd $chan LIST $allowstates \"$ref\" \"$mbox\" RETURN (SUBSCRIBED)] - } - if {$opt_inline} { - set rv {} - foreach f [folderinfo $chan flags] { - set lflags {} - foreach fl [lindex $f 1] { - #review - here we are converting things like {\HasNoChildren} to {hasnochildren} - #This may be desirable from a tcl script user's point of view - but may also - #be a surprise for those expecting the exact IMAP flags. todo? - if {[string is alnum [string index $fl 0]]} { - lappend lflags [string tolower $fl] - } else { - lappend lflags [string tolower [string range $fl 1 end]] - } + set rv [punk::imap4::proto::simplecmd $chan -validstates $allowstates LIST \"$ref\" \"$mbox\" RETURN (SUBSCRIBED)] + } + if {!$opt_inline} { + return $rv + } + + set inlineresult {} + foreach f [folderinfo $chan flags] { + set lflags {} + foreach fl [lindex $f 1] { + #review - here we are converting things like {\HasNoChildren} to {hasnochildren} + #This may be desirable from a tcl script user's point of view - but may also + #be a surprise for those expecting the exact IMAP flags. todo? + if {[string is alnum [string index $fl 0]]} { + lappend lflags [string tolower $fl] + } else { + lappend lflags [string tolower [string range $fl 1 end]] } - lappend rv [list [lindex $f 0] $lflags] } + lappend inlineresult [list [lindex $f 0] $lflags] } - # parray folderinfo - return $rv + return $inlineresult } @@ -2454,8 +3236,8 @@ tcl::namespace::eval punk::imap4 { error "missing arguments. Usage: search chan arg ?arg ...?" } - punk::imap4::proto::requirestate $chan SELECT - set imapexpr [convert_search_expr $args] + punk::imap4::proto::requirestate $chan {SELECT EXAMINE} + set imapexpr [punk::imap4::proto::convert_search_expr $args] punk::imap4::proto::multiline_prefix_command imapexpr "SEARCH" punk::imap4::proto::multiline_request $chan $imapexpr if {[punk::imap4::proto::getresponse $chan]} { @@ -2616,38 +3398,173 @@ tcl::namespace::eval punk::imap4 { # proc ::imap4::store # proc ::imap4::logout (need to clean both msg and mailbox info arrays) - # Amend the flags of a message to be updated once CLOSE/EXPUNGE is initiated - proc STORE {chan range key values} { - set valid_keys { - FLAGS - FLAGS.SILENT - +FLAGS - +FLAGS.SILENT - -FLAGS - -FLAGS.SILENT - } - if {$key ni $valid_keys} { - error "Invalid data item: $key. 
Must be one of [join $valid_keys ,]" - } - parserange $chan $range start end - set newflags {} - foreach val $values { - if {[regexp {^\\+(.*?)$} $val]} { - lappend newflags $values - } else { - lappend newflags "\\$val" - } + # Amend the flags of a message to be updated once CLOSE/EXPUNGE is initiated ;#obsolete? + #STORE of a flag should be imediately reflected in the server state. + #\Recent is imaprev1 only (deprecated) - but in any case, is read-only + #The UID SEARCH mechanism should now be used instead of looking for \Recent flag on the mailbox + #or the untagged response: * RECENT + #UID SEARCH UID > + #The \Recent flag may exist on messages - but is optional + lappend PUNKARGS [list { + @id -id ::punk::imap4::STORE + @cmd -name punk::imap4::STORE -help\ + "Alters data associated with a message (or messages) in the mailbox. + + The .SILENT suffix for the storetype arg indicates the client is not + requesting an untagged FETCH response indicating the new state of + the flags; however, even in it's presence, servers should send an + untagged FETCH response if an external change to the flags is + observed (e.g changed by another client/session) + " + @leaders -min 1 -max 1 + chan -optional 0 -help\ + "existing channel for an open IMAP connection" + @values -min 2 -max 3 + sequence-set -help\ + "A message sequence set such as: + 1:1 + 2:4 + *:3 + 1,3,5,7:9 + " + storetype -default +FLAGS -choicecolumns 1 -choices {+FLAGS +FLAGS.SILENT -FLAGS -FLAGS.SILENT FLAGS FLAGS.SILENT}\ + -choicelabels { + +FLAGS\ + "Add the supplied flagnames to the flags for the message. + The new value of the flags is returned as if a FETCH of + those flags was done." + +FLAGS.SILENT\ + "Equivalent to FLAGS, but without returning the new value." + -FLAGS\ + "Remove the supplied flagnames from the flags for the + message. The new value of the flags is returned as if a + FETCH of those flags was done." + -FLAGS.SILENT\ + "Equivalent to -FLAGS, but without returning a new value." + FLAGS\ + "REPLACE the flags for the message with the suplied + flagnames. The new value of the flags is returned as if + a FETCH of those flags was done." + FLAGS.SILENT\ + "Equivalent to FLAGS, but without returning a new value." + } -help\ + "The type of STORE operation to be performed on the upplied flagnames" + flagname -multiple 1 -choicecolumns 2 -choicerestricted 0 -choicegroups { + SystemFlags {{\Deleted} {\Flagged} {\Seen} {\Answered} {\Draft}} + Keywords9051 {{$MDNSent} {$Forwarded} {$Junk} {$NotJunk} {$Phishing}} + OtherKeywords {{$Important} {$Submitted} {$SubmitPending}} + Obsolete {{\Recent}} + }\ + -choicelabels { + {\Seen}\ + { Message has been read} + {\Answered}\ + { Message has been answered} + {\Flagged}\ + { Message is "flagged" for urgent/special attention} + {\Deleted}\ + { Message is "deleted" for removal by later EXPUNGE} + {\Draft}\ + { Message has not completed composition (marked as a + draft).} + {\Recent}\ + { This flag was in use in IMAP4rev1 and was deprecated + in RFC9051} + $Forwarded\ + " Message has been forwarded to another email address + by being embedded within, or attached to a new message. + An email client sets this keyword when it successfully + forwards the message to another email address. Typical + usage of this keyword is to show a different (or + additional) icon for a message that has been forwarded. + Once set, the flag SHOULD NOT be cleared." + $MDNSent\ + " Message Disposition Notification [RFC8098] was + generated and sent for this message. 
See [RFC3503] for + more details on how this keyword is used and for + requirements on clients and servers." + $Junk\ + " The user (or a delivery agent on behalf of the user) + may choose to mark a message as definitely containing + junk. The $Junk keyword can be used to mark, group, + or hide undesirable messages (and such messages might + be removed or deleted later)." + $NotJunk\ + " The user (or a delivery agent on behalf of the user) + may choose to mark a message as definitely not + containing junk. The $NotJunk keyword can be used to + mark, group, or show messages that the user wants to + see." + $Phishing\ + " The $Phishing keyword can be used by a delivery agent + to mark a message as highly likely to be a phishing + email. A message that's determined to be a phishing + email by the delivery agent should also be considered + junk email and have the appropriate junk filtering + applied, including setting the $Junk flag and placing + the message in the \Junk special-use mailbox if + available" + } -help\ + {Each supplied value is a system flag such as \Seen \Deleted etc or a + keyword/user-defined flag (a name not beginning with a backslash) + The items listed as Keywords9051 are mentioned in RFC9051 as SHOULD be supported + by servers. See also registered keywords: + https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml + } + }] + proc STORE {args} { + set argd [punk::args::parse $args withid ::punk::imap4::STORE] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set sequenceset [dict get $values sequence-set] + set storetype [dict get $values storetype] + set flagnames [dict get $values flagname] ;#multiple + + set ranges [split $sequenceset ,] + #parse each seq-range to give a chance to raise error + foreach range $ranges { + parse_seq-range $chan $range } - set clitag [punk::imap4::proto::request $chan "STORE $start:$end $key ([join $newflags])"] + + #review - do we need any client side validation? Duplicates only? + #What about presence of inconsistent flags $Junk $NotJunk? + #probably just best to let the server sort it out + #set validatedflags {} + #foreach fname $flagnames { + # if {[regexp {^\\+(.*?)$} $fname]} { + # #system flag - restrict? + # lappend validatedflags "\\$fname" + # } else { + # #user-defined flag - any name that does not start with a backslash + # lappend validatedflags $fname + # } + #} + set clitag [punk::imap4::proto::request $chan "STORE $sequenceset $storetype ([join $flagnames])"] if {[punk::imap4::proto::getresponse $chan $clitag]} { return 1 } return 0 } - # Logout - proc LOGOUT {chan} { - if {[punk::imap4::proto::simplecmd $chan LOGOUT * {}]} { + lappend PUNKARGS [list { + @id -id ::punk::imap4::LOGOUT + @cmd -name punk::imap4::LOGOUT -help\ + "End the connection cleanly. + + This disconnects from the server and reads the untagged BYE response + from the server. + It also tidies up client state associated with the channel." 
+ @leaders -min 1 -max 1 + chan -optional 0 + @opts + @values -min 0 -max 0 + }] + proc LOGOUT {args} { + set argd [punk::args::parse $args withid ::punk::imap4::LOGOUT] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + + if {[punk::imap4::proto::simplecmd $chan LOGOUT]} { # clean out info arrays variable folderinfo variable mboxinfo @@ -2670,23 +3587,84 @@ tcl::namespace::eval punk::imap4 { return 0 } - # Expunge : force removal of any messages with the - # flag \Deleted + #Permanently removes all messages that have the \Deleted flag + #set from the currently selected mailbox. proc EXPUNGE {chan} { - if {[punk::imap4::proto::simplecmd $chan EXPUNGE SELECT {}]} { + #Cannot call from EXAMINE state + if {[punk::imap4::proto::simplecmd $chan -validstates {SELECT} EXPUNGE]} { return 1 } return 0 } # copy : copy a message to a destination mailbox - proc COPY {chan msgid mailbox} { - if {[punk::imap4::proto::simplecmd $chan COPY SELECT [list $msgid $mailbox]]} { + lappend PUNKARGS [list { + @id -id ::punk::imap4::COPY + @cmd -name punk::imap4::COPY -help\ + "Copies the specified message(s) to the end + of the destination mailbox. + The server SHOULD preserve the flags and + internal date of the message(s) in the copy." + @leaders -min 1 -max 1 + chan + @values -min 2 -max 2 + sequence-set + mailbox + }] + proc COPY {args} { + set argd [punk::args::parse $args withid :punk::imap4::COPY] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set sequenceset [dict get $values sequence-set] + set mailbox [dict get $values mailbox] + if {[punk::imap4::proto::simplecmd $chan -validstates {SELECT EXAMINE} COPY $sequenceset $mailbox]} { return 1 } return 0 } + lappend PUNKARGS [list { + @id -id ::punk::imap4::APPEND + @cmd -name punk::imap4::APPEND -help\ + "EXPERIMENTAL - incomplete" + @leaders -min 2 -max 4 + chan + mailbox + #The API is a little clunky because the IMAP function has optional interim arguments between mailbox and message. + #We can only put flags after all leaders - which can make this function + #appear inconsistent with others where options always come after chan. + #This is a somewhat deliberate limitation of punk::args - it is intended to provide a simple understandable model + #covering most use-cases - not totally freeform mixes of options between other arguments - especially with optional + #non-flag arguments. (efficiency and complexity and unambiguity regarding values starting with - are important considerations) + #e.g "func a -opt1 o1 b? c? d e" is not supported. + #(optional non-flag args must be at end of leaders or values - and opts must be between those 2 sets.) + #so instead we will use the equiv of "func a b? c? 
-opt1 o1 d e" + flaglist -default {} -optional 1 -type list -help\ + {List of flags such as \Seen \Flagged} + datetime -default "" -optional 1 -type string + @opts + @values -min 1 -max 1 + message + }] + proc APPEND {args} { + set argd [punk::args::parse $args withid ::punk::imap4::APPEND] + lassign [dict values $argd] leaders opts values received + set chan [dict get $leaders chan] + set mailbox [dict get $leaders mailbox] + set flaglist [dict get $leaders flaglist] + set datetime [dict get $leaders datetime] + set message [dict get $values message] + + #todo - send as single synchronizing literal after getting server's continuation (or non-synchronising literals) + + return 1 + #if {[punk::imap4::proto::simplecmd $chan -validstates {SELECT EXAMINE} APPEND $mailbox]} { + # return 1 + #} + #return 0 + } + + #ascii art from RFC3501/RFC9051 proc rfc_diagram {} { punk::args::lib::tstr { @@ -2739,6 +3717,12 @@ tcl::namespace::eval punk::imap4 { } } + #FROM RFC9051 + #"Session" refers to the sequence of client/server interaction from + #the time that a mailbox is selected (SELECT or EXAMINE command) until + #the time that selection ends (SELECT or EXAMINE of another mailbox, + #CLOSE command, UNSELECT command, or connection termination). + #*** !doctools #[list_end] [comment {--- end definitions namespace punk::imap4 ---}] } @@ -3193,6 +4177,28 @@ tcl::namespace::eval punk::imap4::lib { return $words } + #taking an existing words dict that may contain type = literal entries (value = {n}) + # and a list of the previously read literals + # stitch them together + proc imapwords_resolved {words literals} { + dict for {wordindex wordinfo} $words { + if {[dict get $wordinfo type] eq "literal"} { + set lit [dict get $wordinfo value] + set litinner [string range $lit 1 end-1] + #server does not send non-synchronizing literals e.g {123+} + set resolved_value [::lpop literals 0] + if {[punk::imap4::proto::is_imap_number64 $litinner] && [string length $resolved_value] == $litinner} { + dict set words $wordindex value $resolved_value + } else { + #protoerror $chan "IMAP: METADATA malformed response ($lit mismatch size of literal [string length $val]) '$line'" + } + + dict set words $wordindex type resolvedliteral + } + } + return $words ;#resolved words where type 'literal' has been replaced with 'resolvedliteral' + } + #firstword_basic and secondword_basic don't handle IMAP structures such as lists etc proc firstword_basic {line} { if {[regexp -indices -start 0 {\S+} $line range]} { @@ -3424,7 +4430,7 @@ if {[info script] eq $argv0} { set folder [dict get $values folder] # open and login ... 
- set imap [punk::imap4::OPEN {*}$debugflags -security $opt_security $server $opt_port] + set imap [punk::imap4::CONNECT {*}$debugflags -security $opt_security $server $opt_port] punk::imap4::AUTH_LOGIN $imap $user $pass punk::imap4::select $imap $folder @@ -3439,7 +4445,7 @@ if {[info script] eq $argv0} { set fields {from: to: subject: size} # fetch 3 records (at most)) inline set max [expr {$num_mails<=3?$num_mails:3}] - foreach rec [punk::imap4::FETCH $imap :$max -inline {*}$fields] { + foreach rec [punk::imap4::FETCH $imap 1:$max -inline {*}$fields] { puts -nonewline "#[incr idx])" for {set j 0} {$j<[llength $fields]} {incr j} { puts "\t[lindex $fields $j] [lindex $rec $j]" diff --git a/src/modules/punk/netbox-999999.0a1.0.tm b/src/modules/punk/netbox-999999.0a1.0.tm new file mode 100644 index 00000000..84449643 --- /dev/null +++ b/src/modules/punk/netbox-999999.0a1.0.tm @@ -0,0 +1,1228 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2025 +# +# @@ Meta Begin +# Application punk::netbox 999999.0a1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin shellspy_module_punk::netbox 0 999999.0a1.0] +#[copyright "2025"] +#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] +#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[require punk::netbox] +#[keywords module] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::netbox +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::netbox +#[list_begin itemized] + +package require Tcl 8.6- +package require http +package require rest +package require punk::args +#*** !doctools +#[item] [package {Tcl 8.6}] +#[item] [package {http}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#tcl::namespace::eval punk::netbox::class { + #*** !doctools + #[subsection {Namespace punk::netbox::class}] + #[para] class definitions + #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end 
class enumeration ---}] + #} +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +tcl::namespace::eval punk::netbox {} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::netbox::system { + #*** !doctools + #[subsection {Namespace punk::netbox::system}] + #[para] Internal functions that are not part of the API + + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + + punk::args::define { + @id -id ::punk::netbox::system::make_rest_func + @leaders -min 2 -max 2 + commandname -help\ + "Fully qualified commandname. + There must be an existing punk::args definition with @id + directive -id matching the name" + endpoint -help\ + "The subpath to be appended to the base url. + e.g api/ipam/ip-addresses/" + -verb -default get -choices {get post patch head put delete} + -body -default optional -choicecolumns 2 -choices {none optional required mime_multipart}\ + -choicelabels { + none\ + " The call has no request body, + none must be supplied." + optional\ + " A request body can be supplied, + but is not required" + required\ + " A request body must be supplied." + mime_multipart\ + " A request body must be supplied + and will be interpreted as each + argument representing one part of + a mime/multipart document. Arguments + must be lists containing 2 elements, + a list of header keys and values, + and the mime part body, in this order." + } + } + proc make_rest_func {args} { + set argd [punk::args::parse $args withid ::punk::netbox::system::make_rest_func] + lassign [dict values $argd] leaders opts values received + + set commandname [dict get $leaders commandname] + set endpoint [dict get $leaders endpoint] + set verb [dict get $opts -verb] + set body [dict get $opts -body] + + set custom [dict create\ + %commandname% $commandname\ + %endpoint% $endpoint\ + %verb% $verb\ + %body% $body\ + %showdict% {!@@results @@results/@*/@*.@*}\ + %showdict2% {@@results/@*/@*.@* !@@results}\ + ] + if {$commandname eq "::punk::netbox::status"} { + #we get duplicate django-version for %showdict% - todo - something. 
+ dict set custom %showdict% {@@django-version @@installed-apps/@*.@* !@@installed-apps} + dict set custom %showdict2% {@@installed-apps/@*.@* !@@installed-apps} + } + + set procbody [string map $custom { + set argd [punk::args::parse $args withid %commandname%] + lassign [dict values $argd] leaders opts values received + set apicontextid [dict get $leaders apicontextid] + if {[dict exists $received -RETURN]} { + set returntype [dict get $opts -RETURN] + } else { + if {[dict exists $opts -RETURN]} { + #not received - but has default + set returntype [dict get $opts -RETURN] + } else { + #fallback if -RETURN was defined without a default or was omitted + set returntype dict + } + } + + set query [dict create] + dict for {k v} $opts { + switch -- $k { + -CUSTOM_PARAM { + foreach custval $v { + lassign $custval param value + dict set query $param $value + } + } + -RETURN { + #ignore - already handled + } + default { + if {[string match *_FILTER $k]} { + set field [string range [string tolower [lindex [split $k _] 0]] 1 end] ;# -NAME_FILTER -> name + foreach fv $v { + lassign $fv filter value + dict set query ${field}__$filter $value + } + } else { + dict set query [string range $k 1 end] $v + } + } + } + + } + set body %body% + switch -- $body { + required { + set requestbody [dict get $values body] + } + optional { + if {[dict exists $received body]} { + set requestbody [dict get $values body] + } else { + set requestbody "" + } + } + } + upvar ::punk::netbox::contexts contexts + if {![dict exists $contexts $apicontextid]} { + error "specified contextid '$apicontextid' not found" + } + set config [dict create\ + format json\ + result json\ + ] + #rest api documentation is unclear on 'result' field + #note our default: result json + #this actually converts the json to a dict + + + dict set config headers [list Authorization [list Token [dict get $contexts $apicontextid token value]]] + if {$returntype eq "json"} { + #if we set result json - we get a dict instead of json :/ + dict set config result raw + } + + #variable headerdict + #set config [dict create\ + # headers $headerdict\ + #] + + set url [dict get $contexts $apicontextid url value] + puts "${url}%endpoint% '$query' '$config'" + if {$body in {required optional}} { + set result [::rest::%verb% ${url}%endpoint% $query $config $requestbody] + } else { + set result [::rest::%verb% ${url}%endpoint% $query $config] + } + switch -exact -- $returntype { + showdict { + #return [punk::lib::showdict $result !@@results @@results/@*/@*.@*] + return [punk::lib::showdict $result %showdict%] + } + showdict2 { + #return [punk::lib::showdict $result @@results/@*/@*.@* !@@results] + return [punk::lib::showdict $result %showdict2%] + } + default { + #dict or json - the counterintuitive 'result' field above sets this + return $result + } + } + }] + proc $commandname {args} $procbody + } + +} + +tcl::namespace::eval punk::netbox { + # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + # Base namespace + # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + #*** !doctools + #[subsection {Namespace punk::netbox}] + #[para] Core API functions for punk::netbox + #[list_begin definitions] + + variable PUNKARGS + + variable has_tls + set has_tls [expr {![catch {package require tls}]}] + + if {$has_tls} { + ::http::register https 443 ::tls::socket + } + + variable ipam + + #TEMP - todo + variable headerdict + set headerdict [dict create\ + Authorization "Token af65b993000874eaefeca0fa02b0d86014e48365"\ + ] + #temp + variable url https://www.netbox1.intx.com.au/ + + + 
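+    #Illustrative workflow sketch (names are examples only - assumes the
+    #api_context_* commands defined below, and an endpoint command such as
+    #'status' generated via punk::netbox::system::make_rest_func):
+    #  punk::netbox::api_context_create mybox https://netbox.example.com/ $token
+    #  punk::netbox::api_contexts                ;# table of in-memory contexts
+    #  punk::netbox::status mybox                ;# call a generated endpoint command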
variable contexts [dict create] + variable context_id 0 + + proc api_context_names {} { + variable contexts + return [dict keys $contexts] + } + + lappend PUNKARGS [list { + @id -id ::punk::netbox::api_contexts + @cmd -name punk::netbox::api_contexts -help\ + "Show in-memory api contexts. + These are named contexts for calling + the NETBOX rest api. + They are loaded using api_contexts_load from + a .toml configuration file, or created using + api_context_create." + -return -default table -choices {table tableobject dict} + -fields -type list -default {url tokentail comment} -choices {url token tokentail comment *} -choicemultiple {0 -1} -choicerestricted 0 -help\ + "The * token can be included in the list of specified + fields, and represents any other available fields found + from the matched contexts" + @values -min 0 -max 1 + globname -default * -help\ + "pattern to match the context name(s)" + }] + proc api_contexts {args} { + set argd [punk::args::parse $args withid ::punk::netbox::api_contexts] + lassign [dict values $argd] leaders opts values received + set returntype [dict get $opts -return] + set fields [dict get $opts -fields] + set globname [dict get $values globname] + + + variable contexts + set matches [dict keys $contexts $globname] + + + if {"*" in $fields} { + set starposn [lsearch -exact $fields *] + set before [lrange $fields 0 $starposn-1] + set after [lrange $fields $starposn+1 end] + set allspecified [list {*}$before {*}$after] + # use * as placeholder for all others not specified - retain order of specified columns + set fields [list] + #check fields in all matches + set starfields [list] + if {"tokentail" ni $allspecified} { + #calculated column + lappend starfields tokentail + } + foreach k $matches { + set contextinfo [dict get $contexts $k] + dict for {valkey valinfo} $contextinfo { + if {$valkey ni $allspecified && $valkey ni $starfields} { + lappend starfields $valkey + } + } + } + set fields [list {*}$before {*}$starfields {*}$after] + } + + + switch -- $returntype { + table - tableobject { + package require textblock + set t [textblock::table -return tableobject -minwidth 75 -headers [list contextid {*}$fields]] + foreach k $matches { + set contextinfo [dict get $contexts $k] + set tokentail "" + if {"tokentail" in $fields} { + #computed column + if {[dict exists $contextinfo token]} { + set tokentail [string range [dict get $contextinfo token] end-5 end] + } + } + set rowdata [list $k] + foreach f $fields { + if {[dict exists $contextinfo $f value]} { + lappend rowdata [dict get $contextinfo $f value] + } else { + if {$f eq "tokentail"} { + lappend rowdata $tokentail + } else { + lappend rowdata "" + } + } + } + $t add_row $rowdata + } + if {$returntype eq "table"} { + set tableview [$t print] + $t destroy + return $tableview + } else { + return $t + } + } + dict { + set result [dict create] + foreach k $matches { + set contextinfo [dict get $contexts $k] + set tokentail "" + if {"tokentail" in $fields} { + #computed column + if {[dict exists $contextinfo token]} { + set tokentail [string range [dict get $contextinfo token] end-5 end] + } + } + dict set result $k {} ;#ensure record is output even if empty fieldlist + foreach f $fields { + if {[dict exists $contextinfo $f value]} { + dict set result $k $f [dict get $contextinfo $f value] + } else { + if {$f eq "tokentail"} { + dict set result $k tokentail $tokentail + } + } + } + #dict for {valkey valinfo} $contextinfo { + # dict set result $k $valkey [dict get $valinfo value] + #} + } + return $result + } + 
} + } + + + + #get api handle(s) for a netbox server (with url and token) to pass to the punk::netbox api functions + lappend PUNKARGS [list { + @id -id ::punk::netbox::api_context_load + @cmd -name punk::netbox::api_context_load -help\ + "Load API context information (url token) + from a .toml file in the data directory + or from a specified file. + To create an initial file, use api_context + to create one or more named configurations + specifying the url and authentication token. + Then use api_context_save to persist them." + @opts + -contextname -default * -help\ + "Name of an API context or a pattern for + which contexts to load from the file." + @values + filepath -default "" -type file + }] + proc api_context_load {args} { + set argd [punk::args::parse $args withid ::punk::netbox::api_context_load] + lassign [dict values $argd] leaders opts values received + set contextglob [dict get $opts -contextname] + set filepath [dict get $values filepath] + + if {$filepath eq ""} { + set filepath [_datafile] + } + if {![file exists $filepath]} { + error "No existing datafile at '$filepath'\nUse api_context_create to configure a context and save it with api_context_save" + } + package require tomlish + set tomldata [readFile $filepath] + set tomlish [tomlish::from_toml $tomldata] ;#intermediate (unvalidated) representation of toml data - maintaining whitespace and comments + set tomldict [tomlish::to_dict $tomlish] ;#lossy conversion to a programmatic structure (loss is of comments, whitespace) + variable contexts + #merge into any existing-in-memory loaded/created contexts + set loaded [list] + dict for {contextid contextinfo} $tomldict { + if {[string match $contextglob $contextid]} { + if {![dict exists $contextinfo url]} { + puts "api_context_load warning: Loaded context $contextid is missing 'url' key" + } + if {![dict exists $contextinfo token]} { + puts "api_context_load warning: Loaded context $contextid is missing 'token' key" + } + dict set contexts $contextid $contextinfo + lappend loaded $contextid + } + } + return $loaded + } + + lappend PUNKARGS [list { + @id -id ::punk::netbox::api_context_create + @cmd -name punk::netbox::api_context_create -help\ + "Create an in-memory configuration for an API context. + This consists of a name (contextid) under which a + url and authentication token are stored. + It can optionally be persisted using api_context_save + to the file of your choice, or to a reasonable default + location. (see _datafile). + The api_context_load function can be used to retrieve + previously stored contextids instead of calling this + function each time. + + A contextid is required when calling the netbox rest api + functions such as ipam::vrfs + This allows easy intermixing of calls to either the same + or different servers using the different permissions + granted by each token. + " + @leaders -min 1 -max 1 + contextid -type string -help\ + "Name for the api context. + If saved to a .toml file, this + will be the name of a toplevel table + containing configuration elements such + as url and token." + @opts + -property_value -type list -minsize 2 -maxsize 2 -multiple 1 -help\ + "custom property and value. 
+ e.g + property_value {comment {test comment}}" + @values -min 2 -max 2 + url -type string -help\ + "Base url of netbox server" + token -type string -help\ + "Netbox API authentication token" + }] + proc api_context_create {args} { + set argd [punk::args::parse $args withid ::punk::netbox::api_context_create] + lassign [dict values $argd] leaders opts values received + set contextid [dict get $leaders contextid] + if {[dict exists $received -property_value]} { + set propvals [dict get $opts -property_value] ;#multiple - as pairs + } else { + set propvals [list] + } + set baseurl [dict get $values url] + set token [dict get $values token] + + variable contexts + if {[dict exists $contexts $contextid]} { + error "api_context_create a context with id '$contextid' already exists." + } + set allprops [dict create url [dict create type STRING value $baseurl] token [dict create type STRING value $token]] + foreach pv $propvals { + lassign $pv p v + if {$p in {url token}} { + puts stderr "ignoring -property_value $p - invalid - already specified in arguments" + } + #todo - multiline? + dict set allprops $p [dict create type STRING value $v] + } + dict set contexts $contextid $allprops + return $contextid + } + proc _homedir {} { + if {[info exists ::env(HOME)]} { + set home [file normalize $::env(HOME)] + } else { + #not available on 8.6? ok will error out here. + set home [file tildeexpand ~] + } + return $home + } + lappend PUNKARGS [list { + @id -id ::punk::netbox::_datafile + @cmd -name punk::netbox::_datafile -help\ + "Get the path for the default storage file + used when an explicit path is not given by + the caller to the api_context load/save + functions. This file is in toml format. + The XDG_DATA_HOME env var is the preferred + choice of location - considered more secure + than XDG_CONFIG_HOME, although not as good + as a proper secret store. + A folder under the user's home directory, + at .local/share/punk/netbox is chosen if + XDG_DATA_HOME is not configured. + " + @leaders -min 0 -max 0 + @opts + -quiet -type none -help\ + "Suppress warning given when the folder does + not yet exist" + @values -min 0 -max 0 + }] + proc _datafile {args} { + set argd [punk::args::parse $args withid ::punk::netbox::_datafile] + lassign [dict values $argd] leaders opts values received + set be_quiet [dict exists $received -quiet] + + set was_noisy 0 + if {[info exists ::env(XDG_DATA_HOME)]} { + set data_home $::env(XDG_DATA_HOME) + } else { + set data_home [file join [_homedir] .local share] + if {!$be_quiet} { + puts stderr "Environment variable XDG_DATA_HOME does not exist - consider setting it if $data_home is not a suitable location" + set was_noisy 1 + } + } + if {!$be_quiet && ![file exists $data_home]} { + #parent folder for 'punk' config dir doesn't exist + set msg "configuration location (XDG_DATA_HOME or ~/.local/share) $data_home does not yet exist" + append msg \n " - please create it and/or set XDG_DATA_HOME env var." + puts stderr $msg + set was_noisy 1 + } + set punk_netbox_data_dir [file join $data_home punk netbox] + if {!$be_quiet && ![file exists $punk_netbox_data_dir]} { + set msg "punk::netbox data storage folder at $punk_netbox_data_dir does not yet exist." + append msg \n " It will be created if api_context_save is called without specifying an alternate location." 
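+            #note: api_context_save (below) only auto-creates this folder when it is called without an explicit filepath argument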
+            puts stderr $msg
+            set was_noisy 1
+        }
+        if {!$be_quiet && $was_noisy} {
+            puts stderr "punk::netbox::_datafile - call with -quiet option to suppress these messages"
+        }
+        return [file join $punk_netbox_data_dir netbox_api_contexts.toml]
+    }
+
+    lappend PUNKARGS [list {
+        @id -id ::punk::netbox::api_context_save
+        @cmd -name punk::netbox::api_context_save -help\
+            "Save a named in-memory API context (url, token and
+             any custom properties) to a .toml configuration file.
+             See also api_context_create, api_context_load."
+        @values
+        contextid -type string -help\
+            "Name for the api context.
+             If saved to a .toml file, this
+             will be the name of a toplevel table
+             containing configuration elements such
+             as url and token."
+        filepath -default "" -optional 1 -type file -help\
+            "Path of .toml configuration file containing
+             API url and token information.
+             If empty it will store under XDG_DATA_HOME
+             if the env var is defined, or in the
+             corresponding location within ~/.local/share.
+             In both cases the subfolder punk/netbox will
+             be used.
+             These locations are fairly reasonable for
+             sensitive data - but as tokens are not
+             encrypted, a proper security store should be
+             used instead if your risk-policy requires
+             more serious security.
+             "
+    }]
+    proc api_context_save {args} {
+        set argd [punk::args::parse $args withid ::punk::netbox::api_context_save]
+        lassign [dict values $argd] leaders opts values received
+        set contextid [dict get $values contextid]
+        set filepath [dict get $values filepath]
+
+        variable contexts
+        if {![dict exists $contexts $contextid]} {
+            error "punk::netbox::api_context_save error. No context with id '$contextid' exists. Load from file, or create it using punk::netbox::api_context_create"
+        }
+        if {$filepath eq ""} {
+            set filepath [_datafile -quiet]
+            set filefolder [file dirname $filepath]
+            if {![file exists $filefolder]} {
+                file mkdir $filefolder
+            }
+        }
+        set configdir [file dirname $filepath]
+        if {![file exists $configdir]} {
+            error "api_context_save error: folder $configdir doesn't exist"
+        }
+        package require tomlish
+        if {[file exists $filepath]} {
+            set existing_toml [readFile $filepath]
+            set tomlish [tomlish::from_toml $existing_toml]
+            set data_dict [tomlish::to_dict $tomlish]
+            if {[dict exists $data_dict $contextid]} {
+                #todo - nondestructive merge - don't destroy comments/formatting of existing records
+                #if we use to_dict on the existing tomlish - we lose comments etc
+                #also from_dict doesn't yet produce canonical nice-for-humans tomlish/toml
+                #merge
+                puts stderr "contextid '$contextid' exists in file $filepath"
+                puts stderr "Merge not implemented.."
+                set newfiledata ""
+            } else {
+                #append to existing toml data
+                set newdict [dict create $contextid [dict get $contexts $contextid]]
+                #we store our contexts in a structure already suitable for toml
+                # (ie one where we tag strings,ints e.g {type STRING value "etc"})
+                set newtomlish [tomlish::from_dict $newdict]
+                set newtoml [tomlish::to_toml $newtomlish]
+                set newfiledata $existing_toml\n$newtoml
+            }
+        } else {
+            set newdict [dict create $contextid [dict get $contexts $contextid]]
+            set newtomlish [tomlish::from_dict $newdict]
+            set newtoml [tomlish::to_toml $newtomlish]
+            set newfiledata $newtoml
+        }
+
+        if {$newfiledata ne ""} {
+            writeFile $filepath $newfiledata
+            puts stderr "saved [string length $newfiledata] bytes to '$filepath'"
+        }
+    }
+
+
+    namespace eval argdoc {
+        set _page_options {
+            -limit -default 100 -type integer -help\
+                "Each REST query returns a maximum number
+                 of results. This can be set to 0 to mean
+                 no limit - but it is still restricted to
+                 the max configured on the server. (1000?)
+
+                 This is effectively the page-size of the
+                 results. To retrieve more than a page, the
+                 next and previous urls can be iterated over."
+            -offset -default 0 -type integer
+        }
+        set _create_update_options {
+            -created
+            -created__gte
+            -created__lte
+            -last_updated
+            -last_updated__gte
+            -last_updated__lte
+        }
+        set _tenant_options {
+            -tenant_group_id
+            -tenant_group
+            -tenant_id
+            -tenant
+        }
+        set _region_options {
+            -region_id
+            -region
+        }
+        set _site_options {
+            -site_group_id
+            -site_group
+            -site_id
+            -site
+        }
+        set _group_options {
+            -group_id
+            -group
+        }
+        set _role_options {
+            -role_id
+            -role
+        }
+        set _filter_string [list\
+            "ie \n Exact match\n(case-insensitive)"\
+            "nie \n Inverse exact match\n(case-insensitive)"\
+            "n \n Not equal to"\
+            "ic \n Contains\n (case-insensitive)"\
+            "nic \n Does not contain\n (case-insensitive)"\
+            "isw \n Starts with\n (case-insensitive)"\
+            "nisw \n Does not start with\n (case-insensitive)"\
+            "iew \n Ends with\n (case-insensitive)"\
+            "niew \n Does not end with\n (case-insensitive)"\
+            "empty \n Is empty/null"\
+        ]
+        set _CUSTOM_PARAMS {
+            -CUSTOM_PARAM -type list -minsize 2 -maxsize 2 -multiple 1 -help\
+                "Specify a parameter not in this API
+                 e.g -CUSTOM_PARAM {mytag blah}"
+        }
+        set _RETURN {
+            -RETURN -type string -choices {dict showdict showdict2 json} -choicelabels {
+                dict\
+                " Tcl dictionary
+                  (fastest)"
+                showdict\
+                " human readable dict display
+                  with same order as dict."
+                showdict2\
+                " human readable dict display
+                  results first metadata last."
+            } -help\
+            "Options for returned data.
+             Note that showdict results are relatively slow, especially for large resultsets"
+        }
+        set _RETURN_STATUS {
+            -RETURN -type string -default showdict2 -choices {dict showdict showdict2 json} -choicelabels {
+                dict\
+                " Tcl dictionary"
+                showdict\
+                " human readable dict display"
+                showdict2\
+                " human readable dict display
+                  installed-apps first."
+            } -help\
+            "Options for returned data."
+        }
+
+        set _name_filter_help "Paired search filter for name:\n"
+        append _name_filter_help [textblock::list_as_table -columns 4 -show_hseps 1 $_filter_string]
+
+        set _description_filter_help "Paired search filter for description:\n"
+        append _description_filter_help [textblock::list_as_table -columns 4 -show_hseps 1 $_filter_string]
+
+        set _string_filter_help "Paired search filter for string:\n"
+        append _string_filter_help [textblock::list_as_table -columns 4 -show_hseps 1 $_filter_string]
+    }
+
+    punk::args::define {*}[list\
+        {
+        @dynamic
+        @id -id ::punk::netbox::status
+        @cmd -name punk::netbox::status -help\
+            "status_list
+             GET request for endpoint /status/
+
+             Netbox's current operational status
+            "
+        @leaders -min 1 -max 1
+        apicontextid -help\
+            "The name of the stored api context to use.
+ A contextid can be created in-memory using + api_context_create, or loaded from a .toml + file using api_context_load."\ + -choices {${[punk::netbox::api_context_names]}} + @opts + }\ + [set ::punk::netbox::argdoc::_RETURN_STATUS]\ + { + @values -min 0 -max 0 + }] + ::punk::netbox::system::make_rest_func ::punk::netbox::status api/status/ -verb get -body none + + + #test function - todo use punk::netbox::system::make_rest_func + #proc vrfs {args} { + # set argd [punk::args::parse $args withid ::punk::netbox::vrfs] + # lassign [dict values $argd] leaders opts values received + # set apicontextid [dict get $leaders apicontextid] + # set query [dict create] + # dict for {k v} $opts { + # if {$k eq "-CUSTOM_PARAM"} { + # foreach custval $v { + # lassign $custval param value + # dict set query $param $value + # } + # } elseif {[string match *_FILTER $k]} { + # set field [string range [string tolower [lindex [split $k _] 0]] 1 end] ;# -NAME_FILTER -> name + # foreach fv $v { + # lassign $fv filter value + # dict set query ${field}__$filter $value + # } + # } else { + # dict set query [string range $k 1 end] $v + # } + # } + # variable contexts + # if {![dict exists $contexts $apicontextid]} { + # error "specified contextid '$apicontextid' not found" + # } + # set config [dict create\ + # result json\ + # ] + # dict set config headers [list Authorization [list Token [dict get $contexts $apicontextid token value]]] + + # #variable headerdict + # #set config [dict create\ + # # headers $headerdict\ + # # result json\ + # #] + # #variable url + # set url [dict get $contexts $apicontextid url value] + + # puts "${url}api/ipam/vrfs/ '$query' '$config'" + # rest::get ${url}api/ipam/vrfs/ $query $config + #} + + #set ipam(vrfs) [dict create\ + # url https://www.netbox1.intx.com.au/api/ipam/vrfs/\ + # method get\ + # result json\ + # body none\ + # headers $headerdict\ + # opt_args {id: name: limit:100 offset:0} + #] + #set ipam(ip-addresses) [dict create\ + # url https://www.netbox1.intx.com.au/api/ipam/ip-addresses/\ + # method get\ + # headers $headerdict\ + # opt_args {parent: limit:100 offset:0} + #] + #set ipam(prefixes) [dict create\ + # url https://www.netbox1.intx.com.au/api/ipam/prefixes/\ + # method get\ + # headers $headerdict\ + # opt_args {prefix: limit:100 offset:0} + #] + #rest::create_interface ::punk::netbox::ipam + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::netbox ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +tcl::namespace::eval punk::netbox::ipam { + namespace export {[a-z]*} + lappend PUNKARGS [list\ + { + @dynamic + @id -id ::punk::netbox::ipam::vrfs + @cmd -name punk::netbox::ipam::vrfs -help\ + "ipam_vrfs_list + GET request for endpoint /ipam/vrfs/" + @leaders -min 1 -max 1 + apicontextid -help\ + "The name of the stored api context to use. 
+ A contextid can be created in-memory using + api_context_create, or loaded from a .toml + file using api_context_load."\ + -choices {${[punk::netbox::api_context_names]}} + @opts + -id -type integer + -name + -NAME_FILTER -type list -minsize 2 -maxsize 2 -multiple 1 -help {${$::punk::netbox::argdoc::_name_filter_help}} + -rd -type string -help\ + "Route distinguisher in any format" + -enforce_unique + -description -type string -help "Exact Match (case sensitive)" + -DESCRIPTION_FILTER -type list -minsize 2 -maxsize 2 -multiple 1 -help {${$::punk::netbox::argdoc::_description_filter_help}} + }\ + [set ::punk::netbox::argdoc::_create_update_options]\ + { + -q + -tag + }\ + [set ::punk::netbox::argdoc::_tenant_options]\ + [set ::punk::netbox::argdoc::_region_options]\ + [set ::punk::netbox::argdoc::_site_options]\ + [set ::punk::netbox::argdoc::_group_options]\ + [set ::punk::netbox::argdoc::_role_options]\ + { + -status + -available_on_device + -available_on_virtualmachine + }\ + [set ::punk::netbox::argdoc::_page_options]\ + [set ::punk::netbox::argdoc::_CUSTOM_PARAMS]\ + [set ::punk::netbox::argdoc::_RETURN]\ + { + @values -min 0 -max 0 + }] + ::punk::netbox::system::make_rest_func ::punk::netbox::ipam::vrfs api/ipam/vrfs/ -verb get -body none + + + + punk::args::define {*}[list\ + { + @dynamic + @id -id ::punk::netbox::ipam::prefixes + @cmd -name punk::netbox::ipam::prefixes -help\ + "ipam_prefixes_list + GET request for endpoint /ipam/prefixes/" + @leaders -min 1 -max 1 + apicontextid -help\ + "The name of the stored api context to use. + A contextid can be created in-memory using + api_context_create, or loaded from a .toml + file using api_context_load."\ + -choices {${[punk::netbox::api_context_names]}} + @opts + -id -type integer + -is_pool + -mark_utilized + -description -type string -help "Exact Match (case sensitive)" + -DESCRIPTION_FILTER -type list -minsize 2 -maxsize 2 -multiple 1 -help {${$::punk::netbox::argdoc::_description_filter_help}} + }\ + [set ::punk::netbox::argdoc::_create_update_options]\ + { + -q + -tag + }\ + [set ::punk::netbox::argdoc::_tenant_options]\ + [set ::punk::netbox::argdoc::_region_options]\ + [set ::punk::netbox::argdoc::_site_options]\ + [set ::punk::netbox::argdoc::_group_options]\ + [set ::punk::netbox::argdoc::_role_options]\ + { + -family + -prefix + -within + -within_include + -contains + -depth + -children + -mask_length + -mask_length__gte + -mask_length__lte + -vrf_id + -vrf + -role_id + -role + -status + -available_on_device + -available_on_virtualmachine + }\ + [set ::punk::netbox::argdoc::_page_options]\ + [set ::punk::netbox::argdoc::_CUSTOM_PARAMS]\ + [set ::punk::netbox::argdoc::_RETURN]\ + { + @values -min 0 -max 0 + }] + ::punk::netbox::system::make_rest_func ::punk::netbox::ipam::prefixes api/ipam/prefixes/ -verb get -body none + + punk::args::define {*}[list\ + { + @dynamic + @id -id ::punk::netbox::ipam::ip-addresses + @cmd -name punk::netbox::ipam::ip-addresses -help\ + "ipam_ip-addresses_list + GET request for endpoint /ipam/ip-addresses/" + @leaders -min 1 -max 1 + apicontextid -help\ + "The name of the stored api context to use. 
+ A contextid can be created in-memory using + api_context_create, or loaded from a .toml + file using api_context_load."\ + -choices {${[punk::netbox::api_context_names]}} + @opts + -id -type integer + -dns_name + -DNS_NAME_FILTER -type list -minsize 2 -maxsize 2 -multiple 1 -help {${$::punk::netbox::argdoc::_string_filter_help}} + -description -type string -help "Exact Match (case sensitive)" + -DESCRIPTION_FILTER -type list -minsize 2 -maxsize 2 -multiple 1 -help {${$::punk::netbox::argdoc::_description_filter_help}} + }\ + [set ::punk::netbox::argdoc::_create_update_options]\ + { + -q + -tag + }\ + [set ::punk::netbox::argdoc::_tenant_options]\ + [set ::punk::netbox::argdoc::_region_options]\ + [set ::punk::netbox::argdoc::_site_options]\ + [set ::punk::netbox::argdoc::_group_options]\ + [set ::punk::netbox::argdoc::_role_options]\ + { + -family + -parent + -address + -mask_length + -vrf_id + -vrf + -present_in_vrf_id + -present_in_vrf + -device + -device_id + -virtual_machine + -virtual_machine_id + -interface + -interface_id + -vminterface + -vminterface_id + -fhrpgroup_id + -assigned_to_interface + -status + -role + -available_on_device + -available_on_virtualmachine + }\ + [set ::punk::netbox::argdoc::_page_options]\ + [set ::punk::netbox::argdoc::_CUSTOM_PARAMS]\ + [set ::punk::netbox::argdoc::_RETURN]\ + { + @values -min 0 -max 0 + }] + ::punk::netbox::system::make_rest_func ::punk::netbox::ipam::ip-addresses api/ipam/ip-addresses/ -verb get -body none +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::netbox::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::netbox::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::netbox::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + + + +# == === === === === === === === === === === === === === === +# Sample 'about' function with punk::args documentation +# == === === === === === === === === === === === === === === +tcl::namespace::eval punk::netbox { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + variable PUNKARGS + variable PUNKARGS_aliases + + lappend PUNKARGS [list { + @id -id "(package)punk::netbox" + @package -name "punk::netbox" -help\ + "Package + Description" + }] + + namespace eval argdoc { + #namespace for custom argument documentation + proc package_name {} { + return punk::netbox + } + proc about_topics {} { + #info commands results are returned in an arbitrary order (like array keys) + set topic_funs [info commands [namespace current]::get_topic_*] + set about_topics [list] + foreach f $topic_funs { + set tail [namespace tail $f] + lappend about_topics [string range $tail [string length get_topic_] end] + } + #Adjust this function or 'default_topics' if a different order is required + return [lsort $about_topics] + } + proc default_topics {} {return [list Description *]} + + # ------------------------------------------------------------- + # get_topic_ functions add more to auto-include in about topics + # ------------------------------------------------------------- + proc 
get_topic_Description {} { + punk::args::lib::tstr [string trim { + package punk::netbox + description to come.. + } \n] + } + proc get_topic_License {} { + return "MIT" + } + proc get_topic_Version {} { + return "$::punk::netbox::version" + } + proc get_topic_Contributors {} { + set authors {{Julian Noble }} + set contributors "" + foreach a $authors { + append contributors $a \n + } + if {[string index $contributors end] eq "\n"} { + set contributors [string range $contributors 0 end-1] + } + return $contributors + } + proc get_topic_custom-topic {} { + punk::args::lib::tstr -return string { + A custom + topic + etc + } + } + # ------------------------------------------------------------- + } + + # we re-use the argument definition from punk::args::standard_about and override some items + set overrides [dict create] + dict set overrides @id -id "::punk::netbox::about" + dict set overrides @cmd -name "punk::netbox::about" + dict set overrides @cmd -help [string trim [punk::args::lib::tstr { + About punk::netbox + }] \n] + dict set overrides topic -choices [list {*}[punk::netbox::argdoc::about_topics] *] + dict set overrides topic -choicerestricted 1 + dict set overrides topic -default [punk::netbox::argdoc::default_topics] ;#if -default is present 'topic' will always appear in parsed 'values' dict + set newdef [punk::args::resolved_def -antiglobs -package_about_namespace -override $overrides ::punk::args::package::standard_about *] + lappend PUNKARGS [list $newdef] + proc about {args} { + package require punk::args + #standard_about accepts additional choices for topic - but we need to normalize any abbreviations to full topic name before passing on + set argd [punk::args::parse $args withid ::punk::netbox::about] + lassign [dict values $argd] _leaders opts values _received + punk::args::package::standard_about -package_about_namespace ::punk::netbox::argdoc {*}$opts {*}[dict get $values topic] + } +} +# end of sample 'about' function +# == === === === === === === === === === === === === === === + + +# ----------------------------------------------------------------------------- +# register namespace(s) to have PUNKARGS,PUNKARGS_aliases variables checked +# ----------------------------------------------------------------------------- +# variable PUNKARGS +# variable PUNKARGS_aliases +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::netbox ::punk::netbox::ipam +} +# ----------------------------------------------------------------------------- + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::netbox [tcl::namespace::eval punk::netbox { + variable pkg punk::netbox + variable version + set version 999999.0a1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/modules/punk/netbox-buildversion.txt b/src/modules/punk/netbox-buildversion.txt new file mode 100644 index 00000000..f47d01c8 --- /dev/null +++ b/src/modules/punk/netbox-buildversion.txt @@ -0,0 +1,3 @@ +0.1.0 +#First line must be a semantic version number +#all other lines are ignored. 
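A minimal usage sketch of the punk::netbox API added above. The url, token value and the
context name 'mynetbox' are placeholders, and it assumes the REST commands generated by
make_rest_func accept the arguments exactly as declared in their punk::args definitions:

    package require punk::netbox

    #create a named api context in-memory and persist it to the default .toml datafile
    #(under XDG_DATA_HOME, or ~/.local/share, in the punk/netbox subfolder)
    punk::netbox::api_context_create mynetbox https://netbox.example.com/ 0123456789abcdef0123456789abcdef01234567
    punk::netbox::api_context_save mynetbox

    #in a later session, reload the saved contexts and query the server
    punk::netbox::api_context_load
    punk::netbox::api_contexts                          ;#table of loaded contexts
    punk::netbox::status mynetbox -RETURN dict
    punk::netbox::ipam::vrfs mynetbox -limit 10 -RETURN dict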
diff --git a/src/modules/punk/path-999999.0a1.0.tm b/src/modules/punk/path-999999.0a1.0.tm index a91c94bb..dd6bd041 100644 --- a/src/modules/punk/path-999999.0a1.0.tm +++ b/src/modules/punk/path-999999.0a1.0.tm @@ -20,7 +20,7 @@ #[manpage_begin punkshell_module_punk::path 0 999999.0a1.0] #[copyright "2023"] #[titledesc {Filesystem path utilities}] [comment {-- Name section and table of contents description --}] -#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] +#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] #[require punk::path] #[description] #[keywords module path filesystem] @@ -104,21 +104,21 @@ namespace eval punk::path { #*** !doctools #[subsection {Namespace punk::path}] - #[para] Core API functions for punk::path + #[para] Core API functions for punk::path #[list_begin definitions] - # -- --- + # -- --- #punk::path::normjoin # - simplify . and .. segments as far as possible whilst respecting specific types of root. - # -- --- + # -- --- #a form of file normalize that supports //xxx to be treated as server path names #(ie regardless of unices ignoring (generally) leading double slashes, and regardless of windows volumerelative path syntax) - #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) - # -- --- + #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) + # -- --- #This is intended to be purely a string analysis - without reference to filesystem volumes or vfs or zipfs mountpoints etc # #TODO - option for caller to provide a -base below which we can't backtrack. - #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share + #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share #Our default is to allow trackback to: # :// # :/ @@ -128,7 +128,7 @@ namespace eval punk::path { # ./../ - (track back indefinitely on relpath as we are not resolving to anything physical and can't fully simplify the leading backtracks) # #The caller should do the file/vfs operations to determine this - not us. - # -- --- + # -- --- #simplify path with respect to /./ & /../ elements - independent of platform #NOTE: "anomalies" in standard tcl processing on windows: #e.g file normalize {//host} -> c:/host (or e.g d:/host if we happen to be on another volume) @@ -148,9 +148,9 @@ namespace eval punk::path { #known issues: #1) # normjoin d://a//b//c -> d://a/b/c - # This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c + # This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c # Not considered a problem - just potentially surprising. - # To avoid it we would have to enumerate possible schemes. + # To avoid it we would have to enumerate possible schemes. # As it stands a unix system could define a 'scheme' that happens to match windows style driveletters. Consider a 'feature' ? review. # won't fix? 
#2) @@ -164,16 +164,16 @@ namespace eval punk::path { # normjoin ///server/share -> ///server/share #This is effectively an empty servername in the input with 'server' being pushed one level down - and the output is consistent # possibly won't fix - review - #4) inconsistency + #4) inconsistency # we return normalized //server/share for //./UNC/server share # but other dos device paths are maintained # e.g //./c:/etc # This is because such paths could contain alternate segment names (windows shortnames) which we aren't in a position to resolve. - # caller should - # #as with 'case' below - caller will need to run a post 'file normalize' + # caller should + # #as with 'case' below - caller will need to run a post 'file normalize' #5) we don't normalize case like file normalize does on windows platform. # This is intentional. It could only be done with reference to underlying filesystem which we don't want here. - # + # # ================ # #relpaths all end up with leading . - while not always the simplest form, this is ok. (helps stop inadvertent conversions to absolutes) @@ -194,14 +194,14 @@ namespace eval punk::path { /// { #if this is effectively //$emptyservername/ #then for consistency we should trail //=3 #todo - shortcircuit that here? } } - # /// - set doubleslash1_posn [string first // $path] + # /// + set doubleslash1_posn [string first // $path] # -- --- --- temp warning on windows only - no x-platform difference in result #on windows //host is of type volumerelative @@ -221,7 +221,7 @@ namespace eval punk::path { } # -- --- --- - set is_relpath 0 + set is_relpath 0 #set path [string map [list \\ /] $path] set finalparts [list] @@ -264,11 +264,11 @@ namespace eval punk::path { #normalize by dropping leading slash before split - and then treating first 2 segments as a root #set parts [file split [string range $path 1 end]] set parts [split $path /] - #assert parts here has {} {} as first 2 entries + #assert parts here has {} {} as first 2 entries set rootindex 2 #currently prefer can backtrack to the //zipfs:/ scheme (below the mountpoint - to browse other mounts) #alternative handling for //zipfs:/path - don't go below mountpoint - #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint + #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint #review - more generally //:/path ? #todo - make an option for zipfs and others to determine the 'base' #if {"zipfs:" eq [lindex $parts 2]} { @@ -281,7 +281,7 @@ namespace eval punk::path { #set parts [file split $path] set parts [::split $path /] #e.g /a/b/c -> {} a b c - #or relative path a/b/c -> a b c + #or relative path a/b/c -> a b c #or c:/a/b/c -> c: a b c if {[string match *: [lindex $parts 0]]} { if {[lindex $parts 1] eq ""} { @@ -295,9 +295,9 @@ namespace eval punk::path { } elseif {[lindex $parts 0] ne ""} { #relpath a/b/c set parts [linsert $parts 0 .] - set rootindex 0 - #allow backtracking arbitrarily for leading .. entries - simplify where possible - #also need to stop possible conversion to absolute path + set rootindex 0 + #allow backtracking arbitrarily for leading .. 
entries - simplify where possible + #also need to stop possible conversion to absolute path set is_relpath 1 } } @@ -306,7 +306,7 @@ namespace eval punk::path { #puts stderr "-->baseparts:$baseparts" #ensure that if our rootindex already spans a dotted segment (after the first one) we remove it #must maintain initial . for relpaths to stop them converting to absolute via backtrack - # + # set finalparts [list [lindex $baseparts 0]] foreach b [lrange $baseparts 1 end] { if {$b ni {. ..}} { @@ -333,7 +333,7 @@ namespace eval punk::path { lappend finalparts $p } } - incr i + incr i } } else { foreach p [lrange $parts $rootindex+1 end] { @@ -345,7 +345,7 @@ namespace eval punk::path { switch -exact -- $p { . - "" {} .. { - lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 + lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 } default { lappend finalparts $p @@ -403,16 +403,16 @@ namespace eval punk::path { } - #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' - # - no volumerelative + #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' + # - no volumerelative # - no lookup of file volumes (volume is a windows concept - but with //zipfs:/ somewhat applicable to other platforms) # - /* as absolute (covers also //zipfs:/ (volume), //server , //./etc , //./UNC) # - xxx:// as absolute (scheme) # - xxx:/ or x:/ as absolute - # - x: xxx: -> as absolute (volume-basic or volume-extended) + # - x: xxx: -> as absolute (volume-basic or volume-extended) #note also on windows - legacy name for COM devices - # COM1 = COM1: + # COM1 = COM1: # //./COM1 ?? review proc pathtype {str} { @@ -425,7 +425,7 @@ namespace eval punk::path { return absolute } - #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review + #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review #e.g a:b:/.. or a::/.. or :/.. is not treated as volume/scheme whereas ab:/ is. 
set firstslash [string first / $str] if {$firstslash == -1} { @@ -434,9 +434,9 @@ namespace eval punk::path { set firstsegment [string range $str 0 $firstslash-1] } if {[set firstc [string first : $firstsegment]] > 0} { - set lhs_firstsegment [string range $firstsegment 0 $firstc-1] + set lhs_firstsegment [string range $firstsegment 0 $firstc-1] set rhs_firstsegment [string range $firstsegment $firstc+1 end] ;#exclude a:b/ etc - if {$rhs_firstsegment eq ""} { + if {$rhs_firstsegment eq ""} { set rhs_entire_path [string range $str $firstc+1 end] #assert lhs_firstsegment not empty since firstc > 0 #count following / sequence @@ -466,7 +466,7 @@ namespace eval punk::path { } } } - #assert first element of any return has been absolute or relative + #assert first element of any return has been absolute or relative return relative } @@ -489,7 +489,7 @@ namespace eval punk::path { } return $str } - #purely string based - no reference to filesystem knowledge + #purely string based - no reference to filesystem knowledge #unix-style forward slash only proc plainjoin {args} { set args [lmap a $args {string map "\\\\ /" $a}] @@ -499,12 +499,12 @@ namespace eval punk::path { set out "" foreach a $args { if {![string length $out]} { - append out [plain $a] + append out [plain $a] } else { set a [plain $a] if {[string map {/ ""} $out] eq ""} { set out [string range $out 0 end-1] - } + } if {[string map {/ ""} $a] eq ""} { #all / segment @@ -512,16 +512,16 @@ namespace eval punk::path { } else { if {[string length $a] > 2 && [string match "./*" $a]} { set a [string range $a 2 end] - } + } if {[string index $out end] eq "/"} { append out $a } else { - append out / $a + append out / $a } } } } - return $out + return $out } proc plainjoin1 {args} { if {[llength $args] == 1} { @@ -530,9 +530,9 @@ namespace eval punk::path { set out [trim_final_slash [lindex $args 0]] foreach a [lrange $args 1 end] { set a [trim_final_slash $a] - append out / $a + append out / $a } - return $out + return $out } #intention? @@ -554,13 +554,13 @@ namespace eval punk::path { #*** !doctools #[call [fun pathglob_as_re] [arg pathglob]] #[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure - #[para] ** matches any number of subdirectories. + #[para] ** matches any number of subdirectories. #[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc (except directly within /etc itself) #[para] e.g /etc/**.txt will match any .txt files at any depth below /etc #[para] any segment that does not contain ** must match exactly one segment in the path - #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc + #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc #[para] The pathglob doesn't have to contain glob characters, in which case the returned regex will match the pathglob exactly as specified. - #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals + #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals #todo - consider whether a way to escape the glob chars ? * is practical - to allow literals ? 
* @@ -572,9 +572,9 @@ namespace eval punk::path { } switch -- $seg { * {lappend pats {[^/]*}} - ** {lappend pats {.*}} + ** {lappend pats {.*}} default { - set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals + set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals #set seg [string map [list . {[.]}] $seg] set seg [string map {. [.]} $seg] if {[regexp {[*?]} $seg]} { @@ -614,14 +614,14 @@ namespace eval punk::path { } } } - # -- --- --- --- --- --- + # -- --- --- --- --- --- set opt_nocase [dict get $opts -nocase] - set explicit_nocase 1 ;#default to disprove + set explicit_nocase 1 ;#default to disprove if {$opt_nocase eq "\uFFFF"} { set opt_nocase 0 set explicit_nocase 0 - } - # -- --- --- --- --- --- + } + # -- --- --- --- --- --- if {$opt_nocase} { return [regexp -nocase [pathglob_as_re $pathglob] $path] } else { @@ -651,33 +651,33 @@ namespace eval punk::path { -call-depth-internal -default 0 -type integer -antiglob_paths -default {} -help\ "list of path patterns to exclude - may include * and ** path segments e.g + may include * and ** path segments e.g /usr/** (exlude subfolders based at /usr but not files within /usr itself) **/_aside (exlude files where _aside is last segment) **/_aside/* (exclude folders one below an _aside folder) **/_aside/** (exclude all folders with _aside as a segment)" - @values -min 0 -max -1 -optional 1 -type string + @values -min 0 -max -1 -optional 1 -type string tailglobs -default * -multiple 1 -help\ "Patterns to match against filename portion (last segment) of each file path within the directory tree being searched." } - #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ + #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ #then review if treefiles can replace dirfiles or if both should exist (dirfiles can have literal glob chars in path segments - but that is a rare usecase) proc treefilenames {args} { #*** !doctools #[call [fun treefilenames] [opt {option value...}] [opt {globpattern...}]] #[para]basic (glob based) list of filenames matching each pattern in tailglobs - recursive #[para] options: - #[para] [opt -dir] + #[para] [opt -dir] #[para] defaults to [lb]pwd[rb] - base path for tree to search - #[para] [opt -antiglob_paths] + #[para] [opt -antiglob_paths] #[para] list of path patterns to exclude - may include * and ** path segments e.g /usr/** #[para]no natsorting - so order is dependent on filesystem set argd [punk::args::parse $args withid ::punk::path::treefilenames] - lassign [dict values $argd] leaders opts values received + lassign [dict values $argd] leaders opts values received set tailglobs [dict get $values tailglobs] # -- --- --- --- --- --- --- set opt_antiglob_paths [dict get $opts -antiglob_paths] @@ -694,7 +694,7 @@ namespace eval punk::path { set opt_dir [dict get $opts -directory] } if {![file isdirectory $opt_dir]} { - return [list] + return [list] } } else { #assume/require to exist in any recursive call @@ -713,15 +713,26 @@ namespace eval punk::path { } #todo - account for vfs where matched path could appear to be a directory but is mounted so could be a desired match? 
- set dirfiles [lsort [glob -nocomplain -dir $opt_dir -type f {*}$tailglobs]] + if {[catch {glob -nocomplain -dir $opt_dir -type f {*}$tailglobs} matches]} { + #we can get for example a permissions error + puts stderr "treefilenames error while listing files in dir $opt_dir\n $matches" + set dirfiles [list] + } else { + set dirfiles [lsort $matches] + } + lappend files {*}$dirfiles - set dirdirs [glob -nocomplain -dir $opt_dir -type d *] + if {[catch {glob -nocomplain -dir $opt_dir -type d *} dirdirs]} { + puts stderr "treefilenames error while listing subdirs in dir $opt_dir\n $dirdirs" + set dirdirs [list] + } + foreach dir $dirdirs { set skip 0 foreach anti $opt_antiglob_paths { if {[globmatchpath $anti $dir]} { set skip 1 - break + break } } if {$skip} { @@ -743,8 +754,8 @@ namespace eval punk::path { #[item] #[para] Arguments: # [list_begin arguments] - # [arg_def string reference] The path from which the relative path to location is determined. - # [arg_def string location] The location path which may be above or below the reference path + # [arg_def string reference] The path from which the relative path to location is determined. + # [arg_def string location] The location path which may be above or below the reference path # [list_end] #[item] #[para] Results: @@ -753,7 +764,7 @@ namespace eval punk::path { #[item] #[para] Notes: #[para] Both paths must be the same type - ie both absolute or both relative - #[para] Case sensitive. ie punk::path::relative /etc /etC + #[para] Case sensitive. ie punk::path::relative /etc /etC # will return ../etC #[para] On windows, the drive-letter component (only) is not case sensitive #[example_begin] @@ -774,7 +785,7 @@ namespace eval punk::path { #[example_begin] # P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below # - somewhere/below - # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here # - ../../lib/here #[example_end] #[list_end] @@ -791,7 +802,7 @@ namespace eval punk::path { #avoid normalizing if possible (file normalize *very* expensive on windows) set do_normalize 0 if {[file pathtype $reference] eq "relative"} { - #if reference is relative so is location + #if reference is relative so is location if {[regexp {[.]{2}} [list $reference $location]]} { set do_normalize 1 } @@ -857,7 +868,7 @@ namespace eval punk::path::lib { namespace path [namespace parent] #*** !doctools #[subsection {Namespace punk::path::lib}] - #[para] Secondary functions that are part of the API + #[para] Secondary functions that are part of the API #[list_begin definitions] @@ -877,17 +888,17 @@ namespace eval punk::path::lib { namespace eval punk::path::system { #*** !doctools #[subsection {Namespace punk::path::system}] - #[para] Internal functions that are not part of the API + #[para] Internal functions that are not part of the API } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -## Ready +## Ready package provide punk::path [namespace eval punk::path { variable pkg punk::path variable version - set version 999999.0a1.0 + set version 999999.0a1.0 }] return diff --git a/src/modules/textblock-999999.0a1.0.tm b/src/modules/textblock-999999.0a1.0.tm index 4f29176e..cc3d24c6 100644 --- a/src/modules/textblock-999999.0a1.0.tm +++ b/src/modules/textblock-999999.0a1.0.tm @@ -5974,13 +5974,40 @@ tcl::namespace::eval textblock { [>punk . 
rhs]\ [punk::lib::list_as_lines -- [lrepeat 8 " | "]] } + punk::args::define [punk::lib::tstr -return string { + @id -id ::textblock::table + @cmd -name "textblock::table" -help\ + "A wrapper for creating a textblock::class::table + + NOTE: more options available - argument definition + is incomplete" + @opts + -return -choices {table tableobject} + -rows -type list -default "" -help\ + "A list of lists. + Each toplevel element represents a row. + The number of elements in each row must + be the same. + e.g for 2 rows and 3 columns: + table -rows {{r0c0 r0c1 r0c2} {r1c0 r1c1 r1c2}} + " + -headers -type list -default "" -help\ + "This is a simplified form where each column + has a single header row. + Each element in this list goes into the top + header row for a column. + More complex header arrangements where each + column has multiple headers can be made + by using -return tableobject and calling + $tableobj configure_column -headers" + }] proc table {args} { #todo - use punk::args upvar ::textblock::class::opts_table_defaults toptdefaults set defaults [tcl::dict::create\ -rows [list]\ -headers [list]\ - -return string\ + -return table\ ] @@ -6017,7 +6044,7 @@ tcl::namespace::eval textblock { - if {$opt_return eq "string"} { + if {$opt_return eq "table"} { set result [$t print] $t destroy return $result diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.1.0.tm index 25b01d81..91f29aa5 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.1.0.tm @@ -331,26 +331,26 @@ tcl::namespace::eval punk::args { parsing and help display. directives include: %B%@id%N% ?opt val...? - options: -id + spec-options: -id %B%@cmd%N% ?opt val...? - options: -name -help + spec-options: -name -help %B%@leaders%N% ?opt val...? - options: -min -max + spec-options: -min -max (used for leading args that come before switches/opts) %B%@opts%N% ?opt val...? - options: -any + spec-options: -any %B%@values%N% ?opt val...? - options: -min -max + spec-options: -min -max (used for trailing args that come after switches/opts) %B%@argdisplay%N% ?opt val...? - options: -header (text for header row of table) - -body (text to replace autogenerated arg info) + spec-options: -header (text for header row of table) + -body (text to replace autogenerated arg info) %B%@doc%N% ?opt val...? - options: -name -url + spec-options: -name -url %B%@seealso%N% ?opt val...? - options: -name -url (for footer - unimplemented) + spec-options: -name -url (for footer - unimplemented) - Some other options normally present on custom arguments are available + Some other spec-options normally present on custom arguments are available to use with the @leaders @opts @values directives to set defaults for subsequent lines that represent your custom arguments. These directives should occur in exactly this order - but can be @@ -361,7 +361,12 @@ tcl::namespace::eval punk::args { or using the i .. function - an @id with -id is needed. All directives can be omitted, in which case every line represents - a custom value or option. + a custom leader, value or option. + All will be leaders by default if no options defined. 
+ If options are defined (by naming with leading dash, or explicitly + specifying @opts) then the definitions prior to the options will be + categorised as leaders, and those following the options will be + categorised as values. Custom arguments are defined by using any word at the start of a line that doesn't begin with @ or - @@ -369,7 +374,7 @@ tcl::namespace::eval punk::args { that @@somearg becomes an argument named @somearg) custom leading args, switches/options (names starting with -) - and trailing values also take options: + and trailing values also take spec-options: -type defaults to string. If no other restrictions @@ -397,12 +402,22 @@ tcl::namespace::eval punk::args { -optional (defaults to true for flags/switches false otherwise) + For non flag/switch arguments - all arguments with + -optional true must sit consecutively within their group. + ie all optional leader arguments must be together, and all + optional value arguments must be together. Furthermore, + specifying both optional leaders and optional values will + often lead to ambiguous parsing results. Currently, all + optional non-flg/switch arguments should be either at the + trailing end of leaders or the trailing end of values. + Further unambiguous arrangements of optional args may be + made in future - but are currently considered 'unsupported' -default -multiple (for leaders & values defines whether - subsequent received values are stored agains the same - argument name - only applies to final leader or value) + subsequent received values are stored against the same + argument name - only applies to final leader OR final value) (for options/flags this allows the opt-val pair or solo - flag to appear multiple times - no necessarily contiguously) + flag to appear multiple times - not necessarily contiguously) -choices {} A list of allowable values for an argument. The -default value doesn't have to be in the list. @@ -438,7 +453,7 @@ tcl::namespace::eval punk::args { Max of -1 represents no upper limit. If allows more than one choice the value is a list consisting of items in the choices made available through - entries in -choices/-choicegrups. + entries in -choices/-choicegroups. -minsize (type dependant) -maxsize (type dependant) -range (type dependant) @@ -1667,6 +1682,7 @@ tcl::namespace::eval punk::args { " @leaders -min 0 -max 0 @opts + -return -default text -choices {text dict} -form -default 0 -help\ "Ordinal index or name of command form" @@ -1694,7 +1710,7 @@ tcl::namespace::eval punk::args { (directives are lines beginning with @ e.g @id, @cmd etc) - if -type is @leaders,@opts or @values matches from that type + if -type is leaders,opts or values matches from that type will be returned. if -type is another directive such as @id, @doc etc the @@ -1706,8 +1722,10 @@ tcl::namespace::eval punk::args { proc resolved_def {args} { + #not eating our own dogfood here as far as argument parsing. -id ::punk::args::resolved_def is for documentation/errors only. 
set opts [dict create\ - -types {}\ + -return text\ + -types {}\ -form 0\ -antiglobs {}\ -override {}\ @@ -1743,7 +1761,7 @@ tcl::namespace::eval punk::args { } dict for {k v} $opts { switch -- $k { - -form - -types - -antiglobs - -override {} + -return - -form - -types - -antiglobs - -override {} default { punk::args::parse $args withid ::punk::args::resolved_def return @@ -1764,163 +1782,185 @@ tcl::namespace::eval punk::args { variable id_cache_rawdef set realid [real_id $id] + if {$realid eq ""} { + return + } - if {$realid ne ""} { - set deflist [tcl::dict::get $id_cache_rawdef $realid] - set result "" - set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] + set deflist [tcl::dict::get $id_cache_rawdef $realid] + set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] - set opt_form [dict get $opts -form] - if {[string is integer -strict $opt_form]} { - set formname [lindex [dict get $specdict form_names] $opt_form] - } else { - set formname $opt_form - } - set opt_override [dict get $opts -override] - - #set arg_info [dict get $specdict ARG_INFO] - set arg_info [dict get $specdict FORMS $formname ARG_INFO] - set argtypes [dict create leaders leader opts option values value] - - set opt_antiglobs [dict get $opts -antiglobs] - set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] - set suppressed_directives [list] - set suppressed_args [list] - foreach ag $opt_antiglobs { - foreach d $directives { - if {[string match $ag $d]} { - lappend suppressed_directives $d - } + set opt_form [dict get $opts -form] + if {[string is integer -strict $opt_form]} { + set formname [lindex [dict get $specdict form_names] $opt_form] + } else { + set formname $opt_form + } + set opt_override [dict get $opts -override] + set opt_return [dict get $opts -return] + + #set arg_info [dict get $specdict ARG_INFO] + set arg_info [dict get $specdict FORMS $formname ARG_INFO] + set argtypes [dict create leaders leader opts option values value] + + set opt_antiglobs [dict get $opts -antiglobs] + set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] + set suppressed_directives [list] + set suppressed_args [list] + foreach ag $opt_antiglobs { + foreach d $directives { + if {[string match $ag $d]} { + lappend suppressed_directives $d } - foreach argname [dict keys $arg_info] { - if {[string match $ag $argname]} { - lappend suppressed_args $argname - } + } + foreach argname [dict keys $arg_info] { + if {[string match $ag $argname]} { + lappend suppressed_args $argname } } - set suppressed_directives [lsort -unique $suppressed_directives] - set suppressed_args [lsort -unique $suppressed_args] + } + set suppressed_directives [lsort -unique $suppressed_directives] + set suppressed_args [lsort -unique $suppressed_args] - set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] + set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] - set globbed [list] - foreach pat $patterns { - set matches [dict keys $arg_info $pat] - lappend globbed {*}$matches - } - set globbed [lsort -unique $globbed] - set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - - foreach type $typelist { - switch -exact -- $type { - * { - if {"@id" in $included_directives} { - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" - } else { - 
append result \n "@id -id [dict get $specdict id]" - } - } - foreach directive {@package @cmd @doc @seealso @argdisplay} { - set dshort [string range $directive 1 end] - if {"$directive" in $included_directives} { - if {[dict exists $opt_override $directive]} { - append result \n "$directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict ${dshort}_info]" - } - } - } - #output ordered by leader, option, value - foreach pseudodirective {leaders opts values} tp {leader option value} { - set directive "@$pseudodirective" - switch -- $directive { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - - if {"$directive" in $included_directives} { - if {[dict exists $opt_override "$directive"]} { - append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict $defaults_key]" - } - } + set globbed [list] + foreach pat $patterns { + set matches [dict keys $arg_info $pat] + lappend globbed {*}$matches + } + set globbed [lsort -unique $globbed] + set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - if {$pseudodirective in $included_directives} { - foreach m $included_args { - set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq $tp} { - set argspec [dict remove $argspec -ARGTYPE] - if {[dict exists $opt_override $m]} { - append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" - } else { - append result \n "$m $argspec" - } - } - } - } + set result "" + set resultdict [dict create] + foreach type $typelist { + switch -exact -- $type { + * { + if {"@id" in $included_directives} { + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] } - } - @id { - if {"@id" in $included_directives} { - #only a single id record can exist - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + foreach directive {@package @cmd @doc @seealso @argdisplay} { + set dshort [string range $directive 1 end] + if {"$directive" in $included_directives} { + if {[dict exists $opt_override $directive]} { + append result \n "$directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]] } else { - append result \n "@id -id [dict get $specdict id]" + append result \n "$directive [dict get $specdict ${dshort}_info]" + dict set resultdict $directive [dict get $specdict ${dshort}_info] } } } - @package - @cmd - @doc - @seealso - @argdisplay { - if {"$type" in $included_directives} { - set tp [string range $type 1 end] ;# @package -> package - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" - } else { - append result \n "$type [dict get $specdict ${tp}_info]" - } + #output ordered by leader, option, value + 
foreach pseudodirective {leaders opts values} tp {leader option value} { + set directive "@$pseudodirective" + switch -- $directive { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} } - } - @leaders - @opts - @values { - #these are the active defaults for further arguments - if {"$type" in $included_directives} { - switch -- $type { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + + if {"$directive" in $included_directives} { + if {[dict exists $opt_override "$directive"]} { + append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]] } else { - append result \n "$type [dict get $specdict leaderspec_defaults]" + append result \n "$directive [dict get $specdict $defaults_key]" + dict set resultdict $directive [dict get $specdict $defaults_key] } } - } - leaders - opts - values { - #pseudo-directives - if {$type in $included_directives} { + + if {$pseudodirective in $included_directives} { foreach m $included_args { set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + if {[dict get $argspec -ARGTYPE] eq $tp} { set argspec [dict remove $argspec -ARGTYPE] if {[dict exists $opt_override $m]} { append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] } else { append result \n "$m $argspec" + dict set resultdict $m $argspec } } } } } - default { + + } + @id { + if {"@id" in $included_directives} { + #only a single id record can exist + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] + } } } + @package - @cmd - @doc - @seealso - @argdisplay { + if {"$type" in $included_directives} { + set tp [string range $type 1 end] ;# @package -> package + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get $specdict ${tp}_info]" + dict set resultdict $type [dict get $specdict ${tp}_info] + } + } + } + @leaders - @opts - @values { + #these are the active defaults for further arguments + if {"$type" in $included_directives} { + switch -- $type { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} + } + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get 
$specdict leaderspec_defaults]" + dict set resultdict $type [dict get $specdict leaderspec_defaults] + } + } + } + leaders - opts - values { + #pseudo-directives + if {$type in $included_directives} { + foreach m $included_args { + set argspec [dict get $arg_info $m] + if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + set argspec [dict remove $argspec -ARGTYPE] + if {[dict exists $opt_override $m]} { + append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] + } else { + append result \n "$m $argspec" + dict set resultdict $m $argspec + } + } + } + } + } + default { + } + } + if {$opt_return eq "text"} { + return $result + } else { + return $resultdict } - - return $result } } diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm index 51e74719..f0a4a444 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm @@ -20,7 +20,7 @@ #[manpage_begin punkshell_module_punk::path 0 0.1.0] #[copyright "2023"] #[titledesc {Filesystem path utilities}] [comment {-- Name section and table of contents description --}] -#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] +#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] #[require punk::path] #[description] #[keywords module path filesystem] @@ -104,21 +104,21 @@ namespace eval punk::path { #*** !doctools #[subsection {Namespace punk::path}] - #[para] Core API functions for punk::path + #[para] Core API functions for punk::path #[list_begin definitions] - # -- --- + # -- --- #punk::path::normjoin # - simplify . and .. segments as far as possible whilst respecting specific types of root. - # -- --- + # -- --- #a form of file normalize that supports //xxx to be treated as server path names #(ie regardless of unices ignoring (generally) leading double slashes, and regardless of windows volumerelative path syntax) - #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) - # -- --- + #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) + # -- --- #This is intended to be purely a string analysis - without reference to filesystem volumes or vfs or zipfs mountpoints etc # #TODO - option for caller to provide a -base below which we can't backtrack. - #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share + #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share #Our default is to allow trackback to: # :// # :/ @@ -128,7 +128,7 @@ namespace eval punk::path { # ./../ - (track back indefinitely on relpath as we are not resolving to anything physical and can't fully simplify the leading backtracks) # #The caller should do the file/vfs operations to determine this - not us. 
- # -- --- + # -- --- #simplify path with respect to /./ & /../ elements - independent of platform #NOTE: "anomalies" in standard tcl processing on windows: #e.g file normalize {//host} -> c:/host (or e.g d:/host if we happen to be on another volume) @@ -148,9 +148,9 @@ namespace eval punk::path { #known issues: #1) # normjoin d://a//b//c -> d://a/b/c - # This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c + # This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c # Not considered a problem - just potentially surprising. - # To avoid it we would have to enumerate possible schemes. + # To avoid it we would have to enumerate possible schemes. # As it stands a unix system could define a 'scheme' that happens to match windows style driveletters. Consider a 'feature' ? review. # won't fix? #2) @@ -164,16 +164,16 @@ namespace eval punk::path { # normjoin ///server/share -> ///server/share #This is effectively an empty servername in the input with 'server' being pushed one level down - and the output is consistent # possibly won't fix - review - #4) inconsistency + #4) inconsistency # we return normalized //server/share for //./UNC/server share # but other dos device paths are maintained # e.g //./c:/etc # This is because such paths could contain alternate segment names (windows shortnames) which we aren't in a position to resolve. - # caller should - # #as with 'case' below - caller will need to run a post 'file normalize' + # caller should + # #as with 'case' below - caller will need to run a post 'file normalize' #5) we don't normalize case like file normalize does on windows platform. # This is intentional. It could only be done with reference to underlying filesystem which we don't want here. - # + # # ================ # #relpaths all end up with leading . - while not always the simplest form, this is ok. (helps stop inadvertent conversions to absolutes) @@ -194,14 +194,14 @@ namespace eval punk::path { /// { #if this is effectively //$emptyservername/ #then for consistency we should trail //=3 #todo - shortcircuit that here? } } - # /// - set doubleslash1_posn [string first // $path] + # /// + set doubleslash1_posn [string first // $path] # -- --- --- temp warning on windows only - no x-platform difference in result #on windows //host is of type volumerelative @@ -221,7 +221,7 @@ namespace eval punk::path { } # -- --- --- - set is_relpath 0 + set is_relpath 0 #set path [string map [list \\ /] $path] set finalparts [list] @@ -264,11 +264,11 @@ namespace eval punk::path { #normalize by dropping leading slash before split - and then treating first 2 segments as a root #set parts [file split [string range $path 1 end]] set parts [split $path /] - #assert parts here has {} {} as first 2 entries + #assert parts here has {} {} as first 2 entries set rootindex 2 #currently prefer can backtrack to the //zipfs:/ scheme (below the mountpoint - to browse other mounts) #alternative handling for //zipfs:/path - don't go below mountpoint - #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint + #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint #review - more generally //:/path ? 
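Illustrative normjoin calls, a sketch only; the comments describe the expected behaviour per the notes above and are not asserted for every platform case.

package require punk::path
#purely string based - . and .. segments simplified, //server/share style roots respected
puts [punk::path::normjoin /usr/local/../bin/tclsh]  ;# -> /usr/bin/tclsh
puts [punk::path::normjoin //server/share/a/../b]    ;# backtracks within the //server/share root
puts [punk::path::normjoin a/b/../../../c]           ;# relative result keeps a leading . and any unresolvable ..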
#todo - make an option for zipfs and others to determine the 'base' #if {"zipfs:" eq [lindex $parts 2]} { @@ -281,7 +281,7 @@ namespace eval punk::path { #set parts [file split $path] set parts [::split $path /] #e.g /a/b/c -> {} a b c - #or relative path a/b/c -> a b c + #or relative path a/b/c -> a b c #or c:/a/b/c -> c: a b c if {[string match *: [lindex $parts 0]]} { if {[lindex $parts 1] eq ""} { @@ -295,9 +295,9 @@ namespace eval punk::path { } elseif {[lindex $parts 0] ne ""} { #relpath a/b/c set parts [linsert $parts 0 .] - set rootindex 0 - #allow backtracking arbitrarily for leading .. entries - simplify where possible - #also need to stop possible conversion to absolute path + set rootindex 0 + #allow backtracking arbitrarily for leading .. entries - simplify where possible + #also need to stop possible conversion to absolute path set is_relpath 1 } } @@ -306,7 +306,7 @@ namespace eval punk::path { #puts stderr "-->baseparts:$baseparts" #ensure that if our rootindex already spans a dotted segment (after the first one) we remove it #must maintain initial . for relpaths to stop them converting to absolute via backtrack - # + # set finalparts [list [lindex $baseparts 0]] foreach b [lrange $baseparts 1 end] { if {$b ni {. ..}} { @@ -333,7 +333,7 @@ namespace eval punk::path { lappend finalparts $p } } - incr i + incr i } } else { foreach p [lrange $parts $rootindex+1 end] { @@ -345,7 +345,7 @@ namespace eval punk::path { switch -exact -- $p { . - "" {} .. { - lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 + lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 } default { lappend finalparts $p @@ -403,16 +403,16 @@ namespace eval punk::path { } - #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' - # - no volumerelative + #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' + # - no volumerelative # - no lookup of file volumes (volume is a windows concept - but with //zipfs:/ somewhat applicable to other platforms) # - /* as absolute (covers also //zipfs:/ (volume), //server , //./etc , //./UNC) # - xxx:// as absolute (scheme) # - xxx:/ or x:/ as absolute - # - x: xxx: -> as absolute (volume-basic or volume-extended) + # - x: xxx: -> as absolute (volume-basic or volume-extended) #note also on windows - legacy name for COM devices - # COM1 = COM1: + # COM1 = COM1: # //./COM1 ?? review proc pathtype {str} { @@ -425,7 +425,7 @@ namespace eval punk::path { return absolute } - #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review + #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review #e.g a:b:/.. or a::/.. or :/.. is not treated as volume/scheme whereas ab:/ is. 
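A quick sketch of pathtype classification. Per the assertion in the code, the first element of each result is 'absolute' or 'relative'; any further qualifying elements (scheme/volume detail) are not asserted here.

package require punk::path
puts [punk::path::pathtype //server/share/x]     ;# first element: absolute
puts [punk::path::pathtype c:/temp]              ;# first element: absolute (volume style)
puts [punk::path::pathtype ../sibling/file.txt]  ;# first element: relative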
set firstslash [string first / $str] if {$firstslash == -1} { @@ -434,9 +434,9 @@ namespace eval punk::path { set firstsegment [string range $str 0 $firstslash-1] } if {[set firstc [string first : $firstsegment]] > 0} { - set lhs_firstsegment [string range $firstsegment 0 $firstc-1] + set lhs_firstsegment [string range $firstsegment 0 $firstc-1] set rhs_firstsegment [string range $firstsegment $firstc+1 end] ;#exclude a:b/ etc - if {$rhs_firstsegment eq ""} { + if {$rhs_firstsegment eq ""} { set rhs_entire_path [string range $str $firstc+1 end] #assert lhs_firstsegment not empty since firstc > 0 #count following / sequence @@ -466,7 +466,7 @@ namespace eval punk::path { } } } - #assert first element of any return has been absolute or relative + #assert first element of any return has been absolute or relative return relative } @@ -489,7 +489,7 @@ namespace eval punk::path { } return $str } - #purely string based - no reference to filesystem knowledge + #purely string based - no reference to filesystem knowledge #unix-style forward slash only proc plainjoin {args} { set args [lmap a $args {string map "\\\\ /" $a}] @@ -499,12 +499,12 @@ namespace eval punk::path { set out "" foreach a $args { if {![string length $out]} { - append out [plain $a] + append out [plain $a] } else { set a [plain $a] if {[string map {/ ""} $out] eq ""} { set out [string range $out 0 end-1] - } + } if {[string map {/ ""} $a] eq ""} { #all / segment @@ -512,16 +512,16 @@ namespace eval punk::path { } else { if {[string length $a] > 2 && [string match "./*" $a]} { set a [string range $a 2 end] - } + } if {[string index $out end] eq "/"} { append out $a } else { - append out / $a + append out / $a } } } } - return $out + return $out } proc plainjoin1 {args} { if {[llength $args] == 1} { @@ -530,9 +530,9 @@ namespace eval punk::path { set out [trim_final_slash [lindex $args 0]] foreach a [lrange $args 1 end] { set a [trim_final_slash $a] - append out / $a + append out / $a } - return $out + return $out } #intention? @@ -554,13 +554,13 @@ namespace eval punk::path { #*** !doctools #[call [fun pathglob_as_re] [arg pathglob]] #[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure - #[para] ** matches any number of subdirectories. + #[para] ** matches any number of subdirectories. #[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc (except directly within /etc itself) #[para] e.g /etc/**.txt will match any .txt files at any depth below /etc #[para] any segment that does not contain ** must match exactly one segment in the path - #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc + #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc #[para] The pathglob doesn't have to contain glob characters, in which case the returned regex will match the pathglob exactly as specified. - #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals + #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals #todo - consider whether a way to escape the glob chars ? * is practical - to allow literals ? 
* @@ -572,9 +572,9 @@ namespace eval punk::path { } switch -- $seg { * {lappend pats {[^/]*}} - ** {lappend pats {.*}} + ** {lappend pats {.*}} default { - set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals + set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals #set seg [string map [list . {[.]}] $seg] set seg [string map {. [.]} $seg] if {[regexp {[*?]} $seg]} { @@ -614,14 +614,14 @@ namespace eval punk::path { } } } - # -- --- --- --- --- --- + # -- --- --- --- --- --- set opt_nocase [dict get $opts -nocase] - set explicit_nocase 1 ;#default to disprove + set explicit_nocase 1 ;#default to disprove if {$opt_nocase eq "\uFFFF"} { set opt_nocase 0 set explicit_nocase 0 - } - # -- --- --- --- --- --- + } + # -- --- --- --- --- --- if {$opt_nocase} { return [regexp -nocase [pathglob_as_re $pathglob] $path] } else { @@ -651,33 +651,33 @@ namespace eval punk::path { -call-depth-internal -default 0 -type integer -antiglob_paths -default {} -help\ "list of path patterns to exclude - may include * and ** path segments e.g + may include * and ** path segments e.g /usr/** (exlude subfolders based at /usr but not files within /usr itself) **/_aside (exlude files where _aside is last segment) **/_aside/* (exclude folders one below an _aside folder) **/_aside/** (exclude all folders with _aside as a segment)" - @values -min 0 -max -1 -optional 1 -type string + @values -min 0 -max -1 -optional 1 -type string tailglobs -default * -multiple 1 -help\ "Patterns to match against filename portion (last segment) of each file path within the directory tree being searched." } - #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ + #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ #then review if treefiles can replace dirfiles or if both should exist (dirfiles can have literal glob chars in path segments - but that is a rare usecase) proc treefilenames {args} { #*** !doctools #[call [fun treefilenames] [opt {option value...}] [opt {globpattern...}]] #[para]basic (glob based) list of filenames matching each pattern in tailglobs - recursive #[para] options: - #[para] [opt -dir] + #[para] [opt -dir] #[para] defaults to [lb]pwd[rb] - base path for tree to search - #[para] [opt -antiglob_paths] + #[para] [opt -antiglob_paths] #[para] list of path patterns to exclude - may include * and ** path segments e.g /usr/** #[para]no natsorting - so order is dependent on filesystem set argd [punk::args::parse $args withid ::punk::path::treefilenames] - lassign [dict values $argd] leaders opts values received + lassign [dict values $argd] leaders opts values received set tailglobs [dict get $values tailglobs] # -- --- --- --- --- --- --- set opt_antiglob_paths [dict get $opts -antiglob_paths] @@ -694,7 +694,7 @@ namespace eval punk::path { set opt_dir [dict get $opts -directory] } if {![file isdirectory $opt_dir]} { - return [list] + return [list] } } else { #assume/require to exist in any recursive call @@ -713,15 +713,26 @@ namespace eval punk::path { } #todo - account for vfs where matched path could appear to be a directory but is mounted so could be a desired match? 
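A sketch of glob-to-path matching using the pattern rules documented above for pathglob_as_re; globmatchpath wraps it, and only the basic two-argument form is shown here.

package require punk::path
punk::path::globmatchpath /etc/**/*.txt /etc/apt/sources.txt   ;# 1 - ** spans any depth below /etc
punk::path::globmatchpath /etc/*/*.doc  /etc/a/b/report.doc    ;# 0 - * matches exactly one segment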
- set dirfiles [lsort [glob -nocomplain -dir $opt_dir -type f {*}$tailglobs]] + if {[catch {glob -nocomplain -dir $opt_dir -type f {*}$tailglobs} matches]} { + #we can get for example a permissions error + puts stderr "treefilenames error while listing files in dir $opt_dir\n $matches" + set dirfiles [list] + } else { + set dirfiles [lsort $matches] + } + lappend files {*}$dirfiles - set dirdirs [glob -nocomplain -dir $opt_dir -type d *] + if {[catch {glob -nocomplain -dir $opt_dir -type d *} dirdirs]} { + puts stderr "treefilenames error while listing subdirs in dir $opt_dir\n $dirdirs" + set dirdirs [list] + } + foreach dir $dirdirs { set skip 0 foreach anti $opt_antiglob_paths { if {[globmatchpath $anti $dir]} { set skip 1 - break + break } } if {$skip} { @@ -743,8 +754,8 @@ namespace eval punk::path { #[item] #[para] Arguments: # [list_begin arguments] - # [arg_def string reference] The path from which the relative path to location is determined. - # [arg_def string location] The location path which may be above or below the reference path + # [arg_def string reference] The path from which the relative path to location is determined. + # [arg_def string location] The location path which may be above or below the reference path # [list_end] #[item] #[para] Results: @@ -753,7 +764,7 @@ namespace eval punk::path { #[item] #[para] Notes: #[para] Both paths must be the same type - ie both absolute or both relative - #[para] Case sensitive. ie punk::path::relative /etc /etC + #[para] Case sensitive. ie punk::path::relative /etc /etC # will return ../etC #[para] On windows, the drive-letter component (only) is not case sensitive #[example_begin] @@ -774,7 +785,7 @@ namespace eval punk::path { #[example_begin] # P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below # - somewhere/below - # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here # - ../../lib/here #[example_end] #[list_end] @@ -791,7 +802,7 @@ namespace eval punk::path { #avoid normalizing if possible (file normalize *very* expensive on windows) set do_normalize 0 if {[file pathtype $reference] eq "relative"} { - #if reference is relative so is location + #if reference is relative so is location if {[regexp {[.]{2}} [list $reference $location]]} { set do_normalize 1 } @@ -857,7 +868,7 @@ namespace eval punk::path::lib { namespace path [namespace parent] #*** !doctools #[subsection {Namespace punk::path::lib}] - #[para] Secondary functions that are part of the API + #[para] Secondary functions that are part of the API #[list_begin definitions] @@ -877,17 +888,17 @@ namespace eval punk::path::lib { namespace eval punk::path::system { #*** !doctools #[subsection {Namespace punk::path::system}] - #[para] Internal functions that are not part of the API + #[para] Internal functions that are not part of the API } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -## Ready +## Ready package provide punk::path [namespace eval punk::path { variable pkg punk::path variable version - set version 0.1.0 + set version 0.1.0 }] return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm index 2d185f01..c102ca29 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm +++ 
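A hedged treefilenames usage sketch. The -directory option name is taken from the implementation above; -dir appears in the doctools comments and may be an accepted alias.

package require punk::path
#recursively list .tm and .tcl files, skipping anything under an _aside folder
set files [punk::path::treefilenames -directory [pwd] -antiglob_paths {**/_aside/**} *.tm *.tcl]
puts [join $files \n]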
b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm @@ -5974,13 +5974,40 @@ tcl::namespace::eval textblock { [>punk . rhs]\ [punk::lib::list_as_lines -- [lrepeat 8 " | "]] } + punk::args::define [punk::lib::tstr -return string { + @id -id ::textblock::table + @cmd -name "textblock::table" -help\ + "A wrapper for creating a textblock::class::table + + NOTE: more options available - argument definition + is incomplete" + @opts + -return -choices {table tableobject} + -rows -type list -default "" -help\ + "A list of lists. + Each toplevel element represents a row. + The number of elements in each row must + be the same. + e.g for 2 rows and 3 columns: + table -rows {{r0c0 r0c1 r0c2} {r1c0 r1c1 r1c2}} + " + -headers -type list -default "" -help\ + "This is a simplified form where each column + has a single header row. + Each element in this list goes into the top + header row for a column. + More complex header arrangements where each + column has multiple headers can be made + by using -return tableobject and calling + $tableobj configure_column -headers" + }] proc table {args} { #todo - use punk::args upvar ::textblock::class::opts_table_defaults toptdefaults set defaults [tcl::dict::create\ -rows [list]\ -headers [list]\ - -return string\ + -return table\ ] @@ -6017,7 +6044,7 @@ tcl::namespace::eval textblock { - if {$opt_return eq "string"} { + if {$opt_return eq "table"} { set result [$t print] $t destroy return $result diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.2.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.2.tm new file mode 100644 index 00000000..9270ca9c --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.2.tm @@ -0,0 +1,5566 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application tomlish 1.1.2 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin tomlish_module_tomlish 0 1.1.2] +#[copyright "2024"] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] +#[require tomlish] +#[keywords module parsing toml configuration] +#[description] +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. 
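A small usage sketch for the textblock::table wrapper defined above (2 rows by 3 columns, matching the -rows/-headers help text). Using -return tableobject would instead return the underlying table object for further configuration.

package require textblock
set out [textblock::table -headers {Name Qty Price} -rows {{apple 3 1.50} {pear 5 2.20}}]
puts $out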
+#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of tomlish +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by tomlish +#[list_begin itemized] + +package require Tcl 8.6- +package require struct::stack +package require logger + +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {struct::stack}] + +#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') + + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish { + namespace export {[a-z]*}; # Convention: export all lowercase + variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + + #ARRAY is analogous to a Tcl list + #TABLE is analogous to a Tcl dict + #WS = inline whitespace + #KEY = bare key and value + #QKEY = double quoted key and value ;#todo - rename to DQKEY? 
+ #SQKEY = single quoted key and value + #ITABLE = inline table (*can* be anonymous table) + # inline table values immediately create a table with the opening brace + # inline tables are fully defined between their braces, as are dotted-key subtables defined within + # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained + + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] + #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) + #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) + set min_int -9223372036854775808 ;#-2^63 + set max_int +9223372036854775807 ;#2^63-1 + + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" + puts stderr $msg + } + logger::initNamespace ::tomlish + foreach lvl [logger::levels] { + interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl + log::logproc $lvl tomlish_log_$lvl + } + + #*** !doctools + #[subsection {Namespace tomlish}] + #[para] Core API functions for tomlish + #[list_begin definitions] + + proc tags {} { + return $::tomlish::tags + } + + #helper function for to_dict + proc _get_keyval_value {keyval_element} { + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" + set found_value 0 + #find the value + # 3 is the earliest index at which the value could occur (depending on whitespace) + set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error "tomlish _get_keyval_value keyval_element doesn't seem to be a properly structured { = } list" + } + foreach sub [lrange $keyval_element 2 end] { + #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey + switch -exact -- [lindex $sub 0] { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + set type [lindex $sub 0] + set value [lindex $sub 1] + set found_sub $sub + incr found_value 1 + } + default {} + } + } + if {!$found_value} { + error "tomlish Failed to find value element in KEY. '$keyval_element'" + } + if {$found_value > 1} { + error "tomlish Found multiple value elements in KEY, expected exactly one. '$keyval_element'" + } + + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + #simple (non-container, no-substitution) datatype + set result [list type $type value $value] + } + STRING - STRINGPART { + set result [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL - LITERALPART { + #REVIEW + set result [list type $type value $value] + } + TABLE { + #invalid? + error "tomlish _get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::to_dict [list $found_sub]] + } + ARRAY { + #we need to recurse to get the corresponding dict for the contained item(s) + #pass in the whole $found_sub - not just the $value! + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? 
+ #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + default { + error "tomlish Unexpected value type '$type' found in keyval '$keyval_element'" + } + } + return $result + } + + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "tomlish _get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } + + + #to_dict is a *basic* programmatic datastructure for accessing the data. + # produce a dictionary of keys and values from a tomlish tagged list. + # to_dict is primarily for reading toml data. + #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, + # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. + # creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. + #A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. + # + #Note, currently we get a plain sub dictionary when an inline table is a direct value for a key, but an ITABLE when it's in an ARRAY - REVIEW + #Namespacing? + #ie note the difference: + #[Data] + #temp = { cpu = 79.5, case = 72.0} + # versus + #[Data] + #temps = [{cpu = 79.5, case = 72.0}] + proc to_dict {tomlish} { + + #keep track of which tablenames have already been directly defined, + # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' + #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. + #we don't error out just because a previous tablename segment has already appeared. + variable tablenames_seen [list] + + + log::info ">>> processing '$tomlish'<<<" + set items $tomlish + + foreach lst $items { + if {[lindex $lst 0] ni $::tomlish::tags} { + error "supplied string does not appear to be toml parsed into a tomlish tagged list. 
Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + } + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + #ignore TOMLISH tag at beginning + set items [lrange $tomlish 1 end] + } + + set datastructure [dict create] + foreach item $items { + set tag [lindex $item 0] + #puts "...> item:'$item' tag:'$tag'" + switch -exact -- $tag { + KEY - QKEY - SQKEY { + log::debug "--> processing $tag: $item" + set key [lindex $item 1] + if {$tag eq "QKEY"} { + set key [::tomlish::utils::unescape_string $key] + } + #!todo - normalize key. (may be quoted/doublequoted) + + if {[dict exists $datastructure $key]} { + error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." + } + + #lassign [_get_keyval_value $item] type val + set keyval_dict [_get_keyval_value $item] + dict set datastructure $key $keyval_dict + } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "to_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } + TABLE { + set tablename [lindex $item 1] + set tablename [::tomlish::utils::tablename_trim $tablename] + + if {$tablename in $tablenames_seen} { + error "Table name '$tablename' has already been directly defined in the toml data. Invalid." + } + + log::debug "--> processing $tag (name: $tablename): $item" + set name_segments [::tomlish::utils::tablename_split $tablename] + set last_seg "" + #toml spec rule - all segments mst be non-empty + #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. + + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] + + foreach rawseg $name_segments { + + set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes are processed within it. + set seg [tcl::string::range $rawseg 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. 
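A minimal sketch of how a dotted key ends up in the dict produced by to_dict, per the DOTTEDKEY handling above; the nesting shown is what the code builds via dict set.

package require tomlish
set d [tomlish::to_dict [tomlish::decode::toml "a.b.c = 1\n"]]
#d -> a {b {c {type INT value 1}}}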
+ set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] + } else { + set seg $rawseg + } + + #no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. + #if {$rawseg eq ""} { + # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" + #} + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg + + if {[dict exists $datastructure {*}$table_key_hierarchy]} { + #It's ok for this key to already exist *if* it was defined by a previous tablename, + # but not if it was defined as a key/qkey/skey ? + + set testkey [join $table_key_hierarchy_raw .] + + set testkey_length [llength $table_key_hierarchy_raw] + set found_testkey 0 + if {$testkey in $tablenames_seen} { + set found_testkey 1 + } else { + #see if it was defined by a longer entry + foreach seen $tablenames_seen { + set seen_segments [::tomlish::utils::tablename_split $seen] + #these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, + # and strip the quotes from both single-quoted and double-quoted entries. + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + + #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" + if {$testkey eq $seen_match} { + set found_testkey 1 + } + } + } + + if {$found_testkey == 0} { + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg + } + } + + } + + #ensure empty tables are still represented in the datastructure + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" + } + } + + #We must do this after the key-collision test above! 
+ lappend tablenames_seen $tablename + + + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" + + #now add the contained elements + foreach element [lrange $item 2 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? + set keyval_key [lindex $element 1] + if {$type eq "QKEY"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + #now make sure we add an empty value if there were no contained elements! + #!todo. + } + ITABLE { + #SEP??? + set datastructure [list] + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in ITABLE context. 
Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + } + ARRAY { + #arrays in toml are allowed to contain mixtures of types + set datastructure [list] + log::debug "--> processing array: $item" + + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::to_dict [list $element]]] + } + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments + } + default { + error "Unexpected value type '$type' found in array" + } + } + } + } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted + switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "double quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? 
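A sketch of the line-ending-backslash behaviour that the CONT handling in MULTISTRING implements: whitespace and newlines after the backslash are trimmed up to the next non-whitespace character. The expected value shown is an assumption based on that rule.

package require tomlish
set toml "s = \"\"\"one \\\n    two\"\"\"\n"
set d [tomlish::to_dict [tomlish::decode::toml $toml]]
#expected: s -> {type MULTISTRING value {one two}}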
+ set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + WS - COMMENT - NEWLINE { + #ignore + } + default { + error "Unexpected tag '$tag' in Tomlish list '$tomlish'" + } + } + } + return $datastructure + } + + + proc _from_dictval_tomltype {parents tablestack keys typeval} { + set type [dict get $typeval type] + set val [dict get $typeval value] + switch -- $type { + ARRAY { + set subitems [list] + foreach item $val { + lappend subitems [_from_dictval [list {*}$parents ARRAY] $tablestack $keys $item] SEP + } + if {[lindex $subitems end] eq "SEP"} { + set subitems [lrange $subitems 0 end-1] + } + return [list ARRAY {*}$subitems] + } + ITABLE { + if {$val eq ""} { + return ITABLE + } else { + return [_from_dictval [list {*}$parents ITABLE] $tablestack $keys $val] + } + } + MULTISTRING { + #value is a raw string that isn't encoded as tomlish + #create a valid toml snippet with the raw value and decode it to the proper tomlish MULTISTRING format + set tomlpart "x=\"\"\"\\\n" + append tomlpart $val "\"\"\"" + set tomlish [tomlish::decode::toml $tomlpart] + #e.g if val = " etc\nblah" + #TOMLISH {DOTTEDKEY {{KEY x}} = {MULTISTRING CONT {NEWLINE LF} {WS { }} {STRINGPART etc} {NEWLINE lf} {STRINGPART blah} } } + #lindex 1 3 is the MULTISTRING tomlish list + return [lindex $tomlish 1 3] + } + MULTILITERAL { + set tomlpart "x='''\n" + append tomlpart $val ''' + set tomlish [tomlish::decode::toml $tomlpart] + return [lindex $tomlish 1 3] + } + default { + return [list $type $val] + } + } + } + + proc _from_dictval {parents tablestack keys vinfo} { + set k [lindex $keys end] + if {[regexp {\s} $k] || [string first . $k] >= 0} { + #Any dot in the key would have been split by to_dict - so if it's present here it's part of this key - not a level separator! + #todo - QKEY? 
+ set K_PART [list SQKEY $k] + } else { + set K_PART [list KEY $k] + } + puts stderr "---parents:'$parents' keys:'$keys' vinfo: $vinfo---" + puts stderr "---tablestack: $tablestack---" + set result [list] + set lastparent [lindex $parents end] + if {$lastparent in [list "" do_inline]} { + if {[tomlish::dict::is_tomltype $vinfo]} { + set type [dict get $vinfo type] + #treat ITABLE differently? + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result DOTTEDKEY [list $K_PART {WS { }}] = {WS { }} $sublist {NEWLINE lf} + } else { + #set result [list TABLE $k {NEWLINE lf}] + if {$vinfo ne ""} { + + #set result [list DOTTEDKEY [list [list KEY $k]] = ] + #set records [list ITABLE] + + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + + if {$lastparent eq "do_inline"} { + set result [list DOTTEDKEY [list $K_PART] =] + set records [list ITABLE] + } else { + #review - quoted k ?? + set result [list TABLE $k {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $k]] + set records [list] + } + + + + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >= 0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART {WS { }}] = {WS { }} $sublist] + } else { + if {$vv eq ""} { + #experimental + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline KEY $vk (tablestack:$tablestack)" + #set tname [tomlish::dict::name_from_tablestack [list {*}$tablestack [list T $vk]]] + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + } else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + set tablestack [list {*}$tablestack [list I $vk]] + } + } else { + if { 0 } { + #experiment.. sort of getting there. + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline2 KEYS [list {*}$keys $vk] (tablestack:$tablestack)" + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + + #review - todo? 
+ set dottedkey_value [_from_dictval [list {*}$parents TABLE] $tablestack [list {*}$keys $vk] $vv] + lappend record {*}$dottedkey_value + + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + } + if {$dictidx != $lastidx} { + #lappend record SEP + if {$lastparent eq "do_inline"} { + lappend record SEP + } else { + lappend record {NEWLINE lf} + } + } + lappend records $record + incr dictidx + } + if {$lastparent eq "do_inline"} { + lappend result $records {NEWLINE lf} + } else { + lappend result {*}$records {NEWLINE lf} + } + } else { + if {$lastparent eq "do_inline"} { + lappend result DOTTEDKEY [list [list KEY $k]] = ITABLE {NEWLINE lf} + } else { + lappend result TABLE $k {NEWLINE lf} + } + } + } + } else { + #lastparent is not toplevel "" or "do_inline" + if {[tomlish::dict::is_tomltype $vinfo]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result {*}$sublist + } else { + if {$lastparent eq "TABLE"} { + #review + dict for {vk vv} $vinfo { + set dottedkey_value [_from_dictval [list {*}$parents DOTTEDKEY] $tablestack [list {*}$keys $vk] $vv] + lappend result [list DOTTEDKEY [list [list KEY $vk]] = $dottedkey_value {NEWLINE lf}] + } + } else { + if {$vinfo ne ""} { + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + set sub [list] + set result $lastparent ;#e.g sets ITABLE + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >=0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART] = $sublist] + } else { + if {$vv eq ""} { + #can't just uninline at this level + #we need a better method to query main dict for uninlinability at each level + # (including what's been inlined already) + #if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + # puts stderr "_from_dictval uninline2 KEY $keys" + # set tname [join [list {*}$keys $vk] .] + # set record [list TABLE $tname {NEWLINE lf}] + # set tablestack [list {*}$tablestack [list T $vk]] + #} else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + #} + } else { + #set sub [_from_dictval ITABLE $vk $vv] + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + #set record [list DOTTEDKEY [list $VK_PART] = ITABLE $dottedkey_value] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + if {$dictidx != $lastidx} { + lappend record SEP + } + lappend result $record + incr dictidx + } + } else { + puts stderr "table x-1" + lappend result DOTTEDKEY [list $K_PART] = ITABLE + } + } + } + } + return $result + } + + + proc from_dict {d} { + #consider: + # t1={a=1,b=2} + # x = 1 + #If we represent t1 as an expanded table we get + # [t1] + # a=1 + # b=2 + # x=1 + # --- which is incorrect - as x was a toplevel key like t1! 
+ #This issue doesn't occur if x is itself an inline table + # t1={a=1,b=2} + # x= {no="problem"} + # + # (or if we were to reorder x to come before t1) + + #As the dictionary form doesn't distinguish the structure used to create tables {[table1]\nk=v} vs inline {table1={k=v}} + #Without a solution, from_dict would have to always produce the inline form for toplevel tables unless we allowed re-ordering, + #which is unpreferred here. + + #A possible solution: + #scan the top level to see if all (trailing) elements are themselves dicts + # (ie not of form {type XXX value yyy}) + # + # A further point is that if all root level values are at the 'top' - we can treat lower table-like structures as {[table]} elements + #ie we don't need to force do_inline if all the 'simple' keys are before any compound keys + + #set root_has_values 0 + #approach 1) - the naive approach - forces inline when not always necessary + #dict for {k v} $d { + # if {[llength $v] == 4 && [lindex $v 0] eq "type"} { + # set root_has_values 1 + # break + # } + #} + + + #approach 2) - track the position of last {type x value y} in the dictionary built by to_dict + # - still not perfect. Inlines dotted tables unnecessarily + #This means from_dict doesn't produce output optimal for human editing. + set last_simple [tomlish::dict::last_tomltype_posn $d] + + + ## set parent "do_inline" ;#a value used in _from_dictval to distinguish from "" or other context based parent values + #Any keys that are themselves tables - will need to be represented inline + #to avoid reordering, or incorrect assignment of plain values to the wrong table. + + ## set parent "" + #all toplevel keys in the dict structure can represent subtables. + #we are free to use {[tablename]\n} syntax for toplevel elements. + + + set tomlish [list TOMLISH] + set dictposn 0 + set tablestack [list [list T root]] ;#todo + dict for {t tinfo} $d { + if {$last_simple > $dictposn} { + set parents [list do_inline] + } else { + set parents [list ""] + } + set keys [list $t] + set trecord [_from_dictval $parents $tablestack $keys $tinfo] + lappend tomlish $trecord + incr dictposn + } + return $tomlish + } + + proc json_to_toml {json} { + #*** !doctools + #[call [fun json_to_toml] [arg json]] + #[para] + + set tomlish [::tomlish::from_json $json] + set toml [::tomlish::to_toml $tomlish] + } + + #TODO use huddle? 
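+ #Illustrative sketch (assumptions noted) of the ordering concern handled by from_dict above.
+ #Assuming leaf values use the {type XXX value yyy} shape produced by to_dict, something like:
+ #  set d [dict create t1 [dict create a {type INT value 1}] x {type INT value 1}]
+ #  tomlish::to_toml [tomlish::from_dict $d]
+ #should emit roughly:  t1 = {a = 1}  followed by  x = 1
+ #last_tomltype_posn sees the trailing simple key 'x', so t1 is forced to the inline form rather
+ #than [t1] - otherwise the later  x = 1  line would be misread as a member of the t1 table.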
+ proc from_json {json} { + set jstruct [::tomlish::json_struct $json] + return [::tomlish::from_json_struct $jstruct] + } + + proc from_json_struct {jstruct} { + package require fish::json_toml + return [fish::json_toml::jsonstruct2tomlish $jstruct] + } + + proc toml_to_json {toml} { + set tomlish [::tomlish::from_toml $toml] + return [::tomlish::get_json $tomlish] + } + + proc get_json {tomlish} { + package require fish::json + set d [::tomlish::to_dict $tomlish] + #return [::tomlish::dict_to_json $d] + return [fish::json::from "struct" $d] + } + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +namespace eval tomlish::encode { + #*** !doctools + #[subsection {Namespace tomlish::encode}] + #[para] + #[list_begin definitions] + + #STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness + # take a value of the appropriate type and wrap as a tomlish tagged item + proc string {s} { + return [list STRING $s] + } + + proc int {i} { + #whole numbers, may be prefixed with a + or - + #Leading zeros are not allowed + #Hex,octal binary forms are allowed (toml 1.0) + #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) + #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. + # - We should probably raise an error for number larger than this and suggest the user supply it as a string? + if {[tcl::string::last , $i] > -1} { + error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" + } + if {![::tomlish::utils::int_validchars $i]} { + error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" + } + + if {[::tomlish::utils::is_int $i]} { + return [list INT $i] + } else { + error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" + } + + } + + proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] + } + if {[::tomlish::utils::is_float $f]} { + return [list FLOAT $f] + } else { + error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" + } + } + + proc datetime {str} { + if {[::tomlish::utils::is_datetime $str]} { + return [list DATETIME $str] + } else { + error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" + } + } + + proc boolean {b} { + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![tcl::string::is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. 
[::tomlish::parse::report_line]" + } else { + if {[expr {$b && 1}]} { + return [::list BOOL true] + } else { + return [::list BOOL false] + } + } + } + + + #TODO + #Take tablename followed by + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) + # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types + proc table {name args} { + set pairs [list] + foreach t $args { + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" + } + lappend pairs [list KEY $keystr = $valuepart] + } elseif {[llength $t] == 2} { + #!todo - type heuristics + lassign $t n v + lappend pairs [list KEY $n = [list STRING $v]] + } else { + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" + foreach part [lrange $item 1 end] { + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml '''$literal''' + } + INT - + BOOL - + FLOAT - + DATETIME { + append toml [lindex $item 1] + } + INCOMPLETE { + error "cannot process tomlish term tagged as INCOMPLETE" + } + COMMENT { + append toml "#[lindex $item 1]" + } + BOM { + #Byte Order Mark may appear at beginning of a file. Needs to be preserved. + append toml "\uFEFF" + } + default { + error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." + } + } + + } + return $toml + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::encode ---}] +} +#fish toml from tomlish + +#(encode tomlish as toml) +interp alias {} tomlish::to_toml {} tomlish::encode::tomlish + +# + + +namespace eval tomlish::decode { + #*** !doctools + #[subsection {Namespace tomlish::decode}] + #[para] + #[list_begin definitions] + + #return a Tcl list of tomlish tokens + #i.e get a standard list of all the toml terms in string $s + #where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. + #(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) + # ---------------------------------------------------------------------------------------------- + # NOTE: the production of tomlish from toml source doesn't indicate the toml source was valid!!! + # e.g we deliberately don't check certain things such as duplicate table declarations here. + # ---------------------------------------------------------------------------------------------- + #Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. + # (e.g perhaps a toml editor to highlight violations for fixing) + # A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. 
+ # e.g dicts or an object oriented structure
+ #Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage
+ #e.g to_dict will substitute \r \n \uHHHH \UHHHHHHHH etc
+ #This is important for tomlish to maintain the ability to perform completely lossless round-trips from toml to tomlish and back to toml.
+ # (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document)
+ #If we were to unescape a tab character for example
+ # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character.
+ # For this reason, we also do absolutely no line-ending transformations based on platform.
+ # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped'
+
+ proc toml {args} {
+ #*** !doctools
+ #[call [fun toml] [arg arg...]]
+ #[para] return a Tcl list of tomlish tokens
+
+ set s [join $args \n]
+
+ namespace upvar ::tomlish::parse is_parsing is_parsing
+ set is_parsing 1
+
+
+ if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} {
+ tomlish::parse::spacestack destroy
+ }
+ struct::stack ::tomlish::parse::spacestack
+
+ namespace upvar ::tomlish::parse last_space_action last_space_action
+ namespace upvar ::tomlish::parse last_space_type last_space_type
+
+
+ namespace upvar ::tomlish::parse tok tok
+ set tok ""
+
+ namespace upvar ::tomlish::parse type type
+ namespace upvar ::tomlish::parse tokenType tokenType
+ ::tomlish::parse::set_tokenType ""
+ namespace upvar ::tomlish::parse tokenType_list tokenType_list
+ set tokenType_list [list] ;#Flat (un-nested) list of tokentypes found
+
+ namespace upvar ::tomlish::parse lastChar lastChar
+ set lastChar ""
+
+
+ set result ""
+ namespace upvar ::tomlish::parse nest nest
+ set nest 0
+
+ namespace upvar ::tomlish::parse v v ;#array keyed on nest level
+
+
+ set v(0) {TOMLISH}
+ array set s0 [list] ;#whitespace data to go in {SPACE {}} element.
+ set parentlevel 0
+
+ namespace upvar ::tomlish::parse i i
+ set i 0
+
+ namespace upvar ::tomlish::parse state state
+
+ namespace upvar ::tomlish::parse braceCount braceCount
+ set braceCount 0
+ namespace upvar ::tomlish::parse bracketCount bracketCount
+ set bracketCount 0
+
+ set sep 0
+ set r 1
+ namespace upvar ::tomlish::parse token_waiting token_waiting
+ set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call.
+
+
+ set state "table-space"
+ ::tomlish::parse::spacestack push {type space state table-space}
+ namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data)
+ set linenum 1
+
+ set ::tomlish::parse::state_list [list]
+ try {
+ while {$r} {
+ set r [::tomlish::parse::tok $s]
+ #puts stdout "got tok: '$tok' while parsing string '$s' "
+ set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok'
+
+
+
+ #puts "got token: '$tok' tokenType='$tokenType'. 
while v($nest) = [set v($nest)]" + #puts "-->tok: $tok tokenType='$tokenType'" + set prevstate $state + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below + + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) + } + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { + #pop_trigger_tokens: newline tablename endarray endinlinetable + #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? 
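+ #a pair of adjacent squotes here represents an empty literal string value (e.g key = '')
+ #so it is re-emitted as a double_squote token for the parent space to consume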
+ #tomlish::parse::set_token_waiting type literal value "" complete 1
+ tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}]
+ }
+ ''' {
+ ####
+ #if already an eof in token_waiting - set_token_waiting will insert before it
+ tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}]
+ }
+ '''' {
+ switch -exact -- $prevstate {
+ leading-squote-space {
+ error "---- 4 squotes from leading-squote-space - shouldn't get here"
+ #we should have emitted the triple and left the last for next loop
+ }
+ trailing-squote-space {
+ tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}]
+ #todo integrate left squote with nest data at this level
+ set lastpart [lindex $v($parentlevel) end]
+ switch -- [lindex $lastpart 0] {
+ LITERALPART {
+ set newval "[lindex $lastpart 1]'"
+ set parentdata $v($parentlevel)
+ lset parentdata end [list LITERALPART $newval]
+ set v($parentlevel) $parentdata
+ }
+ NEWLINE {
+ lappend v($parentlevel) [list LITERALPART "'"]
+ }
+ default {
+ error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)"
+ }
+ }
+ }
+ default {
+ error "--- unexpectedly popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space"
+ }
+ }
+ }
+ ''''' {
+ switch -exact -- $prevstate {
+ leading-squote-space {
+ error "---- 5 squotes from leading-squote-space - shouldn't get here"
+ #we should have emitted the triple and left the following squotes for next loop
+ }
+ trailing-squote-space {
+ tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}]
+ #todo integrate left 2 squotes with nest data at this level
+ set lastpart [lindex $v($parentlevel) end]
+ switch -- [lindex $lastpart 0] {
+ LITERALPART {
+ set newval "[lindex $lastpart 1]''"
+ set parentdata $v($parentlevel)
+ lset parentdata end [list LITERALPART $newval]
+ set v($parentlevel) $parentdata
+ }
+ NEWLINE {
+ lappend v($parentlevel) [list LITERALPART "''"]
+ }
+ default {
+ error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)"
+ }
+ }
+ }
+ default {
+ error "--- unexpectedly popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space"
+ }
+ }
+ }
+ }
+ puts "---- HERE squote_seq pop <$tok>"
+ }
+ triple_squote {
+ #presumably popping multiliteral-space
+ ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)"
+ set merged [list]
+ set lasttype ""
+ foreach part $v($nest) {
+ switch -exact -- [lindex $part 0] {
+ MULTILITERAL {
+ lappend merged $part
+ }
+ LITERALPART {
+ if {$lasttype eq "LITERALPART"} {
+ set prevpart [lindex $merged end]
+ lset prevpart 1 [lindex $prevpart 1][lindex $part 1]
+ lset merged end $prevpart
+ } else {
+ lappend merged $part
+ }
+ }
+ NEWLINE {
+ #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here
+ #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. 
+ lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + tablename { + #note: a tablename only 'pops' if we are greater than zero + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" + } + tablearrayname { + #!review - tablearrayname different to tablename regarding push/pop? + #note: a tablename only 'pops' if we are greater than zero + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" + } + endarray { + #nothing to do here. + } + comma { + #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" + } + endinlinetable { + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" + } + endmultiquote { + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" + } + default { + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" + } + } + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + + incr nest -1 + + } elseif {$last_space_action eq "push"} { + set prevnest $nest + incr nest 1 + set v($nest) [list] + # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + barekey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + quotedkey - itablequotedkey { + set v($nest) [list QKEY $tok] ;#$tok is the keyname + } + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname + } + tablename { + #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! 
+ #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish + # back to toml file will be identical. + #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. + # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, + # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. + + #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, + # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the + # tomlish list? + + set test_only [::tomlish::utils::tablename_trim $tok] + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name + #note also that equivalent tablenames may have different toml representations even after being trimmed! + #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) + #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. + } + tablearrayname { + set test_only [::tomlish::utils::tablename_trim $tok] + puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name + } + startarray { + set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. + } + startinlinetable { + set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. + } + startmultiquote { + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL + } + default { + error "---- push trigger tokenType '$tokenType' not yet implemented" + } + } + + } else { + #no space level change + switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } + starttablename { + #$tok is triggered by the opening bracket and sends nothing to output + } + starttablearrayname { + #$tok is triggered by the double opening brackets and sends nothing to output + } + tablename - tablenamearray { + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" + #set v($nest) [list TABLE $tok] + } + endtablename - endtablearrayname { + #no output into the tomlish list for this token + } + startinlinetable { + puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" + } + startquote { + switch -exact -- $newstate { + string-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itable-quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startsquote { + switch -exact -- $newstate { + literal-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "literal" + set tok "" + } + squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itable-squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } + default { + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startmultiquote { + #review + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringpart" + set tok "" + } + endquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + endmultiquote { + #JMN!! + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + multistring { + #review + lappend v($nest) [list MULTISTRING $tok] + } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } + quotedkey { + #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + + } + untyped_value { + #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. + if {$tok in {true false}} { + set tag BOOL + } elseif {[::tomlish::utils::is_int $tok]} { + set tag INT + } elseif {[::tomlish::utils::is_float $tok]} { + set tag FLOAT + } elseif {[::tomlish::utils::is_datetime $tok]} { + set tag DATETIME + } else { + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. 
[tomlish::parse::report_line] (no space level change)" + } + lappend v($nest) [list $tag $tok] + + } + comment { + #puts stdout "----- comment token returned '$tok'------" + lappend v($nest) [list COMMENT "$tok"] + } + equal { + #we append '=' to the nest so that any surrounding whitespace is retained. + lappend v($nest) = + } + comma { + lappend v($nest) SEP + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + whitespace { + lappend v($nest) [list WS $tok] + } + continuation { + lappend v($nest) CONT + } + bom { + lappend v($nest) BOM + } + eof { + #ok - nothing more to add to the tomlish list. + #!todo - check previous tokens are complete/valid? + } + default { + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + + if {!$next_tokenType_known} { + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" + ::tomlish::parse::set_tokenType "" + set tok "" + } + + if {$state eq "end-state"} { + break + } + + + } + + #while {$nest > 0} { + # lappend v([expr {$nest -1}]) [set v($nest)] + # incr nest -1 + #} + while {[::tomlish::parse::spacestack size] > 1} { + ::tomlish::parse::spacestack pop + lappend v([expr {$nest -1}]) [set v($nest)] + incr nest -1 + + #set parent [spacestack peek] ;#the level being appended to + #lassign $parent type state + #if {$type eq "space"} { + # + #} elseif {$type eq "buffer"} { + # lappend v([expr {$nest -1}]) {*}[set v($nest)] + #} else { + # error "invalid spacestack item: $parent" + #} + } + + } finally { + set is_parsing 0 + } + return $v(0) + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::decode ---}] +} +#decode toml to tomlish +interp alias {} tomlish::from_toml {} tomlish::decode::toml + +namespace eval tomlish::utils { + #*** !doctools + #[subsection {Namespace tomlish::utils}] + #[para] + #[list_begin definitions] + + + #tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace + # tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] + #trimmed, the tablename becomes {a.b.c} + # A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] + #ie whitespace is only irrelevant if it's outside a quoted segment + #trimmed, the tablename becomes {a.b."c etc "} + proc tablename_trim {tablename} { + set segments [tablename_split $tablename false] + set trimmed_segments [list] + foreach seg $segments { + lappend trimmed_segments [::string trim $seg " \t"] + } + return [join $trimmed_segments .] 
+ } + + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [tcl::string::length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + + #utils::tablename_split + proc tablename_split {tablename {normalize false}} { + #we can't just split on . because we have to handle quoted segments which may contain a dot. + #eg {dog."tater.man"} + set sLen [tcl::string::length $tablename] + set segments [list] + set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + #quoted is for double-quotes, litquoted is for single-quotes (string literal) + set seg "" + for {set i 0} {$i < $sLen} {incr i} { + + if {$i > 0} { + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $tablename $i] + + if {$c eq "."} { + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } + } + } elseif {($c eq "\"") && ($lastChar ne "\\")} { + if {$mode eq "unknown"} { + if {[tcl::string::trim $seg] ne ""} { + #we don't allow a quote in the middle of a bare key + error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" + } + set mode "quoted" + set seg "\"" + } elseif {$mode eq "unquoted"} { + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" ;#make sure we only accept a dot or end-of-data now. + } elseif {$mode eq "litquoted"} { + append seg $c + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } + } elseif {($c eq "\'")} { + if {$mode eq "unknown"} { + append seg $c + set mode "litquoted" + } elseif {$mode eq "unquoted"} { + #single quote inside e.g o'neill + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + + } elseif {$mode eq "litquoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + + } elseif {$c in [list " " \t]} { + if {$mode eq "syntax"} { + #ignore + } else { + append seg $c + } + } else { + if {$mode eq "syntax"} { + error "tablename_split. 
Expected a dot separator. got '$c'. tablename: '$tablename'" + } + if {$mode eq "unknown"} { + set mode "unquoted" + } + append seg $c + } + if {$i == $sLen-1} { + #end of data + ::tomlish::log::debug "End of data: mode='$mode'" + switch -exact -- $mode { + quoted { + if {$c ne "\""} { + error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" + } + if {$normalize} { + lappend segments $seg + } else { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong + } + } + litquoted { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { + error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" + } + lappend segments $seg + } + unquoted - unknown { + lappend segments $seg + } + syntax { + #ok - segment already lappended + } + default { + lappend segments $seg + } + } + } + } + foreach seg $segments { + set trimmed [tcl::string::trim $seg " \t"] + #note - we explicitly allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + #if {$trimmed in [list "''" "\"\""]} { + # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" + #} + if {$trimmed eq "" } { + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" + } + } + return $segments + } + + proc unicode_escape_info {slashu} { + #!todo + # validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and + # is a valid 'unicode scalar value' + # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive + #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} + if {[tcl::string::match {\\u*} $slashu]} { + set exp {^\\u([0-9a-fA-F]{4}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %4x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } + } else { + return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] + } + } elseif {[tcl::string::match {\\U*} $slashu]} { + set exp {^\\U([0-9a-fA-F]{8}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %8x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } else { + return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] + } + } + } else { + return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] + } + } else { + return [list err [list reason "Supplied string did not start with \\u or \\U" ]] + } + + } + + proc unescape_string {str} { + #note we can't just use Tcl subst because: + # it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. 
+ # it would strip out backslashes inappropriately: e.g "\j" becomes just j + # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn + # it replaces\ with a single whitespace + #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + + set buffer "" + set buffer4 "" ;#buffer for 4 hex characters following a \u + set buffer8 "" ;#buffer for 8 hex characters following a \u + + set sLen [tcl::string::length $str] + + #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc + set slash_active 0 + set unicode4_active 0 + set unicode8_active 0 + + + #!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? + set i 0 + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $str [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $str $i] + ::tomlish::log::debug "unescape_string. got char $c" + scan $c %c n + if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { + #we don't expect unescaped unicode characters from 0000 to 001F - + #*except* for raw tab (which is whitespace) and newlines + error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" + } + incr i ;#must incr here because we do'returns'inside the loop + if {$c eq "\\"} { + if {$slash_active} { + append buffer "\\" + set slash_active 0 + } elseif {$unicode4_active} { + error "unescape_string. unexpected case slash during unicode4 not yet handled" + } elseif {$unicode8_active} { + error "unescape_string. unexpected case slash during unicode8 not yet handled" + } else { + # don't output anything (yet) + set slash_active 1 + } + } else { + if {$unicode4_active} { + if {[tcl::string::length $buffer4] < 4} { + append buffer4 $c + } + if {[tcl::string::length $buffer4] == 4} { + #we have a \uHHHH to test + set unicode4_active 0 + set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$unicode8_active} { + if {[tcl::string::length $buffer8] < 8} { + append buffer8 $c + } + if {[tcl::string::length $buffer8] == 8} { + #we have a \UHHHHHHHH to test + set unicode8_active 0 + set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$slash_active} { + set slash_active 0 + set ctest [tcl::string::map {{"} dq} $c] + switch -exact -- $ctest { + dq { + set e "\\\"" + append buffer [subst -nocommand -novariable $e] + } + b - t - n - f - r { + set e "\\$c" + append buffer [subst -nocommand -novariable $e] + } + u { + set unicode4_active 1 + set buffer4 "" + } + U { + set unicode8_active 1 + set buffer8 "" + } + default { + set slash_active 0 + + append buffer "\\" + append buffer $c + } + } + } else { + append buffer $c + } + } + } + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + if {$unicode4_active} { + error "End of string reached before complete unicode escape sequence \uHHHH" + } + if {$unicode8_active} { + error "End of string reached before complete unicode escape sequence \UHHHHHHHH" + } + if {$slash_active} { + append buffer "\\" + } + return $buffer + } + + proc 
normalize_key {rawkey} { + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes allowed within it. + set key [tcl::string::range $rawkey 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + # + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only + set key [::tomlish::utils::unescape_string $keydata] + #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. + } else { + set key $rawkey + } + return $key + } + + proc string_to_slashu {string} { + set rv {} + foreach c [split $string {}] { + scan $c %c c + append rv {\u} + append rv [format %.4X $c] + } + return $rv + } + + #'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. + proc nonprintable_to_slashu {s} { + set res "" + foreach i [split $s ""] { + scan $i %c c + + set printable 0 + if {($c>31) && ($c<127)} { + set printable 1 + } + if {$printable} {append res $i} else {append res \\u[format %.4X $c]} + } + set res + } ;#RS + + #check if str is valid for use as a toml bare key + proc is_barekey {str} { + if {[tcl::string::length $str] == 0} { + return 0 + } else { + set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters match the regexp + return 1 + } else { + return 0 + } + } + } + + #test only that the characters in str are valid for the toml specified type 'integer'. + proc int_validchars1 {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_int {str} { + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { + return 0 + } + # --------------------------------------- + + #check +,- only occur in the first position. + if {[tcl::string::last - $str] > 0} { + return 0 + } + if {[tcl::string::last + $str] > 0} { + return 0 + } + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![tcl::string::is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. + #presumably very large numbers would have to be supplied in a toml file as strings. + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max + if {$numeric_value > $::tomlish::max_int} { + return 0 + } + if {$numeric_value < $::tomlish::min_int} { + return 0 + } + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. 
+ return 1 + } + + #test only that the characters in str are valid for the toml specified type 'float'. + proc float_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { + return 1 + } else { + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } + } + } + + proc is_float {str} { + set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) + #Toml spec also disallows leading zeros in the exponent part + #... but this seems less interoperable (some libraries generate leading zeroes in exponents) + #for now we will allow leading zeros in exponents + #!todo - configure 'strict' option to disallow? + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + set r {([0-9])*} + regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E + set z {([0])*} + regexp $z $intpart leadingzeros + if {[tcl::string::length $leadingzeros] > 1} { + return 0 + } + #for floats, +,- may occur in multiple places + #e.g -2E-22 +3e34 + #!todo - check bounds ? + + #strip underscores for tcl double check + set check [tcl::string::map {_ ""} $str] + #string is double accepts inf nan +NaN etc. + if {![tcl::string::is double $check]} { + return 0 + } + + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'datetime'. + proc datetime_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + #review - we + proc is_datetime {str} { + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #!todo - use full RFC 3339 parser? + lassign [split $str T] datepart timepart + #!todo - what if the value is 'time only'? + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + + #!todo - verify time part is reasonable + } else { + return 0 + } + return 1 + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::utils ---}] +} + +namespace eval tomlish::parse { + #*** !doctools + #[subsection {Namespace tomlish::parse}] + #[para] + #[list_begin definitions] + + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? 
+ #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? + + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text + + variable state + # states: + # table-space, itable-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string-state, literal-state, multistring... + # + # notes: + # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax + # + #stateMatrix defines for each state, actions to take for each possible token. + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. 
+ #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases + + variable stateMatrix + set stateMatrix [dict create] + + #xxx-space vs xxx-syntax inadequately documented - TODO + + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# + + dict set stateMatrix\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ + } + + #itable-space/ curly-syntax : itables + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-testing"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ + } + + + dict set stateMatrix\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ + } + + # ' = ' portion of keyval + dict set stateMatrix\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + #### + dict set stateMatrix\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ + } + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} + dict set stateMatrix\ + 
leading-squote-space {\ + squote_seq "POPSPACE"\ + } + #dict set stateMatrix\ + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + + dict set stateMatrix\ + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + + dict set stateMatrix\ + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value + dict set stateMatrix\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ + } + #dottedkeyend "POPSPACE" + + + + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 + dict set stateMatrix\ + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey 
"itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ + } + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? + + + #JMN REVIEW + dict set stateMatrix\ + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ + comma "array-space"\ + comment "array-space"\ + eof "err-state-array-space-got-eof"\ + } + dict set stateMatrix\ + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + comma "array-space"\ + comment "err-state"\ + } + + + + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space + dict set stateMatrix\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ + } + dict set stateMatrix\ + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ + } + # endsquote {PUSHSPACE "keyval-space"} + + dict set stateMatrix\ + string-state {\ + whitespace "NA"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + literal-state {\ + whitespace "NA"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + + + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } + dict set stateMatrix\ + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ + } + + + #only valid subparts are literalpart and newline. 
other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline + dict set stateMatrix\ + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes + dict set stateMatrix\ + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + + + dict set stateMatrix\ + tablename-state {\ + whitespace "NA"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ + } + dict set stateMatrix\ + tablearrayname-state {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ + } + + dict set stateMatrix\ + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } + } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" + } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #build a list of 'push triggers' from the stateMatrix + # ie tokens which can push a new space onto spacestack + set push_trigger_tokens [list] + tcl::dict::for {s transitions} $stateMatrix { + tcl::dict::for {token transition_to} $transitions { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { + if {$token ni $push_trigger_tokens} { + lappend push_trigger_tokens $token + } + } + } + } + } + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + + #mainly for the -space states: + #redirect to another state $c based on a state transition from $whatever to $b + # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. + #this is useful as we often don't know state $b. 
e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next + variable spacePopTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + + + variable state_list ;#reset every tomlish::decode::toml + + namespace export tomlish toml + namespace ensemble create + + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state + variable nest + variable v + + set prevstate $currentstate + + + variable spacePopTransitions + variable spacePushTransitions + variable spaceSameTransitions + + variable last_space_action "none" + variable last_space_type "none" + variable state_list + + set result "" + set starttok "" + + if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { + set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" + switch -exact -- [lindex $transition_to 0] { + POPSPACE { + spacestack pop + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + + set last_space_action "pop" + set last_space_type $type + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + } else { + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. 
+ # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } + } + set result $next + } + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" + } else { + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } + } + set result $next + } + zeropoppushspace { + if {$nest > 0} { + #pop back down to the root level (table-space) + spacestack pop + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + + set last_space_action "pop" + set last_space_type $type + + #----- + #standard pop + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + #----- + } + #re-entrancy + + #set next [list PUSHSPACE [lindex $transition_to 1]] + set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] + } + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. 
+ } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + + set last_space_action "push" + set last_space_type "space" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" + } else { + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } + } + set result $next + } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } + default { + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word + } + } + } else { + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" + } + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] + } + + proc report_line {{line ""}} { + variable linenum + variable is_parsing + if {$is_parsing} { + if {$line eq ""} { + set line $linenum + } + return "Line Number: $line" + } else { + #not in the middle of parsing tomlish text - return nothing. + return "" + } + } + + #produce a *slightly* more readable string rep of the nest for puts etc. 
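+ # summary of the proc below: elements whose first word is NEWLINE are emitted on
+ # their own line, elements tagged KEY/QKEY/SQKEY/TABLE/ARRAY are recursed into,
+ # and any other element is emitted inline followed by a space.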
+ proc nest_pretty1 {list} { + set prettier "{" + + foreach el $list { + if { [lindex $el 0] eq "NEWLINE"} { + append prettier "[list $el]\n" + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { + append prettier [nest_pretty1 $el] + } else { + append prettier "[list $el] " + } + } + append prettier "}" + return $prettier + } + + proc set_tokenType {t} { + variable tokenType + variable tokenType_list + if {![info exists tokenType]} { + set tokenType "" + } + lappend tokenType_list $t + set tokenType $t + } + + proc switch_tokenType {t} { + variable tokenType + variable tokenType_list + lset tokenType_list end $t + set tokenType $t + } + + proc get_tokenType {} { + variable tokenType + return $tokenType + } + + proc _shortcircuit_startquotesequence {} { + variable tok + variable i + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + set_tokenType "startquote" + incr i -1 + return -level 2 1 + } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" + set_tokenType "startquote" + set tok "\"" + incr i -2 + return -level 2 1 + } + } + + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "tomlish set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "tomlish set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "tomlish set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "tomlish set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "tomlish Unexpected. 
Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + + #returns 0 or 1 + #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + + proc tok {s} { + variable nest + variable v + variable i + variable tok + variable type ;#character type + variable state ;#FSM + + + variable tokenType + variable tokenType_list + + + variable endToken + + variable lastChar + + variable braceCount + variable bracketCount + + + #------------------------------ + #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof + variable token_waiting + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] + return 1 + } + #------------------------------ + + set resultlist [list] + set sLen [tcl::string::length $s] + + set slash_active 0 + set quote 0 + set c "" + set multi_dquote "" + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $s [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $s $i] + set cindex $i + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" + #puts "got char $c during tokenType '$tokenType'" + incr i ;#must incr here because we do returns inside the loop + + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + switch -exact -- $ctest { + # { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" + } + whitespace { + # hash marks end of whitespace token + #do a return for the whitespace, set token_waiting + #set_token_waiting type comment value "" complete 1 + incr i -1 ;#leave comment for next run + return 1 + } + untyped_value { + #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? + #we will accept a comment marker as an immediate terminator of the untyped_value. + incr i -1 + return 1 + } + starttablename - starttablearrayname { + #fix! + error "tomlish Character '#' is invalid first character for $tokenType. 
[tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } + default { + #quotedkey, itablequotedkey, string,literal, multistring + append tok $c + } + } + } else { + switch -- $state { + multistring-space { + set_tokenType stringpart + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } + } + } + lc { + #left curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "tomlish Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "tomlish state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + } + } + } + + } + rc { + #right curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! 
- but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "tomlish right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "tomlish endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "tomlish state '$state'. 
endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + + } + lb { + #left square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + itable-space { + #handle state just to give specific error msg + error "tomlish state: '$state'. Left square bracket invalid. Cannot start array in inline table without key. Use key=\[\] syntax. [tomlish::parse::report_line]" + } + default { + error "tomlish state: '$state'. 
startarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + rb { + #right square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } else { + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + set_token_waiting type endtablename value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } + } + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\]" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } + default { + error "tomlish state '$state'. endarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
+ #backslash + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "tomlish Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + starttablename - starttablearrayname { + error "tomlish backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + barekey { + error "tomlish Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "tomlish Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tomlish tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } + } + } + } + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { + append tok $c + if {$existingtoklen > 2} { + error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { + append tok $c + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" + } + } + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + #temp token creatable only during value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + append tok $c + } + 2 { + #switch? 
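+ # a third consecutive squote: promote the pending token to triple_squote,
+ # which the state machine treats as the opener of a multi-line literal string
+ # (triple_squote -> PUSHSPACE multiliteral-space in the stateMatrix)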
+ append tok $c + set_tokenType triple_squote + return 1 + } + default { + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" + } + } + } + literal { + #slash_active always false + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 + return 1 + } + starttablename - starttablearrayname { + #!!! + incr i -1 + return 1 + } + tablename - tablearrayname { + append tok $c + } + default { + append tok $c + } + } + } else { + switch -exact -- $state { + value-expected - array-space { + set_tokenType "_start_squote_sequence" + set tok "'" + } + itable-keyval-value-expected - keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname + append tok "'" + } + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal + incr -1 + return 1 + } + multistring-space { + error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey + } + default { + error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" + } + } + } + + } + dq { + #double quote + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + #switch vs set? 
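+ # three dquotes in a row: emit startmultiquote, which the state machine
+ # uses to open a multi-line basic string (PUSHSPACE multistring-space)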
+ set_tokenType "startmultiquote" + return 1 + } else { + error "tomlish unexpected token length $toklen in 'startquotesequence'" + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 + return 1 + } + 2 { + set_tokenType "startsquote" + incr i -2 + return 1 + } + default { + error "tomlish unexpected _start_squote_sequence length $toklen" + } + } + } + literal - literalpart { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } + } + } + } + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 + return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} + } + default { + set_token_waiting type startquote value "\"" complete 1 startindex $cindex + return 1 + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" + append tok $c + } else { + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "tomlish got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + keyval-value-expected - value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + itable-keyval-value-expected { + #JMN 2025 - review + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! 
+ #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { + if {$multi_dquote eq "\"\""} { + tomlish::log::debug "- tokloop char dq ---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" + return 1 + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 + #set multi_dquote "" + #return 1 + } else { + append multi_dquote "\"" + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { + set_tokenType "startquote" + set tok $c + return 1 + } + itable-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin + set tok $c + } + default { + error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + } + } + } + } + = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { + #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type equal value = complete 1 startindex $cindex + return 1 + } + } + barekey { + #set_token_waiting type equal value = complete 1 + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } + default { + error "tomlish unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= + } + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 + } + default { + set_tokenType "equal" + set tok = + return 1 + } + } + } + } + cr { + #REVIEW! + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \r carriage return + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
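+ # note: a lone cr only begins a newline token when no token is active;
+ # for literalpart/stringpart it ends the current token and the cr is reprocessed,
+ # so that crlf can be emitted as its own NEWLINE element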
+ set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + append tok $c + } + literalpart { + #part of MLL string (multi-line literal string) + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warn "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } + stringpart { + #part of MLB string (multi-line basic string) + #jmn2025 - review + #append tok $dquotes$c + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } + } + } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) + set_tokenType "newline" + set tok cr + } + } + lf { + # \n newline + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + newline { + #review + #this lf is the trailing part of a crlf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] + incr i -1 + return 1 + } else { + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } + starttablename - tablename - tablearrayname - starttablearrayname { + error "tomlish Character is invalid in $tokenType. [tomlish::parse::report_line]" + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. 
(must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } + } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} + } + } + , { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + #stringpart can have up to 2 quotes too + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type comma value "," complete 1 startindex $cindex + return 1 + } + } + default { + set_token_waiting type comma value "," complete 1 startindex $cindex + if {$had_slash} {append tok "\\"} + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "literalpart" + set tok "," + } + default { + set_tokenType "comma" + set tok "," + return 1 + } + } + } + } + . { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
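+ # '.' is structural only when building keys (dotsep in dottedkey-space, or splitting
+ # a barekey such as x.y); within strings, literals, comments and untyped values
+ # (floats, dates etc) it is ordinary data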
+ set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped_value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "tomlish Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "tomlish Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #subtable - split later - review + append tok $c + } + barekey { + #e.g x.y = 1 + #we need to transition the barekey to become a structured table name ??? review + #x is the tablename y is the key + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 + } + default { + error "tomlish Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 + #return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." + } + multiliteral-space { + set_tokenType "literalpart" + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" + set tok "." + return 1 + } + default { + set_tokenType "untyped_value" + set tok "." + } + } + } + + } + " " { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
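+ # a space terminates barekeys and untyped values; in multistring context whitespace
+ # is kept as its own token so continuation (\) stripping can operate on it later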
+ if {[tcl::string::length $tokenType]} { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #todo had_slash - emit token or error + #whitespace is a terminator for bare keys + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + comment { + if {$had_slash} { + append tok "\\" + } + append tok $dquotes$c + } + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearrayname { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "whitespace" + append tok $c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + if {$had_slash} { + error "tomlish unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } + } + } + } + tab { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
+ + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set_token_waiting type whitespace value $c complete 1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "whitespace" + append tok $c + } + } + } + } + bom { + #BOM (Byte Order Mark) - ignored by token consumer + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + _start_squote_sequence { + #assert - tok will be one or two squotes only + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart { + append tok $c + } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } + } + default { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
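+ # 'ordinary' characters: barekey characters are validated via tomlish::utils::is_barekey;
+ # otherwise the char is appended to the token in progress, or starts an untyped_value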
+ set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } + } + barekey { + if {[tomlish::utils::is_barekey $c]} { + append tok $c + } else { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + incr i -1 + #allow statemachine to set context for subsequent chars + return 1 + } + stringpart { + append tok $dquotes$c + } + default { + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname + append tok $c + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + table-space - itable-space { + #if no currently active token - assume another key value pair + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { + set_tokenType "tablename" + set tok $c + } + tablearrayname-state { + set_tokenType "tablearrayname" + set tok $c + } + dottedkey-space { + set_tokenType barekey + set tok $c + } + default { + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" + set tok $c + } + } + } + } + } + + } + + #run out of characters (eof) + if {[tcl::string::length $tokenType]} { + #check for invalid ending tokens + #if {$state eq "err-state"} { + # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" + #} + switch -exact -- $tokenType { + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #set_token_waiting type string value "" complete 1 + return 1 + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open literal + error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" + } + 2 { + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" + set tok "" + return 1 + } + } + } + } + set_token_waiting type eof value eof complete 1 startindex $i ;#review + return 1 + } else { + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + set tokenType "eof" + set tok "eof" + } + return 0 + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::parse ---}] +} + +namespace eval tomlish::dict { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + + proc is_tomltype {d} { + expr {[dict size $d] == 2 && [dict exists $d type] && [dict exists $d value]} + } + proc is_tomltype2 {d} { + upvar ::tomlish::tags tags + expr {[lindex $d 0] eq "type" && [lindex $d 1] in $tags} + } + proc last_tomltype_posn {d} { + set last_simple -1 + set dictposn [expr {[dict size $d] -1}] + foreach k [lreverse [dict keys $d]] { + set dval [dict get $d $k] + if {[is_tomltype $dval]} { + set last_simple $dictposn + break + } + incr dictposn -1 + } + return $last_simple + } + + + #review + proc name_from_tablestack {tablestack} { + set name "" + foreach tinfo [lrange $tablestack 1 end] { + lassign $tinfo type namepart + switch -- $type { + T { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + I { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + default { + #end at first break in the leading sequence of T & I tablenames + break + } + } + } + return $name + } + +} + +tcl::namespace::eval tomlish::app { + variable applist [list encoder decoder test] + + #*** !doctools + #[subsection {Namespace tomlish::app}] + #[para] + #[list_begin definitions] + + proc decoder {args} { + #*** !doctools + #[call app::[fun decoder] [arg args]] + #[para] read toml on stdin until EOF + #[para] on error - returns non-zero exit code and writes error on stderr + #[para] on success - returns zero exit code and writes JSON encoding of the data on stdout + #[para] This decoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + #fconfigure stdin -encoding utf-8 + fconfigure stdin -translation binary + #Just slurp it all - presumably we are not handling massive amounts of data on stdin. + # - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. + if {[catch { + set toml [read stdin] + }]} { + exit 2 ;#read error + } + try { + set j [::tomlish::toml_to_json $toml] + } on error {em} { + puts stderr "decoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $j + exit 0 + } + + proc encoder {args} { + #*** !doctools + #[call app::[fun encoder] [arg args]] + #[para] read JSON on stdin until EOF + #[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation + #[para] return zero exitcode and TOML data on stdout if JSON data can be converted. 
+ #[para] This encoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + fconfigure stdin -translation binary + if {[catch { + set json [read stdin] + }]} { + exit 2 ;#read error + } + try { + set toml [::tomlish::json_to_toml $json] + } on error {em} { + puts stderr "encoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $toml + exit 0 + } + + proc test {args} { + set opts [dict merge [dict create] $args] + + package require test::tomlish + if {[dict exists $opts -suite]} { + test::tomlish::suite [dict get $opts -suite] + } + test::tomlish::run + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::app ---}] +} + +proc ::tomlish::appnames {} { + set applist [list] + foreach cmd [info commands ::tomlish::app::*] { + lappend applist [namespace tail $cmd] + } + return $applist +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace tomlish::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +if {$argc > 0} { + puts stderr "argc: $argc args: $argv" + + if {($argc == 1)} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { + puts stdout "Usage: -app where appname one of:[tomlish::appnames]" + exit 0 + } else { + puts stderr "Argument '$argv' not understood. Try -help" + exit 1 + } + } + set opts [dict create] + set opts [dict merge $opts $argv] + + set opts_understood [list -app ] + if {"-app" in [dict keys $opts]} { + #Don't vet the remaining opts - as they are interpreted by each app + } else { + foreach key [dict keys $opts] { + if {$key ni $opts_understood} { + puts stderr "Option '$key' not understood" + exit 1 + } + } + } + if {[dict exists $opts -app]} { + set app [dict get $opts -app] + if {$app ni [tomlish::appnames]} { + puts stderr "app '[dict get $opts -app]' not found" + exit 1 + } + tomlish::app::$app {*}$opts + } +} + +## Ready +package provide tomlish [namespace eval tomlish { + variable pkg tomlish + variable version + set version 1.1.2 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.1.0.tm index 25b01d81..91f29aa5 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.1.0.tm @@ -331,26 +331,26 @@ tcl::namespace::eval punk::args { parsing and help display. directives include: %B%@id%N% ?opt val...? - options: -id + spec-options: -id %B%@cmd%N% ?opt val...? - options: -name -help + spec-options: -name -help %B%@leaders%N% ?opt val...? - options: -min -max + spec-options: -min -max (used for leading args that come before switches/opts) %B%@opts%N% ?opt val...? - options: -any + spec-options: -any %B%@values%N% ?opt val...? 
- options: -min -max + spec-options: -min -max (used for trailing args that come after switches/opts) %B%@argdisplay%N% ?opt val...? - options: -header (text for header row of table) - -body (text to replace autogenerated arg info) + spec-options: -header (text for header row of table) + -body (text to replace autogenerated arg info) %B%@doc%N% ?opt val...? - options: -name -url + spec-options: -name -url %B%@seealso%N% ?opt val...? - options: -name -url (for footer - unimplemented) + spec-options: -name -url (for footer - unimplemented) - Some other options normally present on custom arguments are available + Some other spec-options normally present on custom arguments are available to use with the @leaders @opts @values directives to set defaults for subsequent lines that represent your custom arguments. These directives should occur in exactly this order - but can be @@ -361,7 +361,12 @@ tcl::namespace::eval punk::args { or using the i .. function - an @id with -id is needed. All directives can be omitted, in which case every line represents - a custom value or option. + a custom leader, value or option. + All will be leaders by default if no options defined. + If options are defined (by naming with leading dash, or explicitly + specifying @opts) then the definitions prior to the options will be + categorised as leaders, and those following the options will be + categorised as values. Custom arguments are defined by using any word at the start of a line that doesn't begin with @ or - @@ -369,7 +374,7 @@ tcl::namespace::eval punk::args { that @@somearg becomes an argument named @somearg) custom leading args, switches/options (names starting with -) - and trailing values also take options: + and trailing values also take spec-options: -type defaults to string. If no other restrictions @@ -397,12 +402,22 @@ tcl::namespace::eval punk::args { -optional (defaults to true for flags/switches false otherwise) + For non flag/switch arguments - all arguments with + -optional true must sit consecutively within their group. + ie all optional leader arguments must be together, and all + optional value arguments must be together. Furthermore, + specifying both optional leaders and optional values will + often lead to ambiguous parsing results. Currently, all + optional non-flg/switch arguments should be either at the + trailing end of leaders or the trailing end of values. + Further unambiguous arrangements of optional args may be + made in future - but are currently considered 'unsupported' -default -multiple (for leaders & values defines whether - subsequent received values are stored agains the same - argument name - only applies to final leader or value) + subsequent received values are stored against the same + argument name - only applies to final leader OR final value) (for options/flags this allows the opt-val pair or solo - flag to appear multiple times - no necessarily contiguously) + flag to appear multiple times - not necessarily contiguously) -choices {} A list of allowable values for an argument. The -default value doesn't have to be in the list. @@ -438,7 +453,7 @@ tcl::namespace::eval punk::args { Max of -1 represents no upper limit. If allows more than one choice the value is a list consisting of items in the choices made available through - entries in -choices/-choicegrups. + entries in -choices/-choicegroups. 
-minsize (type dependant) -maxsize (type dependant) -range (type dependant) @@ -1667,6 +1682,7 @@ tcl::namespace::eval punk::args { " @leaders -min 0 -max 0 @opts + -return -default text -choices {text dict} -form -default 0 -help\ "Ordinal index or name of command form" @@ -1694,7 +1710,7 @@ tcl::namespace::eval punk::args { (directives are lines beginning with @ e.g @id, @cmd etc) - if -type is @leaders,@opts or @values matches from that type + if -type is leaders,opts or values matches from that type will be returned. if -type is another directive such as @id, @doc etc the @@ -1706,8 +1722,10 @@ tcl::namespace::eval punk::args { proc resolved_def {args} { + #not eating our own dogfood here as far as argument parsing. -id ::punk::args::resolved_def is for documentation/errors only. set opts [dict create\ - -types {}\ + -return text\ + -types {}\ -form 0\ -antiglobs {}\ -override {}\ @@ -1743,7 +1761,7 @@ tcl::namespace::eval punk::args { } dict for {k v} $opts { switch -- $k { - -form - -types - -antiglobs - -override {} + -return - -form - -types - -antiglobs - -override {} default { punk::args::parse $args withid ::punk::args::resolved_def return @@ -1764,163 +1782,185 @@ tcl::namespace::eval punk::args { variable id_cache_rawdef set realid [real_id $id] + if {$realid eq ""} { + return + } - if {$realid ne ""} { - set deflist [tcl::dict::get $id_cache_rawdef $realid] - set result "" - set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] + set deflist [tcl::dict::get $id_cache_rawdef $realid] + set specdict [uplevel 1 [list ::punk::args::resolve {*}$deflist]] - set opt_form [dict get $opts -form] - if {[string is integer -strict $opt_form]} { - set formname [lindex [dict get $specdict form_names] $opt_form] - } else { - set formname $opt_form - } - set opt_override [dict get $opts -override] - - #set arg_info [dict get $specdict ARG_INFO] - set arg_info [dict get $specdict FORMS $formname ARG_INFO] - set argtypes [dict create leaders leader opts option values value] - - set opt_antiglobs [dict get $opts -antiglobs] - set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] - set suppressed_directives [list] - set suppressed_args [list] - foreach ag $opt_antiglobs { - foreach d $directives { - if {[string match $ag $d]} { - lappend suppressed_directives $d - } + set opt_form [dict get $opts -form] + if {[string is integer -strict $opt_form]} { + set formname [lindex [dict get $specdict form_names] $opt_form] + } else { + set formname $opt_form + } + set opt_override [dict get $opts -override] + set opt_return [dict get $opts -return] + + #set arg_info [dict get $specdict ARG_INFO] + set arg_info [dict get $specdict FORMS $formname ARG_INFO] + set argtypes [dict create leaders leader opts option values value] + + set opt_antiglobs [dict get $opts -antiglobs] + set directives [lsearch -all -inline -exact -not $::punk::args::argdoc::resolved_def_TYPE_CHOICES *] + set suppressed_directives [list] + set suppressed_args [list] + foreach ag $opt_antiglobs { + foreach d $directives { + if {[string match $ag $d]} { + lappend suppressed_directives $d } - foreach argname [dict keys $arg_info] { - if {[string match $ag $argname]} { - lappend suppressed_args $argname - } + } + foreach argname [dict keys $arg_info] { + if {[string match $ag $argname]} { + lappend suppressed_args $argname } } - set suppressed_directives [lsort -unique $suppressed_directives] - set suppressed_args [lsort -unique $suppressed_args] + } + set suppressed_directives 
[lsort -unique $suppressed_directives] + set suppressed_args [lsort -unique $suppressed_args] - set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] + set included_directives [punk::args::system::punklib_ldiff $directives $suppressed_directives] - set globbed [list] - foreach pat $patterns { - set matches [dict keys $arg_info $pat] - lappend globbed {*}$matches - } - set globbed [lsort -unique $globbed] - set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - - foreach type $typelist { - switch -exact -- $type { - * { - if {"@id" in $included_directives} { - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" - } else { - append result \n "@id -id [dict get $specdict id]" - } - } - foreach directive {@package @cmd @doc @seealso @argdisplay} { - set dshort [string range $directive 1 end] - if {"$directive" in $included_directives} { - if {[dict exists $opt_override $directive]} { - append result \n "$directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict ${dshort}_info]" - } - } - } - #output ordered by leader, option, value - foreach pseudodirective {leaders opts values} tp {leader option value} { - set directive "@$pseudodirective" - switch -- $directive { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - - if {"$directive" in $included_directives} { - if {[dict exists $opt_override "$directive"]} { - append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" - } else { - append result \n "$directive [dict get $specdict $defaults_key]" - } - } + set globbed [list] + foreach pat $patterns { + set matches [dict keys $arg_info $pat] + lappend globbed {*}$matches + } + set globbed [lsort -unique $globbed] + set included_args [punk::args::system::punklib_ldiff $globbed $suppressed_args] - if {$pseudodirective in $included_directives} { - foreach m $included_args { - set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq $tp} { - set argspec [dict remove $argspec -ARGTYPE] - if {[dict exists $opt_override $m]} { - append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" - } else { - append result \n "$m $argspec" - } - } - } - } + set result "" + set resultdict [dict create] + foreach type $typelist { + switch -exact -- $type { + * { + if {"@id" in $included_directives} { + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] } - } - @id { - if {"@id" in $included_directives} { - #only a single id record can exist - if {[dict exists $opt_override @id]} { - append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + foreach directive {@package @cmd @doc @seealso @argdisplay} { + set dshort [string range $directive 1 end] + if {"$directive" in $included_directives} { + if {[dict exists $opt_override $directive]} { + append result \n "$directive [dict merge [dict get $specdict 
${dshort}_info] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict ${dshort}_info] [dict get $opt_override $directive]] } else { - append result \n "@id -id [dict get $specdict id]" + append result \n "$directive [dict get $specdict ${dshort}_info]" + dict set resultdict $directive [dict get $specdict ${dshort}_info] } } } - @package - @cmd - @doc - @seealso - @argdisplay { - if {"$type" in $included_directives} { - set tp [string range $type 1 end] ;# @package -> package - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" - } else { - append result \n "$type [dict get $specdict ${tp}_info]" - } + #output ordered by leader, option, value + foreach pseudodirective {leaders opts values} tp {leader option value} { + set directive "@$pseudodirective" + switch -- $directive { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} } - } - @leaders - @opts - @values { - #these are the active defaults for further arguments - if {"$type" in $included_directives} { - switch -- $type { - @leaders {set defaults_key leaderspec_defaults} - @opts {set defaults_key optspec_defaults} - @values {set defaults_key valspec_defaults} - } - if {[dict exists $opt_override $type]} { - append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + + if {"$directive" in $included_directives} { + if {[dict exists $opt_override "$directive"]} { + append result \n "$directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]]" + dict set resultdict $directive [dict merge [dict get $specdict $defaults_key] [dict get $opt_override $directive]] } else { - append result \n "$type [dict get $specdict leaderspec_defaults]" + append result \n "$directive [dict get $specdict $defaults_key]" + dict set resultdict $directive [dict get $specdict $defaults_key] } } - } - leaders - opts - values { - #pseudo-directives - if {$type in $included_directives} { + + if {$pseudodirective in $included_directives} { foreach m $included_args { set argspec [dict get $arg_info $m] - if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + if {[dict get $argspec -ARGTYPE] eq $tp} { set argspec [dict remove $argspec -ARGTYPE] if {[dict exists $opt_override $m]} { append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] } else { append result \n "$m $argspec" + dict set resultdict $m $argspec } } } } } - default { + + } + @id { + if {"@id" in $included_directives} { + #only a single id record can exist + if {[dict exists $opt_override @id]} { + append result \n "@id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]]" + dict set resultdict @id [dict merge [dict create -id [dict get $specdict id]] [dict get $opt_override @id]] + } else { + append result \n "@id -id [dict get $specdict id]" + dict set resultdict @id [list -id [dict get $specdict id]] + } } } + @package - @cmd - @doc - @seealso - @argdisplay { + if {"$type" in $included_directives} { + set tp [string range $type 1 end] ;# @package -> package + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict ${tp}_info] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict 
${tp}_info] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get $specdict ${tp}_info]" + dict set resultdict $type [dict get $specdict ${tp}_info] + } + } + } + @leaders - @opts - @values { + #these are the active defaults for further arguments + if {"$type" in $included_directives} { + switch -- $type { + @leaders {set defaults_key leaderspec_defaults} + @opts {set defaults_key optspec_defaults} + @values {set defaults_key valspec_defaults} + } + if {[dict exists $opt_override $type]} { + append result \n "$type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]]" + dict set resultdict $type [dict merge [dict get $specdict leaderspec_defaults] [dict get $opt_override $type]] + } else { + append result \n "$type [dict get $specdict leaderspec_defaults]" + dict set resultdict $type [dict get $specdict leaderspec_defaults] + } + } + } + leaders - opts - values { + #pseudo-directives + if {$type in $included_directives} { + foreach m $included_args { + set argspec [dict get $arg_info $m] + if {[dict get $argspec -ARGTYPE] eq [dict get $argtypes $type]} { + set argspec [dict remove $argspec -ARGTYPE] + if {[dict exists $opt_override $m]} { + append result \n "$m [dict merge $argspec [dict get $opt_override $m]]" + dict set resultdict $m [dict merge $argspec [dict get $opt_override $m]] + } else { + append result \n "$m $argspec" + dict set resultdict $m $argspec + } + } + } + } + } + default { + } + } + if {$opt_return eq "text"} { + return $result + } else { + return $resultdict } - - return $result } } diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm index 51e74719..f0a4a444 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm @@ -20,7 +20,7 @@ #[manpage_begin punkshell_module_punk::path 0 0.1.0] #[copyright "2023"] #[titledesc {Filesystem path utilities}] [comment {-- Name section and table of contents description --}] -#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] +#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] #[require punk::path] #[description] #[keywords module path filesystem] @@ -104,21 +104,21 @@ namespace eval punk::path { #*** !doctools #[subsection {Namespace punk::path}] - #[para] Core API functions for punk::path + #[para] Core API functions for punk::path #[list_begin definitions] - # -- --- + # -- --- #punk::path::normjoin # - simplify . and .. segments as far as possible whilst respecting specific types of root. - # -- --- + # -- --- #a form of file normalize that supports //xxx to be treated as server path names #(ie regardless of unices ignoring (generally) leading double slashes, and regardless of windows volumerelative path syntax) - #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) - # -- --- + #(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) + # -- --- #This is intended to be purely a string analysis - without reference to filesystem volumes or vfs or zipfs mountpoints etc # #TODO - option for caller to provide a -base below which we can't backtrack. 
- #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share + #This is preferable to setting policy here for example regarding forcing no trackback below //servername/share #Our default is to allow trackback to: # :// # :/ @@ -128,7 +128,7 @@ namespace eval punk::path { # ./../ - (track back indefinitely on relpath as we are not resolving to anything physical and can't fully simplify the leading backtracks) # #The caller should do the file/vfs operations to determine this - not us. - # -- --- + # -- --- #simplify path with respect to /./ & /../ elements - independent of platform #NOTE: "anomalies" in standard tcl processing on windows: #e.g file normalize {//host} -> c:/host (or e.g d:/host if we happen to be on another volume) @@ -148,9 +148,9 @@ namespace eval punk::path { #known issues: #1) # normjoin d://a//b//c -> d://a/b/c - # This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c + # This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c # Not considered a problem - just potentially surprising. - # To avoid it we would have to enumerate possible schemes. + # To avoid it we would have to enumerate possible schemes. # As it stands a unix system could define a 'scheme' that happens to match windows style driveletters. Consider a 'feature' ? review. # won't fix? #2) @@ -164,16 +164,16 @@ namespace eval punk::path { # normjoin ///server/share -> ///server/share #This is effectively an empty servername in the input with 'server' being pushed one level down - and the output is consistent # possibly won't fix - review - #4) inconsistency + #4) inconsistency # we return normalized //server/share for //./UNC/server share # but other dos device paths are maintained # e.g //./c:/etc # This is because such paths could contain alternate segment names (windows shortnames) which we aren't in a position to resolve. - # caller should - # #as with 'case' below - caller will need to run a post 'file normalize' + # caller should + # #as with 'case' below - caller will need to run a post 'file normalize' #5) we don't normalize case like file normalize does on windows platform. # This is intentional. It could only be done with reference to underlying filesystem which we don't want here. - # + # # ================ # #relpaths all end up with leading . - while not always the simplest form, this is ok. (helps stop inadvertent conversions to absolutes) @@ -194,14 +194,14 @@ namespace eval punk::path { /// { #if this is effectively //$emptyservername/ #then for consistency we should trail //=3 #todo - shortcircuit that here? 
} } - # /// - set doubleslash1_posn [string first // $path] + # /// + set doubleslash1_posn [string first // $path] # -- --- --- temp warning on windows only - no x-platform difference in result #on windows //host is of type volumerelative @@ -221,7 +221,7 @@ namespace eval punk::path { } # -- --- --- - set is_relpath 0 + set is_relpath 0 #set path [string map [list \\ /] $path] set finalparts [list] @@ -264,11 +264,11 @@ namespace eval punk::path { #normalize by dropping leading slash before split - and then treating first 2 segments as a root #set parts [file split [string range $path 1 end]] set parts [split $path /] - #assert parts here has {} {} as first 2 entries + #assert parts here has {} {} as first 2 entries set rootindex 2 #currently prefer can backtrack to the //zipfs:/ scheme (below the mountpoint - to browse other mounts) #alternative handling for //zipfs:/path - don't go below mountpoint - #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint + #but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint #review - more generally //:/path ? #todo - make an option for zipfs and others to determine the 'base' #if {"zipfs:" eq [lindex $parts 2]} { @@ -281,7 +281,7 @@ namespace eval punk::path { #set parts [file split $path] set parts [::split $path /] #e.g /a/b/c -> {} a b c - #or relative path a/b/c -> a b c + #or relative path a/b/c -> a b c #or c:/a/b/c -> c: a b c if {[string match *: [lindex $parts 0]]} { if {[lindex $parts 1] eq ""} { @@ -295,9 +295,9 @@ namespace eval punk::path { } elseif {[lindex $parts 0] ne ""} { #relpath a/b/c set parts [linsert $parts 0 .] - set rootindex 0 - #allow backtracking arbitrarily for leading .. entries - simplify where possible - #also need to stop possible conversion to absolute path + set rootindex 0 + #allow backtracking arbitrarily for leading .. entries - simplify where possible + #also need to stop possible conversion to absolute path set is_relpath 1 } } @@ -306,7 +306,7 @@ namespace eval punk::path { #puts stderr "-->baseparts:$baseparts" #ensure that if our rootindex already spans a dotted segment (after the first one) we remove it #must maintain initial . for relpaths to stop them converting to absolute via backtrack - # + # set finalparts [list [lindex $baseparts 0]] foreach b [lrange $baseparts 1 end] { if {$b ni {. ..}} { @@ -333,7 +333,7 @@ namespace eval punk::path { lappend finalparts $p } } - incr i + incr i } } else { foreach p [lrange $parts $rootindex+1 end] { @@ -345,7 +345,7 @@ namespace eval punk::path { switch -exact -- $p { . - "" {} .. 
{ - lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 + lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 } default { lappend finalparts $p @@ -403,16 +403,16 @@ namespace eval punk::path { } - #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' - # - no volumerelative + #x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' + # - no volumerelative # - no lookup of file volumes (volume is a windows concept - but with //zipfs:/ somewhat applicable to other platforms) # - /* as absolute (covers also //zipfs:/ (volume), //server , //./etc , //./UNC) # - xxx:// as absolute (scheme) # - xxx:/ or x:/ as absolute - # - x: xxx: -> as absolute (volume-basic or volume-extended) + # - x: xxx: -> as absolute (volume-basic or volume-extended) #note also on windows - legacy name for COM devices - # COM1 = COM1: + # COM1 = COM1: # //./COM1 ?? review proc pathtype {str} { @@ -425,7 +425,7 @@ namespace eval punk::path { return absolute } - #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review + #only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review #e.g a:b:/.. or a::/.. or :/.. is not treated as volume/scheme whereas ab:/ is. set firstslash [string first / $str] if {$firstslash == -1} { @@ -434,9 +434,9 @@ namespace eval punk::path { set firstsegment [string range $str 0 $firstslash-1] } if {[set firstc [string first : $firstsegment]] > 0} { - set lhs_firstsegment [string range $firstsegment 0 $firstc-1] + set lhs_firstsegment [string range $firstsegment 0 $firstc-1] set rhs_firstsegment [string range $firstsegment $firstc+1 end] ;#exclude a:b/ etc - if {$rhs_firstsegment eq ""} { + if {$rhs_firstsegment eq ""} { set rhs_entire_path [string range $str $firstc+1 end] #assert lhs_firstsegment not empty since firstc > 0 #count following / sequence @@ -466,7 +466,7 @@ namespace eval punk::path { } } } - #assert first element of any return has been absolute or relative + #assert first element of any return has been absolute or relative return relative } @@ -489,7 +489,7 @@ namespace eval punk::path { } return $str } - #purely string based - no reference to filesystem knowledge + #purely string based - no reference to filesystem knowledge #unix-style forward slash only proc plainjoin {args} { set args [lmap a $args {string map "\\\\ /" $a}] @@ -499,12 +499,12 @@ namespace eval punk::path { set out "" foreach a $args { if {![string length $out]} { - append out [plain $a] + append out [plain $a] } else { set a [plain $a] if {[string map {/ ""} $out] eq ""} { set out [string range $out 0 end-1] - } + } if {[string map {/ ""} $a] eq ""} { #all / segment @@ -512,16 +512,16 @@ namespace eval punk::path { } else { if {[string length $a] > 2 && [string match "./*" $a]} { set a [string range $a 2 end] - } + } if {[string index $out end] eq "/"} { append out $a } else { - append out / $a + append out / $a } } } } - return $out + return $out } proc plainjoin1 {args} { if {[llength $args] == 1} { @@ -530,9 +530,9 @@ namespace eval punk::path { set out [trim_final_slash [lindex $args 0]] foreach a [lrange $args 1 end] { set a [trim_final_slash $a] - append out / $a + append out / $a } - return $out + return $out } #intention? 
@@ -554,13 +554,13 @@ namespace eval punk::path { #*** !doctools #[call [fun pathglob_as_re] [arg pathglob]] #[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure - #[para] ** matches any number of subdirectories. + #[para] ** matches any number of subdirectories. #[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc (except directly within /etc itself) #[para] e.g /etc/**.txt will match any .txt files at any depth below /etc #[para] any segment that does not contain ** must match exactly one segment in the path - #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc + #[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc #[para] The pathglob doesn't have to contain glob characters, in which case the returned regex will match the pathglob exactly as specified. - #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals + #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals #todo - consider whether a way to escape the glob chars ? * is practical - to allow literals ? * @@ -572,9 +572,9 @@ namespace eval punk::path { } switch -- $seg { * {lappend pats {[^/]*}} - ** {lappend pats {.*}} + ** {lappend pats {.*}} default { - set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals + set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals #set seg [string map [list . {[.]}] $seg] set seg [string map {. [.]} $seg] if {[regexp {[*?]} $seg]} { @@ -614,14 +614,14 @@ namespace eval punk::path { } } } - # -- --- --- --- --- --- + # -- --- --- --- --- --- set opt_nocase [dict get $opts -nocase] - set explicit_nocase 1 ;#default to disprove + set explicit_nocase 1 ;#default to disprove if {$opt_nocase eq "\uFFFF"} { set opt_nocase 0 set explicit_nocase 0 - } - # -- --- --- --- --- --- + } + # -- --- --- --- --- --- if {$opt_nocase} { return [regexp -nocase [pathglob_as_re $pathglob] $path] } else { @@ -651,33 +651,33 @@ namespace eval punk::path { -call-depth-internal -default 0 -type integer -antiglob_paths -default {} -help\ "list of path patterns to exclude - may include * and ** path segments e.g + may include * and ** path segments e.g /usr/** (exlude subfolders based at /usr but not files within /usr itself) **/_aside (exlude files where _aside is last segment) **/_aside/* (exclude folders one below an _aside folder) **/_aside/** (exclude all folders with _aside as a segment)" - @values -min 0 -max -1 -optional 1 -type string + @values -min 0 -max -1 -optional 1 -type string tailglobs -default * -multiple 1 -help\ "Patterns to match against filename portion (last segment) of each file path within the directory tree being searched." 
} - #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ + #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ #then review if treefiles can replace dirfiles or if both should exist (dirfiles can have literal glob chars in path segments - but that is a rare usecase) proc treefilenames {args} { #*** !doctools #[call [fun treefilenames] [opt {option value...}] [opt {globpattern...}]] #[para]basic (glob based) list of filenames matching each pattern in tailglobs - recursive #[para] options: - #[para] [opt -dir] + #[para] [opt -dir] #[para] defaults to [lb]pwd[rb] - base path for tree to search - #[para] [opt -antiglob_paths] + #[para] [opt -antiglob_paths] #[para] list of path patterns to exclude - may include * and ** path segments e.g /usr/** #[para]no natsorting - so order is dependent on filesystem set argd [punk::args::parse $args withid ::punk::path::treefilenames] - lassign [dict values $argd] leaders opts values received + lassign [dict values $argd] leaders opts values received set tailglobs [dict get $values tailglobs] # -- --- --- --- --- --- --- set opt_antiglob_paths [dict get $opts -antiglob_paths] @@ -694,7 +694,7 @@ namespace eval punk::path { set opt_dir [dict get $opts -directory] } if {![file isdirectory $opt_dir]} { - return [list] + return [list] } } else { #assume/require to exist in any recursive call @@ -713,15 +713,26 @@ namespace eval punk::path { } #todo - account for vfs where matched path could appear to be a directory but is mounted so could be a desired match? - set dirfiles [lsort [glob -nocomplain -dir $opt_dir -type f {*}$tailglobs]] + if {[catch {glob -nocomplain -dir $opt_dir -type f {*}$tailglobs} matches]} { + #we can get for example a permissions error + puts stderr "treefilenames error while listing files in dir $opt_dir\n $matches" + set dirfiles [list] + } else { + set dirfiles [lsort $matches] + } + lappend files {*}$dirfiles - set dirdirs [glob -nocomplain -dir $opt_dir -type d *] + if {[catch {glob -nocomplain -dir $opt_dir -type d *} dirdirs]} { + puts stderr "treefilenames error while listing subdirs in dir $opt_dir\n $dirdirs" + set dirdirs [list] + } + foreach dir $dirdirs { set skip 0 foreach anti $opt_antiglob_paths { if {[globmatchpath $anti $dir]} { set skip 1 - break + break } } if {$skip} { @@ -743,8 +754,8 @@ namespace eval punk::path { #[item] #[para] Arguments: # [list_begin arguments] - # [arg_def string reference] The path from which the relative path to location is determined. - # [arg_def string location] The location path which may be above or below the reference path + # [arg_def string reference] The path from which the relative path to location is determined. + # [arg_def string location] The location path which may be above or below the reference path # [list_end] #[item] #[para] Results: @@ -753,7 +764,7 @@ namespace eval punk::path { #[item] #[para] Notes: #[para] Both paths must be the same type - ie both absolute or both relative - #[para] Case sensitive. ie punk::path::relative /etc /etC + #[para] Case sensitive. 
ie punk::path::relative /etc /etC # will return ../etC #[para] On windows, the drive-letter component (only) is not case sensitive #[example_begin] @@ -774,7 +785,7 @@ namespace eval punk::path { #[example_begin] # P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below # - somewhere/below - # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here # - ../../lib/here #[example_end] #[list_end] @@ -791,7 +802,7 @@ namespace eval punk::path { #avoid normalizing if possible (file normalize *very* expensive on windows) set do_normalize 0 if {[file pathtype $reference] eq "relative"} { - #if reference is relative so is location + #if reference is relative so is location if {[regexp {[.]{2}} [list $reference $location]]} { set do_normalize 1 } @@ -857,7 +868,7 @@ namespace eval punk::path::lib { namespace path [namespace parent] #*** !doctools #[subsection {Namespace punk::path::lib}] - #[para] Secondary functions that are part of the API + #[para] Secondary functions that are part of the API #[list_begin definitions] @@ -877,17 +888,17 @@ namespace eval punk::path::lib { namespace eval punk::path::system { #*** !doctools #[subsection {Namespace punk::path::system}] - #[para] Internal functions that are not part of the API + #[para] Internal functions that are not part of the API } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ -## Ready +## Ready package provide punk::path [namespace eval punk::path { variable pkg punk::path variable version - set version 0.1.0 + set version 0.1.0 }] return diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm index 2d185f01..c102ca29 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm @@ -5974,13 +5974,40 @@ tcl::namespace::eval textblock { [>punk . rhs]\ [punk::lib::list_as_lines -- [lrepeat 8 " | "]] } + punk::args::define [punk::lib::tstr -return string { + @id -id ::textblock::table + @cmd -name "textblock::table" -help\ + "A wrapper for creating a textblock::class::table + + NOTE: more options available - argument definition + is incomplete" + @opts + -return -choices {table tableobject} + -rows -type list -default "" -help\ + "A list of lists. + Each toplevel element represents a row. + The number of elements in each row must + be the same. + e.g for 2 rows and 3 columns: + table -rows {{r0c0 r0c1 r0c2} {r1c0 r1c1 r1c2}} + " + -headers -type list -default "" -help\ + "This is a simplified form where each column + has a single header row. + Each element in this list goes into the top + header row for a column. 
+ More complex header arrangements where each + column has multiple headers can be made + by using -return tableobject and calling + $tableobj configure_column -headers" + }] proc table {args} { #todo - use punk::args upvar ::textblock::class::opts_table_defaults toptdefaults set defaults [tcl::dict::create\ -rows [list]\ -headers [list]\ - -return string\ + -return table\ ] @@ -6017,7 +6044,7 @@ tcl::namespace::eval textblock { - if {$opt_return eq "string"} { + if {$opt_return eq "table"} { set result [$t print] $t destroy return $result diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.2.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.2.tm new file mode 100644 index 00000000..9270ca9c --- /dev/null +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.2.tm @@ -0,0 +1,5566 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application tomlish 1.1.2 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin tomlish_module_tomlish 0 1.1.2] +#[copyright "2024"] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] +#[require tomlish] +#[keywords module parsing toml configuration] +#[description] +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. 
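+#[para] A rough illustrative sketch only (assumes the decode and to_dict commands described in the API sections below):
+#[para] decoding the toml input a=1 with tomlish::decode::toml produces a tagged list of the form: TOMLISH {KEY a = {INT 1}}
+#[para] and passing that list to tomlish::to_dict produces a basic dict of the form: a {type INT value 1}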
+ +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of tomlish +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by tomlish +#[list_begin itemized] + +package require Tcl 8.6- +package require struct::stack +package require logger + +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {struct::stack}] + +#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') + + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish { + namespace export {[a-z]*}; # Convention: export all lowercase + variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? + + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + + #ARRAY is analogous to a Tcl list + #TABLE is analogous to a Tcl dict + #WS = inline whitespace + #KEY = bare key and value + #QKEY = double quoted key and value ;#todo - rename to DQKEY? + #SQKEY = single quoted key and value + #ITABLE = inline table (*can* be anonymous table) + # inline table values immediately create a table with the opening brace + # inline tables are fully defined between their braces, as are dotted-key subtables defined within + # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained + + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] + #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) + #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) 
+ set min_int -9223372036854775808 ;#-2^63 + set max_int +9223372036854775807 ;#2^63-1 + + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" + puts stderr $msg + } + logger::initNamespace ::tomlish + foreach lvl [logger::levels] { + interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl + log::logproc $lvl tomlish_log_$lvl + } + + #*** !doctools + #[subsection {Namespace tomlish}] + #[para] Core API functions for tomlish + #[list_begin definitions] + + proc tags {} { + return $::tomlish::tags + } + + #helper function for to_dict + proc _get_keyval_value {keyval_element} { + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" + set found_value 0 + #find the value + # 3 is the earliest index at which the value could occur (depending on whitespace) + set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error "tomlish _get_keyval_value keyval_element doesn't seem to be a properly structured { = } list" + } + foreach sub [lrange $keyval_element 2 end] { + #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey + switch -exact -- [lindex $sub 0] { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + set type [lindex $sub 0] + set value [lindex $sub 1] + set found_sub $sub + incr found_value 1 + } + default {} + } + } + if {!$found_value} { + error "tomlish Failed to find value element in KEY. '$keyval_element'" + } + if {$found_value > 1} { + error "tomlish Found multiple value elements in KEY, expected exactly one. '$keyval_element'" + } + + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + #simple (non-container, no-substitution) datatype + set result [list type $type value $value] + } + STRING - STRINGPART { + set result [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL - LITERALPART { + #REVIEW + set result [list type $type value $value] + } + TABLE { + #invalid? + error "tomlish _get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::to_dict [list $found_sub]] + } + ARRAY { + #we need to recurse to get the corresponding dict for the contained item(s) + #pass in the whole $found_sub - not just the $value! + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + default { + error "tomlish Unexpected value type '$type' found in keyval '$keyval_element'" + } + } + return $result + } + + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "tomlish _get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. 
'$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } + + + #to_dict is a *basic* programmatic datastructure for accessing the data. + # produce a dictionary of keys and values from a tomlish tagged list. + # to_dict is primarily for reading toml data. + #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, + # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. + # creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. + #A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. + # + #Note, currently we get a plain sub dictionary when an inline table is a direct value for a key, but an ITABLE when it's in an ARRAY - REVIEW + #Namespacing? + #ie note the difference: + #[Data] + #temp = { cpu = 79.5, case = 72.0} + # versus + #[Data] + #temps = [{cpu = 79.5, case = 72.0}] + proc to_dict {tomlish} { + + #keep track of which tablenames have already been directly defined, + # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' + #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. + #we don't error out just because a previous tablename segment has already appeared. + variable tablenames_seen [list] + + + log::info ">>> processing '$tomlish'<<<" + set items $tomlish + + foreach lst $items { + if {[lindex $lst 0] ni $::tomlish::tags} { + error "supplied string does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + } + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + #ignore TOMLISH tag at beginning + set items [lrange $tomlish 1 end] + } + + set datastructure [dict create] + foreach item $items { + set tag [lindex $item 0] + #puts "...> item:'$item' tag:'$tag'" + switch -exact -- $tag { + KEY - QKEY - SQKEY { + log::debug "--> processing $tag: $item" + set key [lindex $item 1] + if {$tag eq "QKEY"} { + set key [::tomlish::utils::unescape_string $key] + } + #!todo - normalize key. (may be quoted/doublequoted) + + if {[dict exists $datastructure $key]} { + error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." 
+ } + + #lassign [_get_keyval_value $item] type val + set keyval_dict [_get_keyval_value $item] + dict set datastructure $key $keyval_dict + } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "to_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } + TABLE { + set tablename [lindex $item 1] + set tablename [::tomlish::utils::tablename_trim $tablename] + + if {$tablename in $tablenames_seen} { + error "Table name '$tablename' has already been directly defined in the toml data. Invalid." + } + + log::debug "--> processing $tag (name: $tablename): $item" + set name_segments [::tomlish::utils::tablename_split $tablename] + set last_seg "" + #toml spec rule - all segments mst be non-empty + #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. + + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] + + foreach rawseg $name_segments { + + set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes are processed within it. + set seg [tcl::string::range $rawseg 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] + } else { + set seg $rawseg + } + + #no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. + #if {$rawseg eq ""} { + # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" + #} + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg + + if {[dict exists $datastructure {*}$table_key_hierarchy]} { + #It's ok for this key to already exist *if* it was defined by a previous tablename, + # but not if it was defined as a key/qkey/skey ? + + set testkey [join $table_key_hierarchy_raw .] 
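+ # (illustrative) the supertable rule being checked here, per the toml spec:
+ #   [a]       followed by [a.b]   -> ok, [a] is a supertable of [a.b]
+ #   a.b = 1   followed by [a.b]   -> error, key 'a' already exists in the datastructure
+ #                                    but was never defined by a table header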
+ + set testkey_length [llength $table_key_hierarchy_raw] + set found_testkey 0 + if {$testkey in $tablenames_seen} { + set found_testkey 1 + } else { + #see if it was defined by a longer entry + foreach seen $tablenames_seen { + set seen_segments [::tomlish::utils::tablename_split $seen] + #these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, + # and strip the quotes from both single-quoted and double-quoted entries. + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + + #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" + if {$testkey eq $seen_match} { + set found_testkey 1 + } + } + } + + if {$found_testkey == 0} { + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg + } + } + + } + + #ensure empty tables are still represented in the datastructure + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" + } + } + + #We must do this after the key-collision test above! + lappend tablenames_seen $tablename + + + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" + + #now add the contained elements + foreach element [lrange $item 2 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? 
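+ # (illustrative, abbreviated) the DOTTEDKEY branch above maps
+ #   [server]
+ #   info.ip = "10.0.0.1"
+ # to the nested dict  server {info {ip {type STRING value 10.0.0.1}}}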
+ set keyval_key [lindex $element 1] + if {$type eq "QKEY"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + #now make sure we add an empty value if there were no contained elements! + #!todo. + } + ITABLE { + #SEP??? + set datastructure [list] + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in ITABLE context. 
Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + } + ARRAY { + #arrays in toml are allowed to contain mixtures of types + set datastructure [list] + log::debug "--> processing array: $item" + + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::to_dict [list $element]]] + } + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments + } + default { + error "Unexpected value type '$type' found in array" + } + } + } + } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted + switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "double quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? 
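+ # (illustrative) the continuation behaviour handled below, per the toml spec:
+ #   s = """a \
+ #          b"""
+ # the backslash trims the newline and the following whitespace, so the value is "a b"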
+ set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + WS - COMMENT - NEWLINE { + #ignore + } + default { + error "Unexpected tag '$tag' in Tomlish list '$tomlish'" + } + } + } + return $datastructure + } + + + proc _from_dictval_tomltype {parents tablestack keys typeval} { + set type [dict get $typeval type] + set val [dict get $typeval value] + switch -- $type { + ARRAY { + set subitems [list] + foreach item $val { + lappend subitems [_from_dictval [list {*}$parents ARRAY] $tablestack $keys $item] SEP + } + if {[lindex $subitems end] eq "SEP"} { + set subitems [lrange $subitems 0 end-1] + } + return [list ARRAY {*}$subitems] + } + ITABLE { + if {$val eq ""} { + return ITABLE + } else { + return [_from_dictval [list {*}$parents ITABLE] $tablestack $keys $val] + } + } + MULTISTRING { + #value is a raw string that isn't encoded as tomlish + #create a valid toml snippet with the raw value and decode it to the proper tomlish MULTISTRING format + set tomlpart "x=\"\"\"\\\n" + append tomlpart $val "\"\"\"" + set tomlish [tomlish::decode::toml $tomlpart] + #e.g if val = " etc\nblah" + #TOMLISH {DOTTEDKEY {{KEY x}} = {MULTISTRING CONT {NEWLINE LF} {WS { }} {STRINGPART etc} {NEWLINE lf} {STRINGPART blah} } } + #lindex 1 3 is the MULTISTRING tomlish list + return [lindex $tomlish 1 3] + } + MULTILITERAL { + set tomlpart "x='''\n" + append tomlpart $val ''' + set tomlish [tomlish::decode::toml $tomlpart] + return [lindex $tomlish 1 3] + } + default { + return [list $type $val] + } + } + } + + proc _from_dictval {parents tablestack keys vinfo} { + set k [lindex $keys end] + if {[regexp {\s} $k] || [string first . $k] >= 0} { + #Any dot in the key would have been split by to_dict - so if it's present here it's part of this key - not a level separator! + #todo - QKEY? 
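+ # (illustrative) a dict key such as {favourite.colour} or {key with spaces} is tagged
+ # SQKEY here so that it can be re-emitted as a single-quoted toml key rather than being
+ # mistaken for a dotted key.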
+ set K_PART [list SQKEY $k] + } else { + set K_PART [list KEY $k] + } + puts stderr "---parents:'$parents' keys:'$keys' vinfo: $vinfo---" + puts stderr "---tablestack: $tablestack---" + set result [list] + set lastparent [lindex $parents end] + if {$lastparent in [list "" do_inline]} { + if {[tomlish::dict::is_tomltype $vinfo]} { + set type [dict get $vinfo type] + #treat ITABLE differently? + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result DOTTEDKEY [list $K_PART {WS { }}] = {WS { }} $sublist {NEWLINE lf} + } else { + #set result [list TABLE $k {NEWLINE lf}] + if {$vinfo ne ""} { + + #set result [list DOTTEDKEY [list [list KEY $k]] = ] + #set records [list ITABLE] + + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + + if {$lastparent eq "do_inline"} { + set result [list DOTTEDKEY [list $K_PART] =] + set records [list ITABLE] + } else { + #review - quoted k ?? + set result [list TABLE $k {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $k]] + set records [list] + } + + + + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >= 0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART {WS { }}] = {WS { }} $sublist] + } else { + if {$vv eq ""} { + #experimental + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline KEY $vk (tablestack:$tablestack)" + #set tname [tomlish::dict::name_from_tablestack [list {*}$tablestack [list T $vk]]] + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + } else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + set tablestack [list {*}$tablestack [list I $vk]] + } + } else { + if { 0 } { + #experiment.. sort of getting there. + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline2 KEYS [list {*}$keys $vk] (tablestack:$tablestack)" + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + + #review - todo? 
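+ # (illustrative sketch of what this disabled experiment would emit)
+ #   uninlined:            versus the inline form currently produced:
+ #     [a.b]                  b = {c = 1}
+ #     c = 1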
+ set dottedkey_value [_from_dictval [list {*}$parents TABLE] $tablestack [list {*}$keys $vk] $vv] + lappend record {*}$dottedkey_value + + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + } + if {$dictidx != $lastidx} { + #lappend record SEP + if {$lastparent eq "do_inline"} { + lappend record SEP + } else { + lappend record {NEWLINE lf} + } + } + lappend records $record + incr dictidx + } + if {$lastparent eq "do_inline"} { + lappend result $records {NEWLINE lf} + } else { + lappend result {*}$records {NEWLINE lf} + } + } else { + if {$lastparent eq "do_inline"} { + lappend result DOTTEDKEY [list [list KEY $k]] = ITABLE {NEWLINE lf} + } else { + lappend result TABLE $k {NEWLINE lf} + } + } + } + } else { + #lastparent is not toplevel "" or "do_inline" + if {[tomlish::dict::is_tomltype $vinfo]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result {*}$sublist + } else { + if {$lastparent eq "TABLE"} { + #review + dict for {vk vv} $vinfo { + set dottedkey_value [_from_dictval [list {*}$parents DOTTEDKEY] $tablestack [list {*}$keys $vk] $vv] + lappend result [list DOTTEDKEY [list [list KEY $vk]] = $dottedkey_value {NEWLINE lf}] + } + } else { + if {$vinfo ne ""} { + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + set sub [list] + set result $lastparent ;#e.g sets ITABLE + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >=0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART] = $sublist] + } else { + if {$vv eq ""} { + #can't just uninline at this level + #we need a better method to query main dict for uninlinability at each level + # (including what's been inlined already) + #if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + # puts stderr "_from_dictval uninline2 KEY $keys" + # set tname [join [list {*}$keys $vk] .] + # set record [list TABLE $tname {NEWLINE lf}] + # set tablestack [list {*}$tablestack [list T $vk]] + #} else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + #} + } else { + #set sub [_from_dictval ITABLE $vk $vv] + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + #set record [list DOTTEDKEY [list $VK_PART] = ITABLE $dottedkey_value] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + if {$dictidx != $lastidx} { + lappend record SEP + } + lappend result $record + incr dictidx + } + } else { + puts stderr "table x-1" + lappend result DOTTEDKEY [list $K_PART] = ITABLE + } + } + } + } + return $result + } + + + proc from_dict {d} { + #consider: + # t1={a=1,b=2} + # x = 1 + #If we represent t1 as an expanded table we get + # [t1] + # a=1 + # b=2 + # x=1 + # --- which is incorrect - as x was a toplevel key like t1! 
+ #This issue doesn't occur if x is itself an inline table + # t1={a=1,b=2} + # x= {no="problem"} + # + # (or if we were to reorder x to come before t1) + + #As the dictionary form doesn't distinguish the structure used to create tables {[table1]\nk=v} vs inline {table1={k=v}} + #Without a solution, from_dict would have to always produce the inline form for toplevel tables unless we allowed re-ordering, + #which is unpreferred here. + + #A possible solution: + #scan the top level to see if all (trailing) elements are themselves dicts + # (ie not of form {type XXX value yyy}) + # + # A further point is that if all root level values are at the 'top' - we can treat lower table-like structures as {[table]} elements + #ie we don't need to force do_inline if all the 'simple' keys are before any compound keys + + #set root_has_values 0 + #approach 1) - the naive approach - forces inline when not always necessary + #dict for {k v} $d { + # if {[llength $v] == 4 && [lindex $v 0] eq "type"} { + # set root_has_values 1 + # break + # } + #} + + + #approach 2) - track the position of last {type x value y} in the dictionary built by to_dict + # - still not perfect. Inlines dotted tables unnecessarily + #This means from_dict doesn't produce output optimal for human editing. + set last_simple [tomlish::dict::last_tomltype_posn $d] + + + ## set parent "do_inline" ;#a value used in _from_dictval to distinguish from "" or other context based parent values + #Any keys that are themselves tables - will need to be represented inline + #to avoid reordering, or incorrect assignment of plain values to the wrong table. + + ## set parent "" + #all toplevel keys in the dict structure can represent subtables. + #we are free to use {[tablename]\n} syntax for toplevel elements. + + + set tomlish [list TOMLISH] + set dictposn 0 + set tablestack [list [list T root]] ;#todo + dict for {t tinfo} $d { + if {$last_simple > $dictposn} { + set parents [list do_inline] + } else { + set parents [list ""] + } + set keys [list $t] + set trecord [_from_dictval $parents $tablestack $keys $tinfo] + lappend tomlish $trecord + incr dictposn + } + return $tomlish + } + + proc json_to_toml {json} { + #*** !doctools + #[call [fun json_to_toml] [arg json]] + #[para] + + set tomlish [::tomlish::from_json $json] + set toml [::tomlish::to_toml $tomlish] + } + + #TODO use huddle? 
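+ # (illustrative usage sketch for the dict round trip above - the procs named are the ones
+ #  defined in this file, but output formatting is not guaranteed to match the original document)
+ #   set d     [tomlish::to_dict [tomlish::from_toml $toml]]
+ #   set t2    [tomlish::from_dict $d]
+ #   set toml2 [tomlish::to_toml $t2]    ;# whitespace & comments from $toml are not preserved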
+ proc from_json {json} { + set jstruct [::tomlish::json_struct $json] + return [::tomlish::from_json_struct $jstruct] + } + + proc from_json_struct {jstruct} { + package require fish::json_toml + return [fish::json_toml::jsonstruct2tomlish $jstruct] + } + + proc toml_to_json {toml} { + set tomlish [::tomlish::from_toml $toml] + return [::tomlish::get_json $tomlish] + } + + proc get_json {tomlish} { + package require fish::json + set d [::tomlish::to_dict $tomlish] + #return [::tomlish::dict_to_json $d] + return [fish::json::from "struct" $d] + } + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +namespace eval tomlish::encode { + #*** !doctools + #[subsection {Namespace tomlish::encode}] + #[para] + #[list_begin definitions] + + #STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness + # take a value of the appropriate type and wrap as a tomlish tagged item + proc string {s} { + return [list STRING $s] + } + + proc int {i} { + #whole numbers, may be prefixed with a + or - + #Leading zeros are not allowed + #Hex,octal binary forms are allowed (toml 1.0) + #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) + #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. + # - We should probably raise an error for number larger than this and suggest the user supply it as a string? + if {[tcl::string::last , $i] > -1} { + error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" + } + if {![::tomlish::utils::int_validchars $i]} { + error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" + } + + if {[::tomlish::utils::is_int $i]} { + return [list INT $i] + } else { + error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" + } + + } + + proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] + } + if {[::tomlish::utils::is_float $f]} { + return [list FLOAT $f] + } else { + error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" + } + } + + proc datetime {str} { + if {[::tomlish::utils::is_datetime $str]} { + return [list DATETIME $str] + } else { + error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" + } + } + + proc boolean {b} { + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![tcl::string::is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. 
[::tomlish::parse::report_line]" + } else { + if {[expr {$b && 1}]} { + return [::list BOOL true] + } else { + return [::list BOOL false] + } + } + } + + + #TODO + #Take tablename followed by + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) + # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types + proc table {name args} { + set pairs [list] + foreach t $args { + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" + } + lappend pairs [list KEY $keystr = $valuepart] + } elseif {[llength $t] == 2} { + #!todo - type heuristics + lassign $t n v + lappend pairs [list KEY $n = [list STRING $v]] + } else { + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" + foreach part [lrange $item 1 end] { + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml '''$literal''' + } + INT - + BOOL - + FLOAT - + DATETIME { + append toml [lindex $item 1] + } + INCOMPLETE { + error "cannot process tomlish term tagged as INCOMPLETE" + } + COMMENT { + append toml "#[lindex $item 1]" + } + BOM { + #Byte Order Mark may appear at beginning of a file. Needs to be preserved. + append toml "\uFEFF" + } + default { + error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." + } + } + + } + return $toml + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::encode ---}] +} +#fish toml from tomlish + +#(encode tomlish as toml) +interp alias {} tomlish::to_toml {} tomlish::encode::tomlish + +# + + +namespace eval tomlish::decode { + #*** !doctools + #[subsection {Namespace tomlish::decode}] + #[para] + #[list_begin definitions] + + #return a Tcl list of tomlish tokens + #i.e get a standard list of all the toml terms in string $s + #where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. + #(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) + # ---------------------------------------------------------------------------------------------- + # NOTE: the production of tomlish from toml source doesn't indicate the toml source was valid!!! + # e.g we deliberately don't check certain things such as duplicate table declarations here. + # ---------------------------------------------------------------------------------------------- + #Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. + # (e.g perhaps a toml editor to highlight violations for fixing) + # A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. 
+ # e.g dicts or an object oriented structure + #Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage + #e.g to_dict will substitute \r \n \uHHHH \UHHHHHHH etc + #This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. + # (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) + #If we were to unescape a tab character for example + # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. + # For this reason, we also do absolutely no line-ending transformations based on platform. + # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + + proc toml {args} { + #*** !doctools + #[call [fun toml] [arg arg...]] + #[para] return a Tcl list of tomlish tokens + + set s [join $args \n] + + namespace upvar ::tomlish::parse is_parsing is_parsing + set is_parsing 1 + + + if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { + tomlish::parse::spacestack destroy + } + struct::stack ::tomlish::parse::spacestack + + namespace upvar ::tomlish::parse last_space_action last_space_action + namespace upvar ::tomlish::parse last_space_type last_space_type + + + namespace upvar ::tomlish::parse tok tok + set tok "" + + namespace upvar ::tomlish::parse type type + namespace upvar ::tomlish::parse tokenType tokenType + ::tomlish::parse::set_tokenType "" + namespace upvar ::tomlish::parse tokenType_list tokenType_list + set tokenType [list] ;#Flat (un-nested) list of tokentypes found + + namespace upvar ::tomlish::parse lastChar lastChar + set lastChar "" + + + set result "" + namespace upvar ::tomlish::parse nest nest + set nest 0 + + namespace upvar ::tomlish::parse v v ;#array keyed on nest level + + + set v(0) {TOMLISH} + array set s0 [list] ;#whitespace data to go in {SPACE {}} element. + set parentlevel 0 + + namespace upvar ::tomlish::parse i i + set i 0 + + namespace upvar ::tomlish::parse state state + + namespace upvar ::tomlish::parse braceCount braceCount + set barceCount 0 + namespace upvar ::tomlish::parse bracketCount bracketCount + set bracketCount 0 + + set sep 0 + set r 1 + namespace upvar ::tomlish::parse token_waiting token_waiting + set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. + + + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} + namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) + set linenum 1 + + set ::tomlish::parse::state_list [list] + try { + while {$r} { + set r [::tomlish::parse::tok $s] + #puts stdout "got tok: '$tok' while parsing string '$s' " + set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' + + + + #puts "got token: '$tok' tokenType='$tokenType'. 
while v($nest) = [set v($nest)]" + #puts "-->tok: $tok tokenType='$tokenType'" + set prevstate $state + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below + + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) + } + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { + #pop_trigger_tokens: newline tablename endarray endinlinetable + #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? 
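+ # (illustrative) a bare '' here usually represents an empty literal string, e.g.  key = ''
+ # it is re-emitted as a double_squote token and interpreted by the receiving space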
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] + } + ''' { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. 
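+ # (illustrative) for    x = '''
+ # abc'''
+ # this level is kept roughly as {MULTILITERAL {NEWLINE lf} {LITERALPART abc}} so the
+ # document round-trips byte for byte; to_dict drops that first NEWLINE when building the value.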
+ lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + tablename { + #note: a tablename only 'pops' if we are greater than zero + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" + } + tablearrayname { + #!review - tablearrayname different to tablename regarding push/pop? + #note: a tablename only 'pops' if we are greater than zero + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" + } + endarray { + #nothing to do here. + } + comma { + #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" + } + endinlinetable { + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" + } + endmultiquote { + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" + } + default { + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" + } + } + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + + incr nest -1 + + } elseif {$last_space_action eq "push"} { + set prevnest $nest + incr nest 1 + set v($nest) [list] + # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + barekey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + quotedkey - itablequotedkey { + set v($nest) [list QKEY $tok] ;#$tok is the keyname + } + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname + } + tablename { + #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! 
+ #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish + # back to toml file will be identical. + #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. + # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, + # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. + + #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, + # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the + # tomlish list? + + set test_only [::tomlish::utils::tablename_trim $tok] + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name + #note also that equivalent tablenames may have different toml representations even after being trimmed! + #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) + #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. + } + tablearrayname { + set test_only [::tomlish::utils::tablename_trim $tok] + puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name + } + startarray { + set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. + } + startinlinetable { + set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. + } + startmultiquote { + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL + } + default { + error "---- push trigger tokenType '$tokenType' not yet implemented" + } + } + + } else { + #no space level change + switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } + starttablename { + #$tok is triggered by the opening bracket and sends nothing to output + } + starttablearrayname { + #$tok is triggered by the double opening brackets and sends nothing to output + } + tablename - tablenamearray { + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" + #set v($nest) [list TABLE $tok] + } + endtablename - endtablearrayname { + #no output into the tomlish list for this token + } + startinlinetable { + puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" + } + startquote { + switch -exact -- $newstate { + string-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itable-quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startsquote { + switch -exact -- $newstate { + literal-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "literal" + set tok "" + } + squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itable-squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } + default { + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startmultiquote { + #review + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringpart" + set tok "" + } + endquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + endmultiquote { + #JMN!! + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + multistring { + #review + lappend v($nest) [list MULTISTRING $tok] + } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } + quotedkey { + #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + + } + untyped_value { + #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. + if {$tok in {true false}} { + set tag BOOL + } elseif {[::tomlish::utils::is_int $tok]} { + set tag INT + } elseif {[::tomlish::utils::is_float $tok]} { + set tag FLOAT + } elseif {[::tomlish::utils::is_datetime $tok]} { + set tag DATETIME + } else { + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. 
[tomlish::parse::report_line] (no space level change)" + } + lappend v($nest) [list $tag $tok] + + } + comment { + #puts stdout "----- comment token returned '$tok'------" + lappend v($nest) [list COMMENT "$tok"] + } + equal { + #we append '=' to the nest so that any surrounding whitespace is retained. + lappend v($nest) = + } + comma { + lappend v($nest) SEP + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + whitespace { + lappend v($nest) [list WS $tok] + } + continuation { + lappend v($nest) CONT + } + bom { + lappend v($nest) BOM + } + eof { + #ok - nothing more to add to the tomlish list. + #!todo - check previous tokens are complete/valid? + } + default { + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + + if {!$next_tokenType_known} { + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" + ::tomlish::parse::set_tokenType "" + set tok "" + } + + if {$state eq "end-state"} { + break + } + + + } + + #while {$nest > 0} { + # lappend v([expr {$nest -1}]) [set v($nest)] + # incr nest -1 + #} + while {[::tomlish::parse::spacestack size] > 1} { + ::tomlish::parse::spacestack pop + lappend v([expr {$nest -1}]) [set v($nest)] + incr nest -1 + + #set parent [spacestack peek] ;#the level being appended to + #lassign $parent type state + #if {$type eq "space"} { + # + #} elseif {$type eq "buffer"} { + # lappend v([expr {$nest -1}]) {*}[set v($nest)] + #} else { + # error "invalid spacestack item: $parent" + #} + } + + } finally { + set is_parsing 0 + } + return $v(0) + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::decode ---}] +} +#decode toml to tomlish +interp alias {} tomlish::from_toml {} tomlish::decode::toml + +namespace eval tomlish::utils { + #*** !doctools + #[subsection {Namespace tomlish::utils}] + #[para] + #[list_begin definitions] + + + #tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace + # tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] + #trimmed, the tablename becomes {a.b.c} + # A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] + #ie whitespace is only irrelevant if it's outside a quoted segment + #trimmed, the tablename becomes {a.b."c etc "} + proc tablename_trim {tablename} { + set segments [tablename_split $tablename false] + set trimmed_segments [list] + foreach seg $segments { + lappend trimmed_segments [::string trim $seg " \t"] + } + return [join $trimmed_segments .] 
+ } + + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [tcl::string::length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + + #utils::tablename_split + proc tablename_split {tablename {normalize false}} { + #we can't just split on . because we have to handle quoted segments which may contain a dot. + #eg {dog."tater.man"} + set sLen [tcl::string::length $tablename] + set segments [list] + set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + #quoted is for double-quotes, litquoted is for single-quotes (string literal) + set seg "" + for {set i 0} {$i < $sLen} {incr i} { + + if {$i > 0} { + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $tablename $i] + + if {$c eq "."} { + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } + } + } elseif {($c eq "\"") && ($lastChar ne "\\")} { + if {$mode eq "unknown"} { + if {[tcl::string::trim $seg] ne ""} { + #we don't allow a quote in the middle of a bare key + error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" + } + set mode "quoted" + set seg "\"" + } elseif {$mode eq "unquoted"} { + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" ;#make sure we only accept a dot or end-of-data now. + } elseif {$mode eq "litquoted"} { + append seg $c + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } + } elseif {($c eq "\'")} { + if {$mode eq "unknown"} { + append seg $c + set mode "litquoted" + } elseif {$mode eq "unquoted"} { + #single quote inside e.g o'neill + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + + } elseif {$mode eq "litquoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + + } elseif {$c in [list " " \t]} { + if {$mode eq "syntax"} { + #ignore + } else { + append seg $c + } + } else { + if {$mode eq "syntax"} { + error "tablename_split. 
Expected a dot separator. got '$c'. tablename: '$tablename'" + } + if {$mode eq "unknown"} { + set mode "unquoted" + } + append seg $c + } + if {$i == $sLen-1} { + #end of data + ::tomlish::log::debug "End of data: mode='$mode'" + switch -exact -- $mode { + quoted { + if {$c ne "\""} { + error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" + } + if {$normalize} { + lappend segments $seg + } else { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong + } + } + litquoted { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { + error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" + } + lappend segments $seg + } + unquoted - unknown { + lappend segments $seg + } + syntax { + #ok - segment already lappended + } + default { + lappend segments $seg + } + } + } + } + foreach seg $segments { + set trimmed [tcl::string::trim $seg " \t"] + #note - we explicitly allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + #if {$trimmed in [list "''" "\"\""]} { + # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" + #} + if {$trimmed eq "" } { + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" + } + } + return $segments + } + + proc unicode_escape_info {slashu} { + #!todo + # validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and + # is a valid 'unicode scalar value' + # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive + #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} + if {[tcl::string::match {\\u*} $slashu]} { + set exp {^\\u([0-9a-fA-F]{4}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %4x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } + } else { + return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] + } + } elseif {[tcl::string::match {\\U*} $slashu]} { + set exp {^\\U([0-9a-fA-F]{8}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %8x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } else { + return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] + } + } + } else { + return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] + } + } else { + return [list err [list reason "Supplied string did not start with \\u or \\U" ]] + } + + } + + proc unescape_string {str} { + #note we can't just use Tcl subst because: + # it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. 
+ # it would strip out backslashes inappropriately: e.g "\j" becomes just j + # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn + # it replaces\ with a single whitespace + #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + + set buffer "" + set buffer4 "" ;#buffer for 4 hex characters following a \u + set buffer8 "" ;#buffer for 8 hex characters following a \u + + set sLen [tcl::string::length $str] + + #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc + set slash_active 0 + set unicode4_active 0 + set unicode8_active 0 + + + #!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? + set i 0 + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $str [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $str $i] + ::tomlish::log::debug "unescape_string. got char $c" + scan $c %c n + if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { + #we don't expect unescaped unicode characters from 0000 to 001F - + #*except* for raw tab (which is whitespace) and newlines + error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" + } + incr i ;#must incr here because we do'returns'inside the loop + if {$c eq "\\"} { + if {$slash_active} { + append buffer "\\" + set slash_active 0 + } elseif {$unicode4_active} { + error "unescape_string. unexpected case slash during unicode4 not yet handled" + } elseif {$unicode8_active} { + error "unescape_string. unexpected case slash during unicode8 not yet handled" + } else { + # don't output anything (yet) + set slash_active 1 + } + } else { + if {$unicode4_active} { + if {[tcl::string::length $buffer4] < 4} { + append buffer4 $c + } + if {[tcl::string::length $buffer4] == 4} { + #we have a \uHHHH to test + set unicode4_active 0 + set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$unicode8_active} { + if {[tcl::string::length $buffer8] < 8} { + append buffer8 $c + } + if {[tcl::string::length $buffer8] == 8} { + #we have a \UHHHHHHHH to test + set unicode8_active 0 + set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$slash_active} { + set slash_active 0 + set ctest [tcl::string::map {{"} dq} $c] + switch -exact -- $ctest { + dq { + set e "\\\"" + append buffer [subst -nocommand -novariable $e] + } + b - t - n - f - r { + set e "\\$c" + append buffer [subst -nocommand -novariable $e] + } + u { + set unicode4_active 1 + set buffer4 "" + } + U { + set unicode8_active 1 + set buffer8 "" + } + default { + set slash_active 0 + + append buffer "\\" + append buffer $c + } + } + } else { + append buffer $c + } + } + } + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + if {$unicode4_active} { + error "End of string reached before complete unicode escape sequence \uHHHH" + } + if {$unicode8_active} { + error "End of string reached before complete unicode escape sequence \UHHHHHHHH" + } + if {$slash_active} { + append buffer "\\" + } + return $buffer + } + + proc 
normalize_key {rawkey} { + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes allowed within it. + set key [tcl::string::range $rawkey 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + # + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only + set key [::tomlish::utils::unescape_string $keydata] + #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. + } else { + set key $rawkey + } + return $key + } + + proc string_to_slashu {string} { + set rv {} + foreach c [split $string {}] { + scan $c %c c + append rv {\u} + append rv [format %.4X $c] + } + return $rv + } + + #'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. + proc nonprintable_to_slashu {s} { + set res "" + foreach i [split $s ""] { + scan $i %c c + + set printable 0 + if {($c>31) && ($c<127)} { + set printable 1 + } + if {$printable} {append res $i} else {append res \\u[format %.4X $c]} + } + set res + } ;#RS + + #check if str is valid for use as a toml bare key + proc is_barekey {str} { + if {[tcl::string::length $str] == 0} { + return 0 + } else { + set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters match the regexp + return 1 + } else { + return 0 + } + } + } + + #test only that the characters in str are valid for the toml specified type 'integer'. + proc int_validchars1 {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_int {str} { + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { + return 0 + } + # --------------------------------------- + + #check +,- only occur in the first position. + if {[tcl::string::last - $str] > 0} { + return 0 + } + if {[tcl::string::last + $str] > 0} { + return 0 + } + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![tcl::string::is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. + #presumably very large numbers would have to be supplied in a toml file as strings. + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max + if {$numeric_value > $::tomlish::max_int} { + return 0 + } + if {$numeric_value < $::tomlish::min_int} { + return 0 + } + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. 
+ return 1 + } + + #test only that the characters in str are valid for the toml specified type 'float'. + proc float_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { + return 1 + } else { + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } + } + } + + proc is_float {str} { + set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) + #Toml spec also disallows leading zeros in the exponent part + #... but this seems less interoperable (some libraries generate leading zeroes in exponents) + #for now we will allow leading zeros in exponents + #!todo - configure 'strict' option to disallow? + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + set r {([0-9])*} + regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E + set z {([0])*} + regexp $z $intpart leadingzeros + if {[tcl::string::length $leadingzeros] > 1} { + return 0 + } + #for floats, +,- may occur in multiple places + #e.g -2E-22 +3e34 + #!todo - check bounds ? + + #strip underscores for tcl double check + set check [tcl::string::map {_ ""} $str] + #string is double accepts inf nan +NaN etc. + if {![tcl::string::is double $check]} { + return 0 + } + + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'datetime'. + proc datetime_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + #review - we + proc is_datetime {str} { + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #!todo - use full RFC 3339 parser? + lassign [split $str T] datepart timepart + #!todo - what if the value is 'time only'? + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + + #!todo - verify time part is reasonable + } else { + return 0 + } + return 1 + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::utils ---}] +} + +namespace eval tomlish::parse { + #*** !doctools + #[subsection {Namespace tomlish::parse}] + #[para] + #[list_begin definitions] + + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? 
+ #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? + + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text + + variable state + # states: + # table-space, itable-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string-state, literal-state, multistring... + # + # notes: + # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax + # + #stateMatrix defines for each state, actions to take for each possible token. + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. 
+ #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases + + variable stateMatrix + set stateMatrix [dict create] + + #xxx-space vs xxx-syntax inadequately documented - TODO + + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# + + dict set stateMatrix\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ + } + + #itable-space/ curly-syntax : itables + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-testing"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ + } + + + dict set stateMatrix\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ + } + + # ' = ' portion of keyval + dict set stateMatrix\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + #### + dict set stateMatrix\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ + } + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} + dict set stateMatrix\ + 
leading-squote-space {\ + squote_seq "POPSPACE"\ + } + #dict set stateMatrix\ + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + + dict set stateMatrix\ + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + + dict set stateMatrix\ + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value + dict set stateMatrix\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ + } + #dottedkeyend "POPSPACE" + + + + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 + dict set stateMatrix\ + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey 
"itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ + } + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? + + + #JMN REVIEW + dict set stateMatrix\ + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ + comma "array-space"\ + comment "array-space"\ + eof "err-state-array-space-got-eof"\ + } + dict set stateMatrix\ + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + comma "array-space"\ + comment "err-state"\ + } + + + + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space + dict set stateMatrix\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ + } + dict set stateMatrix\ + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ + } + # endsquote {PUSHSPACE "keyval-space"} + + dict set stateMatrix\ + string-state {\ + whitespace "NA"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + literal-state {\ + whitespace "NA"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + + + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } + dict set stateMatrix\ + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ + } + + + #only valid subparts are literalpart and newline. 
other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline + dict set stateMatrix\ + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes + dict set stateMatrix\ + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + + + dict set stateMatrix\ + tablename-state {\ + whitespace "NA"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ + } + dict set stateMatrix\ + tablearrayname-state {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ + } + + dict set stateMatrix\ + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } + } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" + } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #build a list of 'push triggers' from the stateMatrix + # ie tokens which can push a new space onto spacestack + set push_trigger_tokens [list] + tcl::dict::for {s transitions} $stateMatrix { + tcl::dict::for {token transition_to} $transitions { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { + if {$token ni $push_trigger_tokens} { + lappend push_trigger_tokens $token + } + } + } + } + } + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + + #mainly for the -space states: + #redirect to another state $c based on a state transition from $whatever to $b + # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. + #this is useful as we often don't know state $b. 
e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next + variable spacePopTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + + + variable state_list ;#reset every tomlish::decode::toml + + namespace export tomlish toml + namespace ensemble create + + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state + variable nest + variable v + + set prevstate $currentstate + + + variable spacePopTransitions + variable spacePushTransitions + variable spaceSameTransitions + + variable last_space_action "none" + variable last_space_type "none" + variable state_list + + set result "" + set starttok "" + + if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { + set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" + switch -exact -- [lindex $transition_to 0] { + POPSPACE { + spacestack pop + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + + set last_space_action "pop" + set last_space_type $type + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + } else { + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. 
+ # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } + } + set result $next + } + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" + } else { + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } + } + set result $next + } + zeropoppushspace { + if {$nest > 0} { + #pop back down to the root level (table-space) + spacestack pop + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + + set last_space_action "pop" + set last_space_type $type + + #----- + #standard pop + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + #----- + } + #re-entrancy + + #set next [list PUSHSPACE [lindex $transition_to 1]] + set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] + } + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. 
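+                        #e.g for the stateMatrix entry:  startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}
+                        #the entry now on top of spacestack is the *parent* space - it has just gained
+                        #returnstate keyval-tail, and the new itable-space entry is pushed on top of it further below.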
+ } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + + set last_space_action "push" + set last_space_type "space" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" + } else { + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } + } + set result $next + } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } + default { + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word + } + } + } else { + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" + } + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] + } + + proc report_line {{line ""}} { + variable linenum + variable is_parsing + if {$is_parsing} { + if {$line eq ""} { + set line $linenum + } + return "Line Number: $line" + } else { + #not in the middle of parsing tomlish text - return nothing. + return "" + } + } + + #produce a *slightly* more readable string rep of the nest for puts etc. 
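+    #illustrative sketch only (the nest fragment below is an assumed shape, not taken from a real parse):
+    #  nest_pretty1 {{KEY fruit {WHITESPACE { }}} {NEWLINE lf}}
+    #recurses into KEY/QKEY/SQKEY/TABLE/ARRAY elements, puts each NEWLINE element on its own line,
+    #and leaves every other element inline - so a dump of a deeply nested structure is easier to scan.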
+ proc nest_pretty1 {list} { + set prettier "{" + + foreach el $list { + if { [lindex $el 0] eq "NEWLINE"} { + append prettier "[list $el]\n" + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { + append prettier [nest_pretty1 $el] + } else { + append prettier "[list $el] " + } + } + append prettier "}" + return $prettier + } + + proc set_tokenType {t} { + variable tokenType + variable tokenType_list + if {![info exists tokenType]} { + set tokenType "" + } + lappend tokenType_list $t + set tokenType $t + } + + proc switch_tokenType {t} { + variable tokenType + variable tokenType_list + lset tokenType_list end $t + set tokenType $t + } + + proc get_tokenType {} { + variable tokenType + return $tokenType + } + + proc _shortcircuit_startquotesequence {} { + variable tok + variable i + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + set_tokenType "startquote" + incr i -1 + return -level 2 1 + } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" + set_tokenType "startquote" + set tok "\"" + incr i -2 + return -level 2 1 + } + } + + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "tomlish set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "tomlish set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "tomlish set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "tomlish set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "tomlish Unexpected. 
Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + + #returns 0 or 1 + #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + + proc tok {s} { + variable nest + variable v + variable i + variable tok + variable type ;#character type + variable state ;#FSM + + + variable tokenType + variable tokenType_list + + + variable endToken + + variable lastChar + + variable braceCount + variable bracketCount + + + #------------------------------ + #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof + variable token_waiting + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] + return 1 + } + #------------------------------ + + set resultlist [list] + set sLen [tcl::string::length $s] + + set slash_active 0 + set quote 0 + set c "" + set multi_dquote "" + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $s [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $s $i] + set cindex $i + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" + #puts "got char $c during tokenType '$tokenType'" + incr i ;#must incr here because we do returns inside the loop + + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + switch -exact -- $ctest { + # { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" + } + whitespace { + # hash marks end of whitespace token + #do a return for the whitespace, set token_waiting + #set_token_waiting type comment value "" complete 1 + incr i -1 ;#leave comment for next run + return 1 + } + untyped_value { + #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? + #we will accept a comment marker as an immediate terminator of the untyped_value. + incr i -1 + return 1 + } + starttablename - starttablearrayname { + #fix! + error "tomlish Character '#' is invalid first character for $tokenType. 
[tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } + default { + #quotedkey, itablequotedkey, string,literal, multistring + append tok $c + } + } + } else { + switch -- $state { + multistring-space { + set_tokenType stringpart + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } + } + } + lc { + #left curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "tomlish Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "tomlish state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + } + } + } + + } + rc { + #right curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! 
- but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "tomlish right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "tomlish endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "tomlish state '$state'. 
endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + + } + lb { + #left square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + itable-space { + #handle state just to give specific error msg + error "tomlish state: '$state'. Left square bracket invalid. Cannot start array in inline table without key. Use key=\[\] syntax. [tomlish::parse::report_line]" + } + default { + error "tomlish state: '$state'. 
startarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + rb { + #right square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } else { + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + set_token_waiting type endtablename value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } + } + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\]" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } + default { + error "tomlish state '$state'. endarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
+ #backslash + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "tomlish Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + starttablename - starttablearrayname { + error "tomlish backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + barekey { + error "tomlish Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "tomlish Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tomlish tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } + } + } + } + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { + append tok $c + if {$existingtoklen > 2} { + error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { + append tok $c + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" + } + } + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + #temp token creatable only during value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + append tok $c + } + 2 { + #switch? 
+ append tok $c + set_tokenType triple_squote + return 1 + } + default { + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" + } + } + } + literal { + #slash_active always false + #terminate the literal + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + literalpart { + #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) + #todo + # idea: end this literalpart (possibly 'temporarily') + # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack + # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) + incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + return 1 + } + itablesquotedkey { + set_token_waiting type endsquote value "'" complete 1 startindex $cindex + return 1 + } + squotedkey { + ### + #set_token_waiting type endsquote value "'" complete 1 + return 1 + } + starttablename - starttablearrayname { + #!!! + incr i -1 + return 1 + } + tablename - tablearrayname { + append tok $c + } + default { + append tok $c + } + } + } else { + switch -exact -- $state { + value-expected - array-space { + set_tokenType "_start_squote_sequence" + set tok "'" + } + itable-keyval-value-expected - keyval-value-expected { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + table-space { + ### + set_tokenType "squotedkey" + set tok "" + } + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + tablename-state { + #first char in tablename-state/tablearrayname-state + set_tokenType tablename + append tok "'" + } + tablearrayname-state { + set_tokenType tablearrayname + append tok "'" + } + literal-state { + tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" + set_tokenType literal + incr -1 + return 1 + } + multistring-space { + error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" + } + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up an squote_seq to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "squote_seq_begin" + set tok "'" + return 1 + } + dottedkey-space { + set_tokenType squotedkey + } + default { + error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" + } + } + } + + } + dq { + #double quote + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + #switch vs set? 
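+                                #third consecutive double quote - the pending startquotesequence is promoted
+                                #to a single startmultiquote token (""" opens a multiline basic string)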
+ set_tokenType "startmultiquote" + return 1 + } else { + error "tomlish unexpected token length $toklen in 'startquotesequence'" + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 + return 1 + } + 2 { + set_tokenType "startsquote" + incr i -2 + return 1 + } + default { + error "tomlish unexpected _start_squote_sequence length $toklen" + } + } + } + literal - literalpart { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } + } + } + } + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 + return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} + } + default { + set_token_waiting type startquote value "\"" complete 1 startindex $cindex + return 1 + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" + append tok $c + } else { + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "tomlish got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + keyval-value-expected - value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + itable-keyval-value-expected { + #JMN 2025 - review + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! 
+ #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { + if {$multi_dquote eq "\"\""} { + tomlish::log::debug "- tokloop char dq ---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" + return 1 + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 + #set multi_dquote "" + #return 1 + } else { + append multi_dquote "\"" + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { + set_tokenType "startquote" + set tok $c + return 1 + } + itable-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin + set tok $c + } + default { + error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + } + } + } + } + = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { + #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type equal value = complete 1 startindex $cindex + return 1 + } + } + barekey { + #set_token_waiting type equal value = complete 1 + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } + default { + error "tomlish unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= + } + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 + } + default { + set_tokenType "equal" + set tok = + return 1 + } + } + } + } + cr { + #REVIEW! + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \r carriage return + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
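+ #a lone \r with no active token starts a 'newline' token holding cr;
+ #a following \n is appended in the lf arm to complete it as crlf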
+ set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + append tok $c + } + literalpart { + #part of MLL string (multi-line literal string) + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warn "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } + stringpart { + #part of MLB string (multi-line basic string) + #jmn2025 - review + #append tok $dquotes$c + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } + } + } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) + set_tokenType "newline" + set tok cr + } + } + lf { + # \n newline + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + newline { + #review + #this lf is the trailing part of a crlf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] + incr i -1 + return 1 + } else { + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } + starttablename - tablename - tablearrayname - starttablearrayname { + error "tomlish Character is invalid in $tokenType. [tomlish::parse::report_line]" + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. 
(must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } + } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} + } + } + , { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + #stringpart can have up to 2 quotes too + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type comma value "," complete 1 startindex $cindex + return 1 + } + } + default { + set_token_waiting type comma value "," complete 1 startindex $cindex + if {$had_slash} {append tok "\\"} + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "literalpart" + set tok "," + } + default { + set_tokenType "comma" + set tok "," + return 1 + } + } + } + } + . { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
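+ #a period acts as the dotted-key separator (dotsep) while a key is being read,
+ #but is ordinary data within strings, comments, table names and untyped values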
+ set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped_value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "tomlish Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "tomlish Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #subtable - split later - review + append tok $c + } + barekey { + #e.g x.y = 1 + #we need to transition the barekey to become a structured table name ??? review + #x is the tablename y is the key + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 + } + default { + error "tomlish Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 + #return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." + } + multiliteral-space { + set_tokenType "literalpart" + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" + set tok "." + return 1 + } + default { + set_tokenType "untyped_value" + set tok "." + } + } + } + + } + " " { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
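+ #whitespace terminates bare keys and untyped values, is significant data within
+ #strings, literals and table names, and is stored as separate WS tokens within multistrings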
+ if {[tcl::string::length $tokenType]} { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #todo had_slash - emit token or error + #whitespace is a terminator for bare keys + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + comment { + if {$had_slash} { + append tok "\\" + } + append tok $dquotes$c + } + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearrayname { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "whitespace" + append tok $c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + if {$had_slash} { + error "tomlish unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } + } + } + } + tab { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
+ + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set_token_waiting type whitespace value $c complete 1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "whitespace" + append tok $c + } + } + } + } + bom { + #BOM (Byte Order Mark) - ignored by token consumer + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + _start_squote_sequence { + #assert - tok will be one or two squotes only + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart { + append tok $c + } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } + } + default { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
+ set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } + } + barekey { + if {[tomlish::utils::is_barekey $c]} { + append tok $c + } else { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + incr i -1 + #allow statemachine to set context for subsequent chars + return 1 + } + stringpart { + append tok $dquotes$c + } + default { + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname + append tok $c + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + table-space - itable-space { + #if no currently active token - assume another key value pair + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { + set_tokenType "tablename" + set tok $c + } + tablearrayname-state { + set_tokenType "tablearrayname" + set tok $c + } + dottedkey-space { + set_tokenType barekey + set tok $c + } + default { + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" + set tok $c + } + } + } + } + } + + } + + #run out of characters (eof) + if {[tcl::string::length $tokenType]} { + #check for invalid ending tokens + #if {$state eq "err-state"} { + # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" + #} + switch -exact -- $tokenType { + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #set_token_waiting type string value "" complete 1 + return 1 + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open literal + error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" + } + 2 { + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" + set tok "" + return 1 + } + } + } + } + set_token_waiting type eof value eof complete 1 startindex $i ;#review + return 1 + } else { + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + set tokenType "eof" + set tok "eof" + } + return 0 + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::parse ---}] +} + +namespace eval tomlish::dict { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + + proc is_tomltype {d} { + expr {[dict size $d] == 2 && [dict exists $d type] && [dict exists $d value]} + } + proc is_tomltype2 {d} { + upvar ::tomlish::tags tags + expr {[lindex $d 0] eq "type" && [lindex $d 1] in $tags} + } + proc last_tomltype_posn {d} { + set last_simple -1 + set dictposn [expr {[dict size $d] -1}] + foreach k [lreverse [dict keys $d]] { + set dval [dict get $d $k] + if {[is_tomltype $dval]} { + set last_simple $dictposn + break + } + incr dictposn -1 + } + return $last_simple + } + + + #review + proc name_from_tablestack {tablestack} { + set name "" + foreach tinfo [lrange $tablestack 1 end] { + lassign $tinfo type namepart + switch -- $type { + T { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + I { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + default { + #end at first break in the leading sequence of T & I tablenames + break + } + } + } + return $name + } + +} + +tcl::namespace::eval tomlish::app { + variable applist [list encoder decoder test] + + #*** !doctools + #[subsection {Namespace tomlish::app}] + #[para] + #[list_begin definitions] + + proc decoder {args} { + #*** !doctools + #[call app::[fun decoder] [arg args]] + #[para] read toml on stdin until EOF + #[para] on error - returns non-zero exit code and writes error on stderr + #[para] on success - returns zero exit code and writes JSON encoding of the data on stdout + #[para] This decoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + #fconfigure stdin -encoding utf-8 + fconfigure stdin -translation binary + #Just slurp it all - presumably we are not handling massive amounts of data on stdin. + # - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. + if {[catch { + set toml [read stdin] + }]} { + exit 2 ;#read error + } + try { + set j [::tomlish::toml_to_json $toml] + } on error {em} { + puts stderr "decoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $j + exit 0 + } + + proc encoder {args} { + #*** !doctools + #[call app::[fun encoder] [arg args]] + #[para] read JSON on stdin until EOF + #[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation + #[para] return zero exitcode and TOML data on stdout if JSON data can be converted. 
+ #[para] This encoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + fconfigure stdin -translation binary + if {[catch { + set json [read stdin] + }]} { + exit 2 ;#read error + } + try { + set toml [::tomlish::json_to_toml $json] + } on error {em} { + puts stderr "encoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $toml + exit 0 + } + + proc test {args} { + set opts [dict merge [dict create] $args] + + package require test::tomlish + if {[dict exists $opts -suite]} { + test::tomlish::suite [dict get $opts -suite] + } + test::tomlish::run + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::app ---}] +} + +proc ::tomlish::appnames {} { + set applist [list] + foreach cmd [info commands ::tomlish::app::*] { + lappend applist [namespace tail $cmd] + } + return $applist +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace tomlish::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +if {$argc > 0} { + puts stderr "argc: $argc args: $argv" + + if {($argc == 1)} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { + puts stdout "Usage: -app where appname one of:[tomlish::appnames]" + exit 0 + } else { + puts stderr "Argument '$argv' not understood. Try -help" + exit 1 + } + } + set opts [dict create] + set opts [dict merge $opts $argv] + + set opts_understood [list -app ] + if {"-app" in [dict keys $opts]} { + #Don't vet the remaining opts - as they are interpreted by each app + } else { + foreach key [dict keys $opts] { + if {$key ni $opts_understood} { + puts stderr "Option '$key' not understood" + exit 1 + } + } + } + if {[dict exists $opts -app]} { + set app [dict get $opts -app] + if {$app ni [tomlish::appnames]} { + puts stderr "app '[dict get $opts -app]' not found" + exit 1 + } + tomlish::app::$app {*}$opts + } +} + +## Ready +package provide tomlish [namespace eval tomlish { + variable pkg tomlish + variable version + set version 1.1.2 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/vendormodules/tomlish-1.1.2.tm b/src/vendormodules/tomlish-1.1.2.tm new file mode 100644 index 00000000..9270ca9c --- /dev/null +++ b/src/vendormodules/tomlish-1.1.2.tm @@ -0,0 +1,5566 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. 
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application tomlish 1.1.2 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin tomlish_module_tomlish 0 1.1.2] +#[copyright "2024"] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] +#[require tomlish] +#[keywords module parsing toml configuration] +#[description] +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of tomlish +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by tomlish +#[list_begin itemized] + +package require Tcl 8.6- +package require struct::stack +package require logger + +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {struct::stack}] + +#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') + + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish { + namespace export {[a-z]*}; # Convention: export all lowercase + variable types + + #IDEAS: + # since get_toml produces tomlish with whitespace/comments intact: + # tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace + # - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? + # - separate addKey?? + # - deleteKey (delete leaf) + # - deleteTable (delete table - if only has leaves? - option to delete with child tables?) + # - set/add Table? - position in doc based on existing tables/subtables? 
+ + #The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - + # because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. + #The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n + #The newline is part of the keyval structure so makes reordering easier + #example from_toml "a=1\nb=2\n\n\n" + # 0 = TOMLISH + # 1 = KEY a = {INT 1} {NEWLINE lf} + # 2 = NEWLINE lf + # 3 = KEY b = {INT 2} {NEWLINE lf} + # 4 = NEWLINE lf + # 5 = NEWLINE lf + + + #ARRAY is analogous to a Tcl list + #TABLE is analogous to a Tcl dict + #WS = inline whitespace + #KEY = bare key and value + #QKEY = double quoted key and value ;#todo - rename to DQKEY? + #SQKEY = single quoted key and value + #ITABLE = inline table (*can* be anonymous table) + # inline table values immediately create a table with the opening brace + # inline tables are fully defined between their braces, as are dotted-key subtables defined within + # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained + + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT DOTTEDKEY KEY QKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL DATETIME] + #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) + #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) + set min_int -9223372036854775808 ;#-2^63 + set max_int +9223372036854775807 ;#2^63-1 + + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" + puts stderr $msg + } + logger::initNamespace ::tomlish + foreach lvl [logger::levels] { + interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl + log::logproc $lvl tomlish_log_$lvl + } + + #*** !doctools + #[subsection {Namespace tomlish}] + #[para] Core API functions for tomlish + #[list_begin definitions] + + proc tags {} { + return $::tomlish::tags + } + + #helper function for to_dict + proc _get_keyval_value {keyval_element} { + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" + set found_value 0 + #find the value + # 3 is the earliest index at which the value could occur (depending on whitespace) + set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error "tomlish _get_keyval_value keyval_element doesn't seem to be a properly structured { = } list" + } + foreach sub [lrange $keyval_element 2 end] { + #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey + switch -exact -- [lindex $sub 0] { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + set type [lindex $sub 0] + set value [lindex $sub 1] + set found_sub $sub + incr found_value 1 + } + default {} + } + } + if {!$found_value} { + error "tomlish Failed to find value element in KEY. '$keyval_element'" + } + if {$found_value > 1} { + error "tomlish Found multiple value elements in KEY, expected exactly one. 
'$keyval_element'" + } + + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + #simple (non-container, no-substitution) datatype + set result [list type $type value $value] + } + STRING - STRINGPART { + set result [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL - LITERALPART { + #REVIEW + set result [list type $type value $value] + } + TABLE { + #invalid? + error "tomlish _get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::to_dict [list $found_sub]] + } + ARRAY { + #we need to recurse to get the corresponding dict for the contained item(s) + #pass in the whole $found_sub - not just the $value! + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::to_dict [list $found_sub]]] + } + default { + error "tomlish Unexpected value type '$type' found in keyval '$keyval_element'" + } + } + return $result + } + + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "tomlish _get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } + + + #to_dict is a *basic* programmatic datastructure for accessing the data. + # produce a dictionary of keys and values from a tomlish tagged list. + # to_dict is primarily for reading toml data. + #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, + # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. + # creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. + #A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. + # + #Note, currently we get a plain sub dictionary when an inline table is a direct value for a key, but an ITABLE when it's in an ARRAY - REVIEW + #Namespacing? 
+ #ie note the difference: + #[Data] + #temp = { cpu = 79.5, case = 72.0} + # versus + #[Data] + #temps = [{cpu = 79.5, case = 72.0}] + proc to_dict {tomlish} { + + #keep track of which tablenames have already been directly defined, + # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' + #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. + #we don't error out just because a previous tablename segment has already appeared. + variable tablenames_seen [list] + + + log::info ">>> processing '$tomlish'<<<" + set items $tomlish + + foreach lst $items { + if {[lindex $lst 0] ni $::tomlish::tags} { + error "supplied string does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + } + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + #ignore TOMLISH tag at beginning + set items [lrange $tomlish 1 end] + } + + set datastructure [dict create] + foreach item $items { + set tag [lindex $item 0] + #puts "...> item:'$item' tag:'$tag'" + switch -exact -- $tag { + KEY - QKEY - SQKEY { + log::debug "--> processing $tag: $item" + set key [lindex $item 1] + if {$tag eq "QKEY"} { + set key [::tomlish::utils::unescape_string $key] + } + #!todo - normalize key. (may be quoted/doublequoted) + + if {[dict exists $datastructure $key]} { + error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." + } + + #lassign [_get_keyval_value $item] type val + set keyval_dict [_get_keyval_value $item] + dict set datastructure $key $keyval_dict + } + DOTTEDKEY { + log::debug "--> processing $tag: $item" + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { + #empty?? probably invalid. review + #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively + error "DOTTED key has no parts - invalid? '$item'" + } elseif {[llength $dotted_key_hierarchy] == 1} { + #dottedkey is only a key - no table component + set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] + } else { + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] + } + + #ensure empty tables are still represented in the datastructure + set pathkeys [list] + foreach k $table_hierarchy { + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] + } else { + tomlish::log::notice "to_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" + } + } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict + } + TABLE { + set tablename [lindex $item 1] + set tablename [::tomlish::utils::tablename_trim $tablename] + + if {$tablename in $tablenames_seen} { + error "Table name '$tablename' has already been directly defined in the toml data. Invalid." 
+ } + + log::debug "--> processing $tag (name: $tablename): $item" + set name_segments [::tomlish::utils::tablename_split $tablename] + set last_seg "" + #toml spec rule - all segments mst be non-empty + #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. + + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] + + foreach rawseg $name_segments { + + set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes + set c1 [tcl::string::index $rawseg 0] + set c2 [tcl::string::index $rawseg end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes are processed within it. + set seg [tcl::string::range $rawseg 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + set seg [::tomlish::utils::unescape_string [tcl::string::range $rawseg 1 end-1]] + } else { + set seg $rawseg + } + + #no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. + #if {$rawseg eq ""} { + # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" + #} + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg + + if {[dict exists $datastructure {*}$table_key_hierarchy]} { + #It's ok for this key to already exist *if* it was defined by a previous tablename, + # but not if it was defined as a key/qkey/skey ? + + set testkey [join $table_key_hierarchy_raw .] + + set testkey_length [llength $table_key_hierarchy_raw] + set found_testkey 0 + if {$testkey in $tablenames_seen} { + set found_testkey 1 + } else { + #see if it was defined by a longer entry + foreach seen $tablenames_seen { + set seen_segments [::tomlish::utils::tablename_split $seen] + #these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, + # and strip the quotes from both single-quoted and double-quoted entries. + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + + #VVV the test below is wrong VVV! + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + puts stderr "testkey:'$testkey' vs seen_match:'$seen_match'" + if {$testkey eq $seen_match} { + set found_testkey 1 + } + } + } + + if {$found_testkey == 0} { + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + append msg "tablenames_seen:" + foreach ts $tablenames_seen { + append msg " " $ts \n + } + error $msg + } + } + + } + + #ensure empty tables are still represented in the datastructure + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" + } + } + + #We must do this after the key-collision test above! 
+ lappend tablenames_seen $tablename + + + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" + + #now add the contained elements + foreach element [lrange $item 2 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + KEY - QKEY - SQKEY { + #obsolete ? + set keyval_key [lindex $element 1] + if {$type eq "QKEY"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in table context. Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + #now make sure we add an empty value if there were no contained elements! + #!todo. + } + ITABLE { + #SEP??? + set datastructure [list] + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "to_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in ITABLE context. 
Expected only KEY,QKEY,SQKEY,NEWLINE,COMMENT,WS" + } + } + } + } + ARRAY { + #arrays in toml are allowed to contain mixtures of types + set datastructure [list] + log::debug "--> processing array: $item" + + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::to_dict [list $element]]] + } + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments + } + default { + error "Unexpected value type '$type' found in array" + } + } + } + } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted + switch -exact -- $type { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "double quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? 
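+ # e.g (illustrative) the multistring  """one \
+ #                                         two"""   decodes to "one two"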
+ set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + WS - COMMENT - NEWLINE { + #ignore + } + default { + error "Unexpected tag '$tag' in Tomlish list '$tomlish'" + } + } + } + return $datastructure + } + + + proc _from_dictval_tomltype {parents tablestack keys typeval} { + set type [dict get $typeval type] + set val [dict get $typeval value] + switch -- $type { + ARRAY { + set subitems [list] + foreach item $val { + lappend subitems [_from_dictval [list {*}$parents ARRAY] $tablestack $keys $item] SEP + } + if {[lindex $subitems end] eq "SEP"} { + set subitems [lrange $subitems 0 end-1] + } + return [list ARRAY {*}$subitems] + } + ITABLE { + if {$val eq ""} { + return ITABLE + } else { + return [_from_dictval [list {*}$parents ITABLE] $tablestack $keys $val] + } + } + MULTISTRING { + #value is a raw string that isn't encoded as tomlish + #create a valid toml snippet with the raw value and decode it to the proper tomlish MULTISTRING format + set tomlpart "x=\"\"\"\\\n" + append tomlpart $val "\"\"\"" + set tomlish [tomlish::decode::toml $tomlpart] + #e.g if val = " etc\nblah" + #TOMLISH {DOTTEDKEY {{KEY x}} = {MULTISTRING CONT {NEWLINE LF} {WS { }} {STRINGPART etc} {NEWLINE lf} {STRINGPART blah} } } + #lindex 1 3 is the MULTISTRING tomlish list + return [lindex $tomlish 1 3] + } + MULTILITERAL { + set tomlpart "x='''\n" + append tomlpart $val ''' + set tomlish [tomlish::decode::toml $tomlpart] + return [lindex $tomlish 1 3] + } + default { + return [list $type $val] + } + } + } + + proc _from_dictval {parents tablestack keys vinfo} { + set k [lindex $keys end] + if {[regexp {\s} $k] || [string first . $k] >= 0} { + #Any dot in the key would have been split by to_dict - so if it's present here it's part of this key - not a level separator! + #todo - QKEY? 
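+ #e.g a key such as {a.b} or {my key} cannot be re-emitted bare - quoting it as an SQKEY ('a.b')
+ #stops the dot (or whitespace) being reinterpreted as a key separator when the toml is re-parsed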
+ set K_PART [list SQKEY $k] + } else { + set K_PART [list KEY $k] + } + puts stderr "---parents:'$parents' keys:'$keys' vinfo: $vinfo---" + puts stderr "---tablestack: $tablestack---" + set result [list] + set lastparent [lindex $parents end] + if {$lastparent in [list "" do_inline]} { + if {[tomlish::dict::is_tomltype $vinfo]} { + set type [dict get $vinfo type] + #treat ITABLE differently? + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result DOTTEDKEY [list $K_PART {WS { }}] = {WS { }} $sublist {NEWLINE lf} + } else { + #set result [list TABLE $k {NEWLINE lf}] + if {$vinfo ne ""} { + + #set result [list DOTTEDKEY [list [list KEY $k]] = ] + #set records [list ITABLE] + + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + + if {$lastparent eq "do_inline"} { + set result [list DOTTEDKEY [list $K_PART] =] + set records [list ITABLE] + } else { + #review - quoted k ?? + set result [list TABLE $k {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $k]] + set records [list] + } + + + + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >= 0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART {WS { }}] = {WS { }} $sublist] + } else { + if {$vv eq ""} { + #experimental + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline KEY $vk (tablestack:$tablestack)" + #set tname [tomlish::dict::name_from_tablestack [list {*}$tablestack [list T $vk]]] + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + } else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + set tablestack [list {*}$tablestack [list I $vk]] + } + } else { + if { 0 } { + #experiment.. sort of getting there. + if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + puts stderr "_from_dictval could uninline2 KEYS [list {*}$keys $vk] (tablestack:$tablestack)" + set tname [join [list {*}$keys $vk] .] + set record [list TABLE $tname {NEWLINE lf}] + set tablestack [list {*}$tablestack [list T $vk]] + + #review - todo? 
+ set dottedkey_value [_from_dictval [list {*}$parents TABLE] $tablestack [list {*}$keys $vk] $vv] + lappend record {*}$dottedkey_value + + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } else { + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + } + if {$dictidx != $lastidx} { + #lappend record SEP + if {$lastparent eq "do_inline"} { + lappend record SEP + } else { + lappend record {NEWLINE lf} + } + } + lappend records $record + incr dictidx + } + if {$lastparent eq "do_inline"} { + lappend result $records {NEWLINE lf} + } else { + lappend result {*}$records {NEWLINE lf} + } + } else { + if {$lastparent eq "do_inline"} { + lappend result DOTTEDKEY [list [list KEY $k]] = ITABLE {NEWLINE lf} + } else { + lappend result TABLE $k {NEWLINE lf} + } + } + } + } else { + #lastparent is not toplevel "" or "do_inline" + if {[tomlish::dict::is_tomltype $vinfo]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] + lappend result {*}$sublist + } else { + if {$lastparent eq "TABLE"} { + #review + dict for {vk vv} $vinfo { + set dottedkey_value [_from_dictval [list {*}$parents DOTTEDKEY] $tablestack [list {*}$keys $vk] $vv] + lappend result [list DOTTEDKEY [list [list KEY $vk]] = $dottedkey_value {NEWLINE lf}] + } + } else { + if {$vinfo ne ""} { + set lastidx [expr {[dict size $vinfo] -1}] + set dictidx 0 + set sub [list] + set result $lastparent ;#e.g sets ITABLE + set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] + dict for {vk vv} $vinfo { + if {[regexp {\s} $vk] || [string first . $vk] >=0} { + set VK_PART [list SQKEY $vk] + } else { + set VK_PART [list KEY $vk] + } + if {[tomlish::dict::is_tomltype $vv]} { + #type x value y + set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] + set record [list DOTTEDKEY [list $VK_PART] = $sublist] + } else { + if {$vv eq ""} { + #can't just uninline at this level + #we need a better method to query main dict for uninlinability at each level + # (including what's been inlined already) + #if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { + # puts stderr "_from_dictval uninline2 KEY $keys" + # set tname [join [list {*}$keys $vk] .] + # set record [list TABLE $tname {NEWLINE lf}] + # set tablestack [list {*}$tablestack [list T $vk]] + #} else { + set record [list DOTTEDKEY [list $VK_PART] = ITABLE] + #} + } else { + #set sub [_from_dictval ITABLE $vk $vv] + set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] + #set record [list DOTTEDKEY [list $VK_PART] = ITABLE $dottedkey_value] + set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] + } + } + if {$dictidx != $lastidx} { + lappend record SEP + } + lappend result $record + incr dictidx + } + } else { + puts stderr "table x-1" + lappend result DOTTEDKEY [list $K_PART] = ITABLE + } + } + } + } + return $result + } + + + proc from_dict {d} { + #consider: + # t1={a=1,b=2} + # x = 1 + #If we represent t1 as an expanded table we get + # [t1] + # a=1 + # b=2 + # x=1 + # --- which is incorrect - as x was a toplevel key like t1! 
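+    #A safe (if less readable) rendering keeps t1 inline so the original ordering is preserved:
+    # t1={a=1,b=2}
+    # x = 1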
+ #This issue doesn't occur if x is itself an inline table + # t1={a=1,b=2} + # x= {no="problem"} + # + # (or if we were to reorder x to come before t1) + + #As the dictionary form doesn't distinguish the structure used to create tables {[table1]\nk=v} vs inline {table1={k=v}} + #Without a solution, from_dict would have to always produce the inline form for toplevel tables unless we allowed re-ordering, + #which is unpreferred here. + + #A possible solution: + #scan the top level to see if all (trailing) elements are themselves dicts + # (ie not of form {type XXX value yyy}) + # + # A further point is that if all root level values are at the 'top' - we can treat lower table-like structures as {[table]} elements + #ie we don't need to force do_inline if all the 'simple' keys are before any compound keys + + #set root_has_values 0 + #approach 1) - the naive approach - forces inline when not always necessary + #dict for {k v} $d { + # if {[llength $v] == 4 && [lindex $v 0] eq "type"} { + # set root_has_values 1 + # break + # } + #} + + + #approach 2) - track the position of last {type x value y} in the dictionary built by to_dict + # - still not perfect. Inlines dotted tables unnecessarily + #This means from_dict doesn't produce output optimal for human editing. + set last_simple [tomlish::dict::last_tomltype_posn $d] + + + ## set parent "do_inline" ;#a value used in _from_dictval to distinguish from "" or other context based parent values + #Any keys that are themselves tables - will need to be represented inline + #to avoid reordering, or incorrect assignment of plain values to the wrong table. + + ## set parent "" + #all toplevel keys in the dict structure can represent subtables. + #we are free to use {[tablename]\n} syntax for toplevel elements. + + + set tomlish [list TOMLISH] + set dictposn 0 + set tablestack [list [list T root]] ;#todo + dict for {t tinfo} $d { + if {$last_simple > $dictposn} { + set parents [list do_inline] + } else { + set parents [list ""] + } + set keys [list $t] + set trecord [_from_dictval $parents $tablestack $keys $tinfo] + lappend tomlish $trecord + incr dictposn + } + return $tomlish + } + + proc json_to_toml {json} { + #*** !doctools + #[call [fun json_to_toml] [arg json]] + #[para] + + set tomlish [::tomlish::from_json $json] + set toml [::tomlish::to_toml $tomlish] + } + + #TODO use huddle? 
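+    #e.g (illustrative sketch - relies on the fish::json/fish::json_toml helper packages used below):
+    #  set tomlish [tomlish::from_json {{"a": 1}}]
+    #  set toml    [tomlish::to_toml $tomlish]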
+ proc from_json {json} { + set jstruct [::tomlish::json_struct $json] + return [::tomlish::from_json_struct $jstruct] + } + + proc from_json_struct {jstruct} { + package require fish::json_toml + return [fish::json_toml::jsonstruct2tomlish $jstruct] + } + + proc toml_to_json {toml} { + set tomlish [::tomlish::from_toml $toml] + return [::tomlish::get_json $tomlish] + } + + proc get_json {tomlish} { + package require fish::json + set d [::tomlish::to_dict $tomlish] + #return [::tomlish::dict_to_json $d] + return [fish::json::from "struct" $d] + } + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +namespace eval tomlish::encode { + #*** !doctools + #[subsection {Namespace tomlish::encode}] + #[para] + #[list_begin definitions] + + #STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness + # take a value of the appropriate type and wrap as a tomlish tagged item + proc string {s} { + return [list STRING $s] + } + + proc int {i} { + #whole numbers, may be prefixed with a + or - + #Leading zeros are not allowed + #Hex,octal binary forms are allowed (toml 1.0) + #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) + #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. + # - We should probably raise an error for number larger than this and suggest the user supply it as a string? + if {[tcl::string::last , $i] > -1} { + error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" + } + if {![::tomlish::utils::int_validchars $i]} { + error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" + } + + if {[::tomlish::utils::is_int $i]} { + return [list INT $i] + } else { + error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" + } + + } + + proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [tcl::string::tolower $f]] + } + if {[::tomlish::utils::is_float $f]} { + return [list FLOAT $f] + } else { + error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" + } + } + + proc datetime {str} { + if {[::tomlish::utils::is_datetime $str]} { + return [list DATETIME $str] + } else { + error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" + } + } + + proc boolean {b} { + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![tcl::string::is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. 
[::tomlish::parse::report_line]" + } else { + if {[expr {$b && 1}]} { + return [::list BOOL true] + } else { + return [::list BOOL false] + } + } + } + + + #TODO + #Take tablename followed by + # a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} + # (accept also key value {STRING }) + # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types + proc table {name args} { + set pairs [list] + foreach t $args { + if {[llength $t] == 4} { + if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { + error "Only items tagged as KEY = currently accepted as name-value pairs for table command" + } + lassign $t _k keystr _eq valuepart + if {[llength $valuepart] != 2} { + error "supplied value must be typed. e.g {INT 1} or {STRING test}" + } + lappend pairs [list KEY $keystr = $valuepart] + } elseif {[llength $t] == 2} { + #!todo - type heuristics + lassign $t n v + lappend pairs [list KEY $n = [list STRING $v]] + } else { + error "'KEY = { toml but + # the first newline is not part of the data. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. + set literal "" + foreach part [lrange $item 1 end] { + append literal [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml '''$literal''' + } + INT - + BOOL - + FLOAT - + DATETIME { + append toml [lindex $item 1] + } + INCOMPLETE { + error "cannot process tomlish term tagged as INCOMPLETE" + } + COMMENT { + append toml "#[lindex $item 1]" + } + BOM { + #Byte Order Mark may appear at beginning of a file. Needs to be preserved. + append toml "\uFEFF" + } + default { + error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." + } + } + + } + return $toml + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::encode ---}] +} +#fish toml from tomlish + +#(encode tomlish as toml) +interp alias {} tomlish::to_toml {} tomlish::encode::tomlish + +# + + +namespace eval tomlish::decode { + #*** !doctools + #[subsection {Namespace tomlish::decode}] + #[para] + #[list_begin definitions] + + #return a Tcl list of tomlish tokens + #i.e get a standard list of all the toml terms in string $s + #where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. + #(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) + # ---------------------------------------------------------------------------------------------- + # NOTE: the production of tomlish from toml source doesn't indicate the toml source was valid!!! + # e.g we deliberately don't check certain things such as duplicate table declarations here. + # ---------------------------------------------------------------------------------------------- + #Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. + # (e.g perhaps a toml editor to highlight violations for fixing) + # A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. 
+ # e.g dicts or an object oriented structure + #Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage + #e.g to_dict will substitute \r \n \uHHHH \UHHHHHHH etc + #This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. + # (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) + #If we were to unescape a tab character for example + # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. + # For this reason, we also do absolutely no line-ending transformations based on platform. + # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + + proc toml {args} { + #*** !doctools + #[call [fun toml] [arg arg...]] + #[para] return a Tcl list of tomlish tokens + + set s [join $args \n] + + namespace upvar ::tomlish::parse is_parsing is_parsing + set is_parsing 1 + + + if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { + tomlish::parse::spacestack destroy + } + struct::stack ::tomlish::parse::spacestack + + namespace upvar ::tomlish::parse last_space_action last_space_action + namespace upvar ::tomlish::parse last_space_type last_space_type + + + namespace upvar ::tomlish::parse tok tok + set tok "" + + namespace upvar ::tomlish::parse type type + namespace upvar ::tomlish::parse tokenType tokenType + ::tomlish::parse::set_tokenType "" + namespace upvar ::tomlish::parse tokenType_list tokenType_list + set tokenType [list] ;#Flat (un-nested) list of tokentypes found + + namespace upvar ::tomlish::parse lastChar lastChar + set lastChar "" + + + set result "" + namespace upvar ::tomlish::parse nest nest + set nest 0 + + namespace upvar ::tomlish::parse v v ;#array keyed on nest level + + + set v(0) {TOMLISH} + array set s0 [list] ;#whitespace data to go in {SPACE {}} element. + set parentlevel 0 + + namespace upvar ::tomlish::parse i i + set i 0 + + namespace upvar ::tomlish::parse state state + + namespace upvar ::tomlish::parse braceCount braceCount + set barceCount 0 + namespace upvar ::tomlish::parse bracketCount bracketCount + set bracketCount 0 + + set sep 0 + set r 1 + namespace upvar ::tomlish::parse token_waiting token_waiting + set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. + + + set state "table-space" + ::tomlish::parse::spacestack push {type space state table-space} + namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) + set linenum 1 + + set ::tomlish::parse::state_list [list] + try { + while {$r} { + set r [::tomlish::parse::tok $s] + #puts stdout "got tok: '$tok' while parsing string '$s' " + set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' + + + + #puts "got token: '$tok' tokenType='$tokenType'. 
while v($nest) = [set v($nest)]" + #puts "-->tok: $tok tokenType='$tokenType'" + set prevstate $state + set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] + #review goNextState could perform more than one space_action + set space_action [dict get $transition_info space_action] + set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below + + if {[tcl::string::match "err-*" $state]} { + ::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" + lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] + return $v(0) + } + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + + if {$space_action eq "pop"} { + #pop_trigger_tokens: newline tablename endarray endinlinetable + #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + set parentlevel [expr {$nest -1}] + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + switch -exact -- $tokenType { + squote_seq { + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the xxx-squote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + switch -- $tok { + ' { + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + } + '' { + #review - we should perhaps return double_squote instead? 
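+                                            #('' with no further squote is an empty single-quoted
+                                            # string value, e.g  key = ''  - hence double_squote)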
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] + } + ''' { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] + } + '''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 4 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the last for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + default { + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + ''''' { + switch -exact -- $prevstate { + leading-squote-space { + error "---- 5 squotes from leading-squote-space - shouldn't get here" + #we should have emitted the triple and left the following squotes for next loop + } + trailing-squote-space { + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE { + lappend v($parentlevel) [LITERALPART "''"] + } + default { + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" + } + } + } + default { + error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + } + } + } + } + puts "---- HERE squote_seq pop <$tok>" + } + triple_squote { + #presumably popping multiliteral-space + ::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + switch -exact -- [lindex $part 0] { + MULTILITERAL { + lappend merged $part + } + LITERALPART { + if {$lasttype eq "LITERALPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. 
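+                                        #e.g (illustrative): x='''<newline>abc''' keeps its leading
+                                        #{NEWLINE lf} element here; to_dict later drops it so the
+                                        #resulting value is simply "abc"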
+ lappend merged $part + } + default { + error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + equal { + if {$prevstate eq "dottedkey-space"} { + tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" + #re-emit for parent space + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] + } + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + tablename { + #note: a tablename only 'pops' if we are greater than zero + error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" + } + tablearrayname { + #!review - tablearrayname different to tablename regarding push/pop? + #note: a tablename only 'pops' if we are greater than zero + error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" + } + endarray { + #nothing to do here. + } + comma { + #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" + } + endinlinetable { + ::tomlish::log::debug "---- endinlinetable for last_space_action pop" + } + endmultiquote { + ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" + } + default { + error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" + } + } + if {$do_append_to_parent} { + #e.g squote_seq does it's own appends as necessary - so won't get here + lappend v($parentlevel) [set v($nest)] + } + + incr nest -1 + + } elseif {$last_space_action eq "push"} { + set prevnest $nest + incr nest 1 + set v($nest) [list] + # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + + + switch -exact -- $tokenType { + squote_seq_begin { + #### + if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { + lassign [dict get $transition_info starttok] starttok_type starttok_val + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType $starttok_type + set tok $starttok_val + } + } + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + barekey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + } + startsquote { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + quotedkey - itablequotedkey { + set v($nest) [list QKEY $tok] ;#$tok is the keyname + } + itablesquotedkey { + set v($nest) [list SQKEY $tok] ;#$tok is the keyname + } + tablename { + #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! 
+ #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish + # back to toml file will be identical. + #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. + # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, + # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. + + #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, + # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the + # tomlish list? + + set test_only [::tomlish::utils::tablename_trim $tok] + ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name + #note also that equivalent tablenames may have different toml representations even after being trimmed! + #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) + #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. + } + tablearrayname { + set test_only [::tomlish::utils::tablename_trim $tok] + puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name + } + startarray { + set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. + } + startinlinetable { + set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. + } + startmultiquote { + ::tomlish::log::debug "---- push trigger tokenType startmultiquote" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE + } + triple_squote { + ::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL + } + default { + error "---- push trigger tokenType '$tokenType' not yet implemented" + } + } + + } else { + #no space level change + switch -exact -- $tokenType { + squotedkey { + puts "---- squotedkey in state $prevstate (no space level change)" + lappend v($nest) [list SQKEY $tok] + } + barekey { + lappend v($nest) [list KEY $tok] + } + dotsep { + lappend v($nest) [list DOTSEP] + } + starttablename { + #$tok is triggered by the opening bracket and sends nothing to output + } + starttablearrayname { + #$tok is triggered by the double opening brackets and sends nothing to output + } + tablename - tablenamearray { + error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" + #set v($nest) [list TABLE $tok] + } + endtablename - endtablearrayname { + #no output into the tomlish list for this token + } + startinlinetable { + puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" + } + startquote { + switch -exact -- $newstate { + string-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itable-quoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startsquote { + switch -exact -- $newstate { + literal-state { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "literal" + set tok "" + } + squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itable-squoted-key { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + multiliteral-space { + #false alarm squote returned from squote_seq pop + ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" + #(single squote - not terminating space) + lappend v($nest) [list LITERALPART '] + #may need to be joined on pop if there are neighbouring LITERALPARTs + } + default { + error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + startmultiquote { + #review + puts stderr "---- got startmultiquote in state $prevstate (no space level change)" + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringpart" + set tok "" + } + endquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + endmultiquote { + #JMN!! + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } + double_squote { + switch -exact -- $prevstate { + keyval-value-expected { + lappend v($nest) [list LITERAL ""] + } + multiliteral-space { + #multiliteral-space to multiliteral-space + lappend v($nest) [list LITERALPART ''] + } + default { + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + multistring { + #review + lappend v($nest) [list MULTISTRING $tok] + } + stringpart { + lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly + } + multiliteral { + lappend v($nest) [LIST MULTILITERAL $tok] + } + literalpart { + lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly + } + quotedkey { + #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + + } + untyped_value { + #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. + if {$tok in {true false}} { + set tag BOOL + } elseif {[::tomlish::utils::is_int $tok]} { + set tag INT + } elseif {[::tomlish::utils::is_float $tok]} { + set tag FLOAT + } elseif {[::tomlish::utils::is_datetime $tok]} { + set tag DATETIME + } else { + error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. 
[tomlish::parse::report_line] (no space level change)" + } + lappend v($nest) [list $tag $tok] + + } + comment { + #puts stdout "----- comment token returned '$tok'------" + lappend v($nest) [list COMMENT "$tok"] + } + equal { + #we append '=' to the nest so that any surrounding whitespace is retained. + lappend v($nest) = + } + comma { + lappend v($nest) SEP + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + whitespace { + lappend v($nest) [list WS $tok] + } + continuation { + lappend v($nest) CONT + } + bom { + lappend v($nest) BOM + } + eof { + #ok - nothing more to add to the tomlish list. + #!todo - check previous tokens are complete/valid? + } + default { + error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" + } + } + } + + if {!$next_tokenType_known} { + ::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" + ::tomlish::parse::set_tokenType "" + set tok "" + } + + if {$state eq "end-state"} { + break + } + + + } + + #while {$nest > 0} { + # lappend v([expr {$nest -1}]) [set v($nest)] + # incr nest -1 + #} + while {[::tomlish::parse::spacestack size] > 1} { + ::tomlish::parse::spacestack pop + lappend v([expr {$nest -1}]) [set v($nest)] + incr nest -1 + + #set parent [spacestack peek] ;#the level being appended to + #lassign $parent type state + #if {$type eq "space"} { + # + #} elseif {$type eq "buffer"} { + # lappend v([expr {$nest -1}]) {*}[set v($nest)] + #} else { + # error "invalid spacestack item: $parent" + #} + } + + } finally { + set is_parsing 0 + } + return $v(0) + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::decode ---}] +} +#decode toml to tomlish +interp alias {} tomlish::from_toml {} tomlish::decode::toml + +namespace eval tomlish::utils { + #*** !doctools + #[subsection {Namespace tomlish::utils}] + #[para] + #[list_begin definitions] + + + #tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace + # tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] + #trimmed, the tablename becomes {a.b.c} + # A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] + #ie whitespace is only irrelevant if it's outside a quoted segment + #trimmed, the tablename becomes {a.b."c etc "} + proc tablename_trim {tablename} { + set segments [tablename_split $tablename false] + set trimmed_segments [list] + foreach seg $segments { + lappend trimmed_segments [::string trim $seg " \t"] + } + return [join $trimmed_segments .] 
+ } + + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [tcl::string::length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + + #utils::tablename_split + proc tablename_split {tablename {normalize false}} { + #we can't just split on . because we have to handle quoted segments which may contain a dot. + #eg {dog."tater.man"} + set sLen [tcl::string::length $tablename] + set segments [list] + set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + #quoted is for double-quotes, litquoted is for single-quotes (string literal) + set seg "" + for {set i 0} {$i < $sLen} {incr i} { + + if {$i > 0} { + set lastChar [tcl::string::index $tablename [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $tablename $i] + + if {$c eq "."} { + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } + } + } elseif {($c eq "\"") && ($lastChar ne "\\")} { + if {$mode eq "unknown"} { + if {[tcl::string::trim $seg] ne ""} { + #we don't allow a quote in the middle of a bare key + error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" + } + set mode "quoted" + set seg "\"" + } elseif {$mode eq "unquoted"} { + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" ;#make sure we only accept a dot or end-of-data now. + } elseif {$mode eq "litquoted"} { + append seg $c + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } + } elseif {($c eq "\'")} { + if {$mode eq "unknown"} { + append seg $c + set mode "litquoted" + } elseif {$mode eq "unquoted"} { + #single quote inside e.g o'neill + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + + } elseif {$mode eq "litquoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + + } elseif {$c in [list " " \t]} { + if {$mode eq "syntax"} { + #ignore + } else { + append seg $c + } + } else { + if {$mode eq "syntax"} { + error "tablename_split. 
Expected a dot separator. got '$c'. tablename: '$tablename'" + } + if {$mode eq "unknown"} { + set mode "unquoted" + } + append seg $c + } + if {$i == $sLen-1} { + #end of data + ::tomlish::log::debug "End of data: mode='$mode'" + switch -exact -- $mode { + quoted { + if {$c ne "\""} { + error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" + } + if {$normalize} { + lappend segments $seg + } else { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong + } + } + litquoted { + set trimmed_seg [tcl::string::trim $seg] + if {[tcl::string::index $trimmed_seg end] ne "\'"} { + error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" + } + lappend segments $seg + } + unquoted - unknown { + lappend segments $seg + } + syntax { + #ok - segment already lappended + } + default { + lappend segments $seg + } + } + } + } + foreach seg $segments { + set trimmed [tcl::string::trim $seg " \t"] + #note - we explicitly allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + #if {$trimmed in [list "''" "\"\""]} { + # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" + #} + if {$trimmed eq "" } { + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" + } + } + return $segments + } + + proc unicode_escape_info {slashu} { + #!todo + # validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and + # is a valid 'unicode scalar value' + # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive + #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} + if {[tcl::string::match {\\u*} $slashu]} { + set exp {^\\u([0-9a-fA-F]{4}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %4x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } + } else { + return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] + } + } elseif {[tcl::string::match {\\U*} $slashu]} { + set exp {^\\U([0-9a-fA-F]{8}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %8x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } else { + return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] + } + } + } else { + return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] + } + } else { + return [list err [list reason "Supplied string did not start with \\u or \\U" ]] + } + + } + + proc unescape_string {str} { + #note we can't just use Tcl subst because: + # it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. 
+ # it would strip out backslashes inappropriately: e.g "\j" becomes just j + # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn + # it replaces\ with a single whitespace + #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + + set buffer "" + set buffer4 "" ;#buffer for 4 hex characters following a \u + set buffer8 "" ;#buffer for 8 hex characters following a \u + + set sLen [tcl::string::length $str] + + #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc + set slash_active 0 + set unicode4_active 0 + set unicode8_active 0 + + + #!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? + set i 0 + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $str [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $str $i] + ::tomlish::log::debug "unescape_string. got char $c" + scan $c %c n + if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { + #we don't expect unescaped unicode characters from 0000 to 001F - + #*except* for raw tab (which is whitespace) and newlines + error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" + } + incr i ;#must incr here because we do'returns'inside the loop + if {$c eq "\\"} { + if {$slash_active} { + append buffer "\\" + set slash_active 0 + } elseif {$unicode4_active} { + error "unescape_string. unexpected case slash during unicode4 not yet handled" + } elseif {$unicode8_active} { + error "unescape_string. unexpected case slash during unicode8 not yet handled" + } else { + # don't output anything (yet) + set slash_active 1 + } + } else { + if {$unicode4_active} { + if {[tcl::string::length $buffer4] < 4} { + append buffer4 $c + } + if {[tcl::string::length $buffer4] == 4} { + #we have a \uHHHH to test + set unicode4_active 0 + set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$unicode8_active} { + if {[tcl::string::length $buffer8] < 8} { + append buffer8 $c + } + if {[tcl::string::length $buffer8] == 8} { + #we have a \UHHHHHHHH to test + set unicode8_active 0 + set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$slash_active} { + set slash_active 0 + set ctest [tcl::string::map {{"} dq} $c] + switch -exact -- $ctest { + dq { + set e "\\\"" + append buffer [subst -nocommand -novariable $e] + } + b - t - n - f - r { + set e "\\$c" + append buffer [subst -nocommand -novariable $e] + } + u { + set unicode4_active 1 + set buffer4 "" + } + U { + set unicode8_active 1 + set buffer8 "" + } + default { + set slash_active 0 + + append buffer "\\" + append buffer $c + } + } + } else { + append buffer $c + } + } + } + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + if {$unicode4_active} { + error "End of string reached before complete unicode escape sequence \uHHHH" + } + if {$unicode8_active} { + error "End of string reached before complete unicode escape sequence \UHHHHHHHH" + } + if {$slash_active} { + append buffer "\\" + } + return $buffer + } + + proc 
normalize_key {rawkey} { + set c1 [tcl::string::index $rawkey 0] + set c2 [tcl::string::index $rawkey end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes allowed within it. + set key [tcl::string::range $rawkey 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + # + set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only + set key [::tomlish::utils::unescape_string $keydata] + #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. + } else { + set key $rawkey + } + return $key + } + + proc string_to_slashu {string} { + set rv {} + foreach c [split $string {}] { + scan $c %c c + append rv {\u} + append rv [format %.4X $c] + } + return $rv + } + + #'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. + proc nonprintable_to_slashu {s} { + set res "" + foreach i [split $s ""] { + scan $i %c c + + set printable 0 + if {($c>31) && ($c<127)} { + set printable 1 + } + if {$printable} {append res $i} else {append res \\u[format %.4X $c]} + } + set res + } ;#RS + + #check if str is valid for use as a toml bare key + proc is_barekey {str} { + if {[tcl::string::length $str] == 0} { + return 0 + } else { + set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters match the regexp + return 1 + } else { + return 0 + } + } + } + + #test only that the characters in str are valid for the toml specified type 'integer'. + proc int_validchars1 {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_int {str} { + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { + return 0 + } + # --------------------------------------- + + #check +,- only occur in the first position. + if {[tcl::string::last - $str] > 0} { + return 0 + } + if {[tcl::string::last + $str] > 0} { + return 0 + } + set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![tcl::string::is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. + #presumably very large numbers would have to be supplied in a toml file as strings. + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max + if {$numeric_value > $::tomlish::max_int} { + return 0 + } + if {$numeric_value < $::tomlish::min_int} { + return 0 + } + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. 
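+        #e.g (illustrative): accepts 42, +1_000, -7, 0xDEADBEEF, 0o755, 0b1101
+        #rejects 042 (leading zero), 1,000 (comma separator) and values outside the configured 64-bit bounds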
+ return 1 + } + + #test only that the characters in str are valid for the toml specified type 'float'. + proc float_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { + return 1 + } else { + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } + } + } + + proc is_float {str} { + set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) + #Toml spec also disallows leading zeros in the exponent part + #... but this seems less interoperable (some libraries generate leading zeroes in exponents) + #for now we will allow leading zeros in exponents + #!todo - configure 'strict' option to disallow? + #first strip any +, - or _ (just for this test) + set check [tcl::string::map {+ "" - "" _ ""} $str] + set r {([0-9])*} + regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E + set z {([0])*} + regexp $z $intpart leadingzeros + if {[tcl::string::length $leadingzeros] > 1} { + return 0 + } + #for floats, +,- may occur in multiple places + #e.g -2E-22 +3e34 + #!todo - check bounds ? + + #strip underscores for tcl double check + set check [tcl::string::map {_ ""} $str] + #string is double accepts inf nan +NaN etc. + if {![tcl::string::is double $check]} { + return 0 + } + + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'datetime'. + proc datetime_validchars {str} { + set numchars [tcl::string::length $str] + if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + #review - we + proc is_datetime {str} { + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + #!todo - use full RFC 3339 parser? + lassign [split $str T] datepart timepart + #!todo - what if the value is 'time only'? + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + + #!todo - verify time part is reasonable + } else { + return 0 + } + return 1 + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::utils ---}] +} + +namespace eval tomlish::parse { + #*** !doctools + #[subsection {Namespace tomlish::parse}] + #[para] + #[list_begin definitions] + + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? 
+ #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? + + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text + + variable state + # states: + # table-space, itable-space, array-space + # value-expected, keyval-syntax, + # quoted-key, squoted-key + # string-state, literal-state, multistring... + # + # notes: + # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack + + # + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax + # + #stateMatrix defines for each state, actions to take for each possible token. + #single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push instruction and the name of the space to push on the stack. + # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) + + # -- --- --- --- --- --- + #token/state naming guide + # -- --- --- --- --- --- + #tokens : underscore separated or bare name e.g newline, start_quote, start_squote + #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence + #states : always contain at least one dash e.g err-state, table-space + #instructions + # -- --- --- --- --- --- + + + #stateMatrix dict of elements mapping current state to next state based on returned tokens + # current-state {token-encountered next-state ... } + # where next-state can be a 1 or 2 element list. 
+ #If 2 element - the first item is an instruction (ucase) + #If 1 element - it is either a lowercase dashed state name or an ucase instruction + #e.g {PUSHSPACE } or POPSPACE or SAMESPACE + + + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases + + variable stateMatrix + set stateMatrix [dict create] + + #xxx-space vs xxx-syntax inadequately documented - TODO + + # --------------------------------------------------------------------------------------------------------------# + # incomplete example of some state starting at table-space + # --------------------------------------------------------------------------------------------------------------# + # ( = -> value-expected) + # keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) + # keyval-space (autotransition on push ^) + # table-space (barekey^) (startquote -> quoted-key ^) + # --------------------------------------------------------------------------------------------------------------# + + dict set stateMatrix\ + table-space { + bom "table-space"\ + whitespace "table-space"\ + newline "table-space"\ + barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ + squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ + startquote "quoted-key"\ + XXXstartsquote "squoted-key"\ + comment "table-space"\ + starttablename "tablename-state"\ + starttablearrayname "tablearrayname-state"\ + startmultiquote "err-state"\ + endquote "err-state"\ + comma "err-state"\ + eof "end-state"\ + equal "err-state"\ + } + + #itable-space/ curly-syntax : itables + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-testing"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ + } + + + dict set stateMatrix\ + keyval-space {\ + whitespace "keyval-syntax"\ + equal "keyval-value-expected"\ + } + + # ' = ' portion of keyval + dict set stateMatrix\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "keyval-value-expected"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + #### + dict set stateMatrix\ + keyval-value-expected {\ + whitespace "keyval-value-expected"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ + } + #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} + dict set stateMatrix\ + 
leading-squote-space {\ + squote_seq "POPSPACE"\ + } + #dict set stateMatrix\ + # keyval-process-leading-squotes {\ + # startsquote "literal-state"\ + # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + # } + + dict set stateMatrix\ + keyval-tail {\ + whitespace "keyval-tail"\ + newline "POPSPACE"\ + comment "keyval-tail"\ + eof "end-state"\ + } + + dict set stateMatrix\ + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + + + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + untyped_value {"SAMESPACE" "" replay untyped_value}\ + startquote "string-state"\ + startsquote "literal-state"\ + startmultiquote {PUSHSPACE "multistring-space"}\ + triple_squote {PUSHSPACE "multiliteral-space"}\ + startinlinetable {PUSHSPACE itable-space}\ + startarray {PUSHSPACE array-space}\ + comment "err-state-value-expected-got-comment"\ + comma "err-state"\ + newline "err-state"\ + eof "err-state"\ + } + + #dottedkey-space is not used within [tablename] or [[tablearrayname]] + #it is for keyval ie x.y.z = value + dict set stateMatrix\ + dottedkey-space {\ + whitespace "dottedkey-space"\ + dotsep "dottedkey-space"\ + barekey "dottedkey-space"\ + squotedkey "dottedkey-space"\ + quotedkey "dottedkey-space"\ + equal "POPSPACE"\ + newline "err-state"\ + comma "err-state"\ + comment "err-state"\ + } + #dottedkeyend "POPSPACE" + + + + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 + dict set stateMatrix\ + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey 
"itable-keyval-space"\ + endinlinetable "POPSPACE"\ + startquote "itable-quoted-key"\ + comma "itable-space"\ + comment "itable-space"\ + eof "err-state"\ + } + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? + + + #JMN REVIEW + dict set stateMatrix\ + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE "array-space"}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startinlinetable {PUSHSPACE itable-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ + comma "array-space"\ + comment "array-space"\ + eof "err-state-array-space-got-eof"\ + } + dict set stateMatrix\ + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped_value "SAMESPACE"\ + startarray {PUSHSPACE array-space}\ + endarray "POPSPACE"\ + startmultiquote {PUSHSPACE multistring-space}\ + startquote "string-state"\ + startsquote "literal-state"\ + comma "array-space"\ + comment "err-state"\ + } + + + + #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space + dict set stateMatrix\ + quoted-key {\ + whitespace "NA"\ + quotedkey {PUSHSPACE "keyval-space"}\ + newline "err-state"\ + endquote "keyval-syntax"\ + } + dict set stateMatrix\ + squoted-key {\ + whitespace "NA"\ + squotedkey "squoted-key"\ + newline "err-state"\ + } + # endsquote {PUSHSPACE "keyval-space"} + + dict set stateMatrix\ + string-state {\ + whitespace "NA"\ + string "string-state"\ + endquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + literal-state {\ + whitespace "NA"\ + literal "literal-state"\ + endsquote "SAMESPACE"\ + newline "err-state"\ + eof "err-state"\ + } + + + #dict set stateMatrix\ + # stringpart {\ + # continuation "SAMESPACE"\ + # endmultiquote "POPSPACE"\ + # eof "err-state"\ + # } + dict set stateMatrix\ + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "POPSPACE"\ + eof "err-state"\ + } + + + #only valid subparts are literalpart and newline. 
other whitespace etc is within literalpart + #todo - treat sole cr as part of literalpart but crlf and lf as newline + dict set stateMatrix\ + multiliteral-space {\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ + eof "err-premature-eof-in-multiliteral-space"\ + } + + #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes + dict set stateMatrix\ + trailing-squote-space {\ + squote_seq "POPSPACE"\ + } + + + dict set stateMatrix\ + tablename-state {\ + whitespace "NA"\ + tablename {zeropoppushspace table-space}\ + tablename2 {PUSHSPACE table-space}\ + endtablename "tablename-tail"\ + comma "err-state"\ + newline "err-state"\ + } + dict set stateMatrix\ + tablearrayname-state {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace table-space}\ + tablearrayname2 {PUSHSPACE table-space}\ + endtablearray "tablearrayname-tail"\ + comma "err-state"\ + newline "err-state"\ + } + + dict set stateMatrix\ + tablename-tail {\ + whitespace "tablename-tail"\ + newline "table-space"\ + comment "tablename-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + tablearrayname-tail {\ + whitespace "tablearrayname-tail"\ + newline "table-space"\ + comment "tablearrayname-tail"\ + eof "end-state"\ + } + dict set stateMatrix\ + end-state {} + + set knowntokens [list] + set knownstates [list] + dict for {state transitions} $stateMatrix { + if {$state ni $knownstates} {lappend knownstates $state} + dict for {tok instructions} $transitions { + if {$tok ni $knowntokens} {lappend knowntokens $tok} + } + } + dict set stateMatrix nostate {} + foreach tok $knowntokens { + dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" + } + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #build a list of 'push triggers' from the stateMatrix + # ie tokens which can push a new space onto spacestack + set push_trigger_tokens [list] + tcl::dict::for {s transitions} $stateMatrix { + tcl::dict::for {token transition_to} $transitions { + set instruction [lindex $transition_to 0] + switch -exact -- $instruction { + PUSHSPACE - zeropoppushspace { + if {$token ni $push_trigger_tokens} { + lappend push_trigger_tokens $token + } + } + } + } + } + ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + + #mainly for the -space states: + #redirect to another state $c based on a state transition from $whatever to $b + # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. + #this is useful as we often don't know state $b. 
e.g when it is decided by 'POPSPACE' + + #Push to, next + #default first states when we push to these spaces + variable spacePushTransitions { + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state + } + #itable-space itable-space + #Pop to, next + variable spacePopTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + #review + #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail + #leave it out and make the POPSPACE caller explicitly specify it + #keyval-space keyval-tail + + variable spaceSameTransitions { + array-space array-syntax + } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail + + + variable state_list ;#reset every tomlish::decode::toml + + namespace export tomlish toml + namespace ensemble create + + #goNextState has various side-effects e.g pushes and pops spacestack + #REVIEW - setting nest and v elements here is ugly + #todo - make neater, more single-purpose? + proc goNextState {tokentype tok currentstate} { + variable state + variable nest + variable v + + set prevstate $currentstate + + + variable spacePopTransitions + variable spacePushTransitions + variable spaceSameTransitions + + variable last_space_action "none" + variable last_space_type "none" + variable state_list + + set result "" + set starttok "" + + if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { + set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] + ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" + switch -exact -- [lindex $transition_to 0] { + POPSPACE { + spacestack pop + set parent_info [spacestack peek] + set type [dict get $parent_info type] + set parentspace [dict get $parent_info state] + + set last_space_action "pop" + set last_space_type $type + + if {[dict exists $parent_info returnstate]} { + set next [dict get $parent_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + } else { + ### + #review - do away with spacePopTransitions - which although useful to provide a default.. 
+ # - involve error-prone configurations distant to the main state transition configuration in stateMatrix + if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { + set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + } else { + set next $parentspace + ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + } + } + set result $next + } + SAMESPACE { + set currentspace_info [spacestack peek] + ::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" + set type [dict get $currentspace_info type] + set currentspace [dict get $currentspace_info state] + + if {[dict exists $currentspace_info returnstate]} { + set next [dict get $currentspace_info returnstate] + #clear the returnstate on current level + set existing [spacestack pop] + dict unset existing returnstate + spacestack push $existing ;#re-push modification + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" + } else { + if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { + set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" + } else { + set next $currentspace + ::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" + } + } + set result $next + } + zeropoppushspace { + if {$nest > 0} { + #pop back down to the root level (table-space) + spacestack pop + set parentinfo [spacestack peek] + set type [dict get $parentinfo type] + set target [dict get $parentinfo state] + + set last_space_action "pop" + set last_space_type $type + + #----- + #standard pop + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + #----- + } + #re-entrancy + + #set next [list PUSHSPACE [lindex $transition_to 1]] + set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 + #::tomlish::log::notice "goNextState REENTRANCY. calling goNextState $nexttokentype $tokentype" + #set result [::tomlish::parse::goNextState $nexttokentype $tokentype] + ::tomlish::log::debug "--->> zeropoppushspace goNextState REENTRANCY. calling goNextState $nexttokentype $currentstate" + set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] + set result [dict get $transition_info newstate] + } + PUSHSPACE { + set original_target [dict get $transition_to PUSHSPACE] + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. 
+ } + if {[dict exists $transition_to starttok]} { + set starttok [dict get $transition_to starttok] + } + spacestack push [dict create type space state $original_target] + + set last_space_action "push" + set last_space_type "space" + + if {[dict exists $transition_to state]} { + #an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) + set next [dict get $transition_to state] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" + } else { + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $original_target] + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " + } else { + set next $original_target + ::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" + } + } + set result $next + } + TOSTATE { + if {[dict exists $transition_to returnstate]} { + #adjust the existing space record on the stack. + #struct::stack doesn't really support that - so we have to pop and re-push + #todo - investigate a custom stack implementation where we can efficiently lset the top of the stack + set currentspace [spacestack pop] + dict set currentspace returnstate [dict get $transition_to returnstate] + spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. + } + set result [dict get $transition_to TOSTATE] + } + default { + #simplified version of TOSTATE + set result [lindex $transition_to 0] ;#ignore everything but first word + } + } + } else { + ::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" + set result "nostate" + } + lappend state_list [list tokentype $tokentype from $currentstate to $result] + set state $result + ::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " + return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] + } + + proc report_line {{line ""}} { + variable linenum + variable is_parsing + if {$is_parsing} { + if {$line eq ""} { + set line $linenum + } + return "Line Number: $line" + } else { + #not in the middle of parsing tomlish text - return nothing. + return "" + } + } + + #produce a *slightly* more readable string rep of the nest for puts etc. 
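+    # (elements whose first word is KEY, QKEY, SQKEY, TABLE or ARRAY are recursed - each NEWLINE element is followed by a linebreak in the output)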
+ proc nest_pretty1 {list} { + set prettier "{" + + foreach el $list { + if { [lindex $el 0] eq "NEWLINE"} { + append prettier "[list $el]\n" + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY QKEY SQKEY TABLE ARRAY})} { + append prettier [nest_pretty1 $el] + } else { + append prettier "[list $el] " + } + } + append prettier "}" + return $prettier + } + + proc set_tokenType {t} { + variable tokenType + variable tokenType_list + if {![info exists tokenType]} { + set tokenType "" + } + lappend tokenType_list $t + set tokenType $t + } + + proc switch_tokenType {t} { + variable tokenType + variable tokenType_list + lset tokenType_list end $t + set tokenType $t + } + + proc get_tokenType {} { + variable tokenType + return $tokenType + } + + proc _shortcircuit_startquotesequence {} { + variable tok + variable i + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + set_tokenType "startquote" + incr i -1 + return -level 2 1 + } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" + set_tokenType "startquote" + set tok "\"" + incr i -2 + return -level 2 1 + } + } + + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. + proc set_token_waiting {args} { + if {[llength $args] %2 != 0} { + error "tomlish set_token_waiting must have args of form: type value complete 0|1" + } + variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "tomlish set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] + dict for {k v} $args { + switch -exact $k { + type - complete { + dict set waiting $k $v + } + value { + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v + } + default { + error "tomlish set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" + } + } + } + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "tomlish set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "tomlish Unexpected. 
Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] + } + return + } + + #returns 0 or 1 + #tomlish::parse::tok + #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag + # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + + proc tok {s} { + variable nest + variable v + variable i + variable tok + variable type ;#character type + variable state ;#FSM + + + variable tokenType + variable tokenType_list + + + variable endToken + + variable lastChar + + variable braceCount + variable bracketCount + + + #------------------------------ + #Previous run found another (presumably single-char) token + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof + variable token_waiting + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] + return 1 + } + #------------------------------ + + set resultlist [list] + set sLen [tcl::string::length $s] + + set slash_active 0 + set quote 0 + set c "" + set multi_dquote "" + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [tcl::string::index $s [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [tcl::string::index $s $i] + set cindex $i + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" + #puts "got char $c during tokenType '$tokenType'" + incr i ;#must incr here because we do returns inside the loop + + set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + switch -exact -- $ctest { + # { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" + } + whitespace { + # hash marks end of whitespace token + #do a return for the whitespace, set token_waiting + #set_token_waiting type comment value "" complete 1 + incr i -1 ;#leave comment for next run + return 1 + } + untyped_value { + #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? + #we will accept a comment marker as an immediate terminator of the untyped_value. + incr i -1 + return 1 + } + starttablename - starttablearrayname { + #fix! + error "tomlish Character '#' is invalid first character for $tokenType. 
[tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } + default { + #quotedkey, itablequotedkey, string,literal, multistring + append tok $c + } + } + } else { + switch -- $state { + multistring-space { + set_tokenType stringpart + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } + } + } + lc { + #left curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "tomlish Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + itable-keyval-value-expected - keyval-value-expected - value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\{" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\{" + } + default { + error "tomlish state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + } + } + } + + } + rc { + #right curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex + return 1 + } + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} + #invalid! 
- but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + itable-val-tail { + #review + error "tomlish right-curly in itable-val-tail" + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname-state problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + curly-syntax { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itable-val-tail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itable-keyval-syntax { + error "tomlish endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\}" + } + multiliteral-space { + set_tokenType "literalpart" ; #review + set tok "\}" + } + default { + #JMN2024b keyval-tail? + error "tomlish state '$state'. 
endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + + } + lb { + #left square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename { + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + keyval-value-expected - itable-keyval-value-expected - value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + table-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\[" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\[" + } + itable-space { + #handle state just to give specific error msg + error "tomlish state: '$state'. Left square bracket invalid. Cannot start array in inline table without key. Use key=\[\] syntax. [tomlish::parse::report_line]" + } + default { + error "tomlish state: '$state'. 
startarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + rb { + #right square bracket + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } else { + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess + return 1 + } + } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + set_token_waiting type endtablename value "" complete 1 startindex $cindex + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } + } + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + table-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename-state { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" + + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname-state { + error "tomlish unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\]" + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\]" + } + default { + error "tomlish state '$state'. endarray case not implemented [tomlish::parse::report_line]" + } + } + } + } + bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
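+                #a backslash interrupts any pending "" sequence - the captured $dquotes are flushed into the current (or a new) stringpart below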
+ #backslash + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "tomlish Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + literal - literalpart - squotedkey - itablesquotedkey { + #never need to set slash_active true when in single quoted tokens + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + starttablename - starttablearrayname { + error "tomlish backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + barekey { + error "tomlish Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "tomlish Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } + } + multiliteral-space { + #nothing can be escaped in multiliteral-space - not even squotes (?) review + set_tokenType "literalpart" + set tok "\\" + } + default { + error "tomlish tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } + } + } + } + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + #short squote_seq tokens are returned if active during any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + switch -- $state { + leading-squote-space { + append tok $c + if {$existingtoklen > 2} { + error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" + } elseif {$existingtoklen == 2} { + return 1 ;#return tok ''' + } + } + trailing-squote-space { + append tok $c + if {$existingtoklen == 4} { + #maxlen to be an squote_seq is multisquote + 2 = 5 + #return tok ''''' + return 1 + } + } + default { + error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" + } + } + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + #temp token creatable only during value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + append tok $c + } + 2 { + #switch? 
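+                                    #third consecutive squote - promote the sequence to a triple_squote token (opens a multiline literal string)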
+                                    append tok $c
+                                    set_tokenType triple_squote
+                                    return 1
+                                }
+                                default {
+                                    error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'"
+                                }
+                            }
+                        }
+                        literal {
+                            #slash_active always false
+                            #terminate the literal
+                            set_token_waiting type endsquote value "'" complete 1 startindex $cindex
+                            return 1
+                        }
+                        literalpart {
+                            #ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence)
+                            #todo
+                            # idea: end this literalpart (possibly 'temporarily')
+                            # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack
+                            # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above)
+                            incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing
+                            return 1
+                        }
+                        itablesquotedkey {
+                            set_token_waiting type endsquote value "'" complete 1 startindex $cindex
+                            return 1
+                        }
+                        squotedkey {
+                            ###
+                            #set_token_waiting type endsquote value "'" complete 1
+                            return 1
+                        }
+                        starttablename - starttablearrayname {
+                            #!!!
+                            incr i -1
+                            return 1
+                        }
+                        tablename - tablearrayname {
+                            append tok $c
+                        }
+                        default {
+                            append tok $c
+                        }
+                    }
+                } else {
+                    switch -exact -- $state {
+                        value-expected - array-space {
+                            set_tokenType "_start_squote_sequence"
+                            set tok "'"
+                        }
+                        itable-keyval-value-expected - keyval-value-expected {
+                            set_tokenType "squote_seq_begin"
+                            set tok "'"
+                            return 1
+                        }
+                        table-space {
+                            ###
+                            set_tokenType "squotedkey"
+                            set tok ""
+                        }
+                        itable-space {
+                            set_tokenType "squote_seq_begin"
+                            set tok "'"
+                            return 1
+                        }
+                        tablename-state {
+                            #first char in tablename-state/tablearrayname-state
+                            set_tokenType tablename
+                            append tok "'"
+                        }
+                        tablearrayname-state {
+                            set_tokenType tablearrayname
+                            append tok "'"
+                        }
+                        literal-state {
+                            tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?"
+                            set_tokenType literal
+                            incr i -1
+                            return 1
+                        }
+                        multistring-space {
+                            error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]"
+                        }
+                        multiliteral-space {
+                            #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row
+                            #we are building up an squote_seq to determine if
+                            #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or its own literalpart between newlines)
+                            #b) it is exactly ''' and we can terminate the whole multiliteral
+                            #c) it is 4 or 5 squotes where the first 1 or 2 belong in a literalpart and the trailing 3 terminate the space
+                            set_tokenType "squote_seq_begin"
+                            set tok "'"
+                            return 1
+                        }
+                        dottedkey-space {
+                            set_tokenType squotedkey
+                        }
+                        default {
+                            error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]"
+                        }
+                    }
+                }
+
+            }
+            dq {
+                #double quote
+                set had_slash $slash_active
+                set slash_active 0
+
+                if {[tcl::string::length $tokenType]} {
+                    switch -exact -- $tokenType {
+                        squote_seq {
+                            incr i -1
+                            return 1
+                        }
+                        startquotesequence {
+                            set toklen [tcl::string::length $tok]
+                            if {$toklen == 1} {
+                                append tok $c
+                            } elseif {$toklen == 2} {
+                                append tok $c
+                                #switch vs set?
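+                                #third consecutive dquote - emit startmultiquote (opens a multiline basic string)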
+ set_tokenType "startmultiquote" + return 1 + } else { + error "tomlish unexpected token length $toklen in 'startquotesequence'" + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 + return 1 + } + 2 { + set_tokenType "startsquote" + incr i -2 + return 1 + } + default { + error "tomlish unexpected _start_squote_sequence length $toklen" + } + } + } + literal - literalpart { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } + } + } + } + keyval-value-expected - value-expected { + #end whitespace token and reprocess + incr i -1 + return 1 + + #if {$multi_dquote eq "\"\""} { + # set_token_waiting type startmultiquote value "\"\"\"" complete 1 + # set multi_dquote "" + # return 1 + #} else { + # #end whitespace token and reprocess + # incr i -1 + # return 1 + #} + } + default { + set_token_waiting type startquote value "\"" complete 1 startindex $cindex + return 1 + } + } + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" + append tok $c + } else { + set_token_waiting type endquote value "\"" complete 1 startindex $cindex + return 1 + } + } + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "tomlish got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + keyval-value-expected - value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + itable-keyval-value-expected { + #JMN 2025 - review + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! 
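+                            #one or two dquotes are accumulated in multi_dquote until either a third arrives (endmultiquote) or another character flushes them into a stringpart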
+ #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { + if {$multi_dquote eq "\"\""} { + tomlish::log::debug "- tokloop char dq ---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" + return 1 + #set_token_waiting type endmultiquote value "\"\"\"" complete 1 + #set multi_dquote "" + #return 1 + } else { + append multi_dquote "\"" + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok "\"" + } + table-space { + set_tokenType "startquote" + set tok $c + return 1 + } + itable-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + dottedkey-space { + set_tokenType dquote_seq_begin + set tok $c + } + default { + error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + } + } + } + } + = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { + #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type equal value = complete 1 startindex $cindex + return 1 + } + } + barekey { + #set_token_waiting type equal value = complete 1 + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } + default { + error "tomlish unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= + } + multiliteral-space { + set_tokenType "literalpart" + set tok "=" + } + dottedkey-space { + set_tokenType "equal" + set tok "=" + return 1 + } + default { + set_tokenType "equal" + set tok = + return 1 + } + } + } + } + cr { + #REVIEW! + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \r carriage return + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
+ set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + append tok $c + } + literalpart { + #part of MLL string (multi-line literal string) + #we need to split out crlf as a separate NEWLINE to be consistent + ::tomlish::log::warn "literalpart ended by cr - needs testing" + #return literalpart temporarily - allow cr to be reprocessed from multiliteral-space + incr i -1 + return 1 + } + stringpart { + #part of MLB string (multi-line basic string) + #jmn2025 - review + #append tok $dquotes$c + incr i -1 + return 1 + } + starttablename - starttablearrayname { + error "tomlish Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } + } + } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) + set_tokenType "newline" + set tok cr + } + } + lf { + # \n newline + set dquotes $multi_dquote + set multi_dquote "" ;#!! + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal { + #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' + #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + literalpart { + #we allow newlines - but store them within the multiliteral as their own element + #This is a legitimate end to the literalpart - but not the whole multiliteral + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + newline { + #review + #this lf is the trailing part of a crlf + append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] + incr i -1 + return 1 + } else { + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } + starttablename - tablename - tablearrayname - starttablearrayname { + error "tomlish Character is invalid in $tokenType. [tomlish::parse::report_line]" + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. 
(must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "newline" + set tok "lf" + return 1 + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } + } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} + } + } + , { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + #stringpart can have up to 2 quotes too + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + set_token_waiting type comma value "," complete 1 startindex $cindex + return 1 + } + } + default { + set_token_waiting type comma value "," complete 1 startindex $cindex + if {$had_slash} {append tok "\\"} + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," + } + multiliteral-space { + #assert had_slash 0, multi_dquote "" + set_tokenType "literalpart" + set tok "," + } + default { + set_tokenType "comma" + set tok "," + return 1 + } + } + } + } + . { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
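+                #'.' is only a key separator (dotsep) in dottedkey-space - elsewhere it is ordinary data (e.g within floats, dates, strings and comments)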
+ set had_slash $slash_active + set slash_active 0 + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped_value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + literal - literalpart - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + whitespace { + switch -exact -- $state { + multistring-space { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } + dottedkey-space { + incr i -1 + return 1 + } + default { + error "tomlish Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "tomlish Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #subtable - split later - review + append tok $c + } + barekey { + #e.g x.y = 1 + #we need to transition the barekey to become a structured table name ??? review + #x is the tablename y is the key + set_token_waiting type dotsep value "." complete 1 startindex $cindex + return 1 + } + default { + error "tomlish Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #set_token_waiting type period value . complete 1 + #return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." + } + multiliteral-space { + set_tokenType "literalpart" + set tok "." + } + dottedkey-space { + ### + set_tokenType "dotsep" + set tok "." + return 1 + } + default { + set_tokenType "untyped_value" + set tok "." + } + } + } + + } + " " { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
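+                #a space terminates barekey and untyped_value tokens, accumulates into whitespace/comment/string tokens, and is kept as part of tablenames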
+ if {[tcl::string::length $tokenType]} { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #todo had_slash - emit token or error + #whitespace is a terminator for bare keys + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + comment { + if {$had_slash} { + append tok "\\" + } + append tok $dquotes$c + } + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart - squotedkey - itablesquotedkey { + append tok $c + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearrayname { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok "" + if {$had_slash} {append tok "\\"} + append tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "whitespace" + append tok $c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + if {$had_slash} { + error "tomlish unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } + } + } + } + tab { + set dquotes $multi_dquote + set multi_dquote "" ;#!! 
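+                    #Editor's illustration (comment only, not parser logic): in multistring-space a backslash followed
+                    #by whitespace/newline is emitted as a continuation token, so the TOML line-ending-backslash rule
+                    #(strip the newline and whitespace up to the next non-whitespace character) can be applied when the
+                    #string is assembled. TOML input such as:
+                    #   str = """\
+                    #       one \
+                    #       two"""
+                    #is expected to decode to the single line value "one two".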
+ + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) + set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set_token_waiting type whitespace value $c complete 1 + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #set_token_waiting type whitespace value $c complete 1 + incr i -1 + return 1 + } + quotedkey - itablequotedkey - squotedkey - itablesquotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + #for stringpart we store WS separately for ease of processing continuations (CONT stripping) + if {$had_slash} { + #REVIEW + #emit the stringpart - go back to the slash + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + append tok $dquotes + incr i -1 + return 1 + } + } + literal - literalpart { + append tok $c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "tomlish Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename-state { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "whitespace" + append tok $c + } + } + } + } + bom { + #BOM (Byte Order Mark) - ignored by token consumer + if {[tcl::string::length $tokenType]} { + switch -exact -- $tokenType { + _start_squote_sequence { + #assert - tok will be one or two squotes only + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + literal - literalpart { + append tok $c + } + default { + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex + return 1 + } + } + } else { + switch -exact -- $state { + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } + } + default { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[tcl::string::length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
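+                    #Editor's illustration (comment only, not parser logic): the bom branch above tokenizes a leading
+                    #byte order mark separately, and per the comment it is ignored by the token consumer, so
+                    #   set j [::tomlish::toml_to_json "\uFEFFkey = 1"]
+                    #is expected to decode the same as the equivalent input without the BOM.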
+ set slash_active 0 + switch -exact -- $tokenType { + newline { + #incomplete newline + set_tokenType "cr" + incr i -1 + return 1 + } + squote_seq { + incr i -1 + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + _start_squote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } + } + barekey { + if {[tomlish::utils::is_barekey $c]} { + append tok $c + } else { + error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + incr i -1 + #allow statemachine to set context for subsequent chars + return 1 + } + stringpart { + append tok $dquotes$c + } + default { + #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname + append tok $c + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + table-space - itable-space { + #if no currently active token - assume another key value pair + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } + multiliteral-space { + set_tokenType "literalpart" + set tok $c + } + tablename-state { + set_tokenType "tablename" + set tok $c + } + tablearrayname-state { + set_tokenType "tablearrayname" + set tok $c + } + dottedkey-space { + set_tokenType barekey + set tok $c + } + default { + tomlish::log::debug "- tokloop char '$c' setting to untyped_value while state:$state" + set_tokenType "untyped_value" + set tok $c + } + } + } + } + } + + } + + #run out of characters (eof) + if {[tcl::string::length $tokenType]} { + #check for invalid ending tokens + #if {$state eq "err-state"} { + # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" + #} + switch -exact -- $tokenType { + startquotesequence { + set toklen [tcl::string::length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #set_token_waiting type string value "" complete 1 + return 1 + } + } + _start_squote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open literal + error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" + } + 2 { + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + set_tokenType "literal" + set tok "" + return 1 + } + } + } + } + set_token_waiting type eof value eof complete 1 startindex $i ;#review + return 1 + } else { + ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + set tokenType "eof" + set tok "eof" + } + return 0 + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::parse ---}] +} + +namespace eval tomlish::dict { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + + proc is_tomltype {d} { + expr {[dict size $d] == 2 && [dict exists $d type] && [dict exists $d value]} + } + proc is_tomltype2 {d} { + upvar ::tomlish::tags tags + expr {[lindex $d 0] eq "type" && [lindex $d 1] in $tags} + } + proc last_tomltype_posn {d} { + set last_simple -1 + set dictposn [expr {[dict size $d] -1}] + foreach k [lreverse [dict keys $d]] { + set dval [dict get $d $k] + if {[is_tomltype $dval]} { + set last_simple $dictposn + break + } + incr dictposn -1 + } + return $last_simple + } + + + #review + proc name_from_tablestack {tablestack} { + set name "" + foreach tinfo [lrange $tablestack 1 end] { + lassign $tinfo type namepart + switch -- $type { + T { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + I { + if {$name eq ""} { + append name $namepart + } else { + append name .$namepart + } + } + default { + #end at first break in the leading sequence of T & I tablenames + break + } + } + } + return $name + } + +} + +tcl::namespace::eval tomlish::app { + variable applist [list encoder decoder test] + + #*** !doctools + #[subsection {Namespace tomlish::app}] + #[para] + #[list_begin definitions] + + proc decoder {args} { + #*** !doctools + #[call app::[fun decoder] [arg args]] + #[para] read toml on stdin until EOF + #[para] on error - returns non-zero exit code and writes error on stderr + #[para] on success - returns zero exit code and writes JSON encoding of the data on stdout + #[para] This decoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + #fconfigure stdin -encoding utf-8 + fconfigure stdin -translation binary + #Just slurp it all - presumably we are not handling massive amounts of data on stdin. + # - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. + if {[catch { + set toml [read stdin] + }]} { + exit 2 ;#read error + } + try { + set j [::tomlish::toml_to_json $toml] + } on error {em} { + puts stderr "decoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $j + exit 0 + } + + proc encoder {args} { + #*** !doctools + #[call app::[fun encoder] [arg args]] + #[para] read JSON on stdin until EOF + #[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation + #[para] return zero exitcode and TOML data on stdout if JSON data can be converted. 
+ #[para] This encoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + fconfigure stdin -translation binary + if {[catch { + set json [read stdin] + }]} { + exit 2 ;#read error + } + try { + set toml [::tomlish::json_to_toml $json] + } on error {em} { + puts stderr "encoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $toml + exit 0 + } + + proc test {args} { + set opts [dict merge [dict create] $args] + + package require test::tomlish + if {[dict exists $opts -suite]} { + test::tomlish::suite [dict get $opts -suite] + } + test::tomlish::run + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::app ---}] +} + +proc ::tomlish::appnames {} { + set applist [list] + foreach cmd [info commands ::tomlish::app::*] { + lappend applist [namespace tail $cmd] + } + return $applist +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace tomlish::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +if {$argc > 0} { + puts stderr "argc: $argc args: $argv" + + if {($argc == 1)} { + if {[tcl::string::tolower $argv] in {help -help h -h}} { + puts stdout "Usage: -app where appname one of:[tomlish::appnames]" + exit 0 + } else { + puts stderr "Argument '$argv' not understood. Try -help" + exit 1 + } + } + set opts [dict create] + set opts [dict merge $opts $argv] + + set opts_understood [list -app ] + if {"-app" in [dict keys $opts]} { + #Don't vet the remaining opts - as they are interpreted by each app + } else { + foreach key [dict keys $opts] { + if {$key ni $opts_understood} { + puts stderr "Option '$key' not understood" + exit 1 + } + } + } + if {[dict exists $opts -app]} { + set app [dict get $opts -app] + if {$app ni [tomlish::appnames]} { + puts stderr "app '[dict get $opts -app]' not found" + exit 1 + } + tomlish::app::$app {*}$opts + } +} + +## Ready +package provide tomlish [namespace eval tomlish { + variable pkg tomlish + variable version + set version 1.1.2 +}] +return + +#*** !doctools +#[manpage_end] +
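+#Editor's usage sketch (assumes the module file is executed directly as a script, which is what the
+#argc/argv handling above caters for; the file name below is illustrative only):
+#   tclsh tomlish-1.1.2.tm -help                               ;# prints the -app usage line
+#   tclsh tomlish-1.1.2.tm -app decoder  < config.toml  > config.json
+#   tclsh tomlish-1.1.2.tm -app encoder  < config.json  > config.toml
+#   tclsh tomlish-1.1.2.tm -app test -suite <suitename>        ;# runs the test::tomlish suite
+#Options other than -app are rejected with a non-zero exit code unless -app is supplied, in which
+#case the remaining options are passed through unvetted to the selected app.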