You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
8408 lines
407 KiB
8408 lines
407 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2024 |
|
# |
|
# @@ Meta Begin |
|
# Application tomlish 1.1.6 |
|
# Meta platform tcl |
|
# Meta license <unspecified> |
|
# @@ Meta End |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# doctools header |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[manpage_begin tomlish_module_tomlish 0 1.1.6] |
|
#[copyright "2024"] |
|
#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] |
|
#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] |
|
#[require tomlish] |
|
#[keywords module parsing toml configuration] |
|
#[description] |
|
#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) |
|
#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml |
|
#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, |
|
#[para] although these other formats are generally unlikely to retain whitespace or comments |
|
#[para] The other formats also won't preserve roundtripability e.g \t and a literal tab coming from a toml file will be indistinguishable. |
|
#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. |
|
#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions |
|
#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key |
|
#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) |
|
#[para] will need a -type option (-force ?) to force overriding with another type such as an int. |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section Overview] |
|
#[para] overview of tomlish |
|
#[subsection Concepts] |
|
#[para] - |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[subsection dependencies] |
|
#[para] packages used by tomlish |
|
#[list_begin itemized] |
|
|
|
package require Tcl 8.6- |
|
package require struct::stack |
|
package require logger |
|
|
|
#*** !doctools |
|
#[item] [package {Tcl 8.6-}] |
|
#[item] [package {struct::stack}] |
|
|
|
#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') |
|
|
|
|
|
#*** !doctools |
|
#[list_end] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section API] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Base namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval tomlish { |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
variable types |
|
|
|
#*** !doctools |
|
#[subsection {Namespace tomlish}] |
|
#[para] Core API functions for tomlish |
|
#[list_begin definitions] |
|
|
|
#default interp recursionlimit of 1000 is insufficient to pass 1000 deep nested structures as in certain toml tests. |
|
#e.g https://github.com/iarna/toml-spec-tests/tree/latest/values |
|
#1000 seems deep for a 'configuration' format - but toml sometimes used for other serialisation purposes. |
|
#todo - review |
|
set existing_recursionlimit [interp recursionlimit {}] |
|
if {$existing_recursionlimit < 5000} { |
|
interp recursionlimit {} 5000 |
|
} |
|
|
|
#IDEAS: |
|
# since get_toml produces tomlish with whitespace/comments intact: |
|
# tomldoc object - allow (at least basic?) editing of toml whilst preserving comments & whitespace |
|
# - setKey (set leaf only to value) how to specify type? -type option? - whole array vs index into arrays and further nested objects? - option for raw toml additions? |
|
# - separate addKey?? |
|
# - deleteKey (delete leaf) |
|
# - deleteTable (delete table - if only has leaves? - option to delete with child tables?) |
|
# - set/add Table? - position in doc based on existing tables/subtables? |
|
|
|
#The tomlish intermediate representation allows things such as sorting the toml document by table name or other re-ordering of tables - |
|
# because the tables include subkeys, comments and newlines within their structure - those elements all come along with it nicely during reordering. |
|
#The same goes for the first newline following a keyval e.g x=1 \ny=2\n\n |
|
#The newline is part of the keyval structure so makes reordering easier |
|
#example from_toml "a=1\nb=2\n\n\n" |
|
# 0 = TOMLISH |
|
# 1 = KEY a = {INT 1} {NEWLINE lf} |
|
# 2 = NEWLINE lf |
|
# 3 = KEY b = {INT 2} {NEWLINE lf} |
|
# 4 = NEWLINE lf |
|
# 5 = NEWLINE lf |
|
|
|
#This reordering idea is complicated by the nature of tablearrays - especially as a table header references last tablearrayname, |
|
# and duplicate table headers are allowed in that context. |
|
#e.g |
|
#[[fruits]] |
|
# name="apple" |
|
# [fruits.metadata] |
|
# id=1 |
|
# |
|
#[unrelated1] |
|
# |
|
#[[fruits]] |
|
# name="pear" |
|
# |
|
#[unrelated2] |
|
# silly="ordering" |
|
# |
|
#[fruits.metadata] |
|
#id=2 |
|
#The TABLEARRAY record can't be completely selfcontained on the default parsing mechanism - because it is legal (though not recommended) to have unrelated tables in between. |
|
#If we were to 'insert' later related records (such as the 2nd [fruits.metadata] above) into the TABLEARRAY structure - then, even though it might produce 'nicer' toml, |
|
# we would lose roundtripability toml->tomlish->toml |
|
# ----------------------------------------------------- |
|
#REVIEW |
|
#todo - some sort of 'normalize'/'grouping' function on tomlish that at least makes records self-contained, and perhaps then (optionally) reorders resulting records sensibly. |
|
#such a function on the tomlish may work - although it would be unwise to duplicate the validation aspects of dict::from_tomlish |
|
#The most practical way might be to use dict::from_tomlish followed by from_dict - but that would lose comment info and formatting. |
|
#In the above example - The decision by the toml author to put [unrelated1] between related tablearrays should be respected, |
|
#but the positioning of [unrelated2] between a tablearray and one of its contained tables is suspect. |
|
#Both [fruits.metadata] table records should theoretically be added as children to their corresponding [[fruits]] tablearray record in the tomlish. (just as their name keys are) |
|
# ----------------------------------------------------- |
|
|
|
|
|
|
|
#ARRAY is analogous to a Tcl list |
|
#TABLE is analogous to a Tcl dict |
|
#WS = inline whitespace |
|
#KEY = bare key and value |
|
#DQKEY = double quoted key and value |
|
#SQKEY = single quoted key and value |
|
#ITABLE = inline table (*can* be anonymous table) |
|
# inline table values immediately create a table with the opening brace |
|
# inline tables are fully defined between their braces, as are dotted-key subtables defined within |
|
# No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained |
|
|
|
set tags [list TOMLISH BOM ARRAY TABLE ITABLE TABLEARRAY WS NEWLINE COMMENT DOTTEDKEY KEY DQKEY SQKEY STRING STRINGPART MULTISTRING LITERAL LITERALPART MULTILITERAL INT FLOAT BOOL] |
|
#DDDD |
|
lappend tags {*}[list\ |
|
DATETIME\ |
|
DATETIME-LOCAL\ |
|
DATE-LOCAL\ |
|
TIME-LOCAL\ |
|
] |
|
|
|
#removed - ANONTABLE |
|
#tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) |
|
#we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) |
|
#todo - configurable - allow empty string for 'unlimited' |
|
set min_int -9223372036854775808 ;#-2^63 |
|
set max_int +9223372036854775807 ;#2^63-1 |
|
|
|
proc Dolog {lvl txt} { |
|
#return "$lvl -- $txt" |
|
set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" |
|
puts stderr $msg |
|
} |
|
logger::initNamespace ::tomlish |
|
foreach lvl [logger::levels] { |
|
interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl |
|
log::logproc $lvl tomlish_log_$lvl |
|
} |
|
|
|
|
|
proc tags {} { |
|
return $::tomlish::tags |
|
} |
|
|
|
proc get_dottedkey_info {dottedkeyrecord} { |
|
set key_hierarchy [list] |
|
set key_hierarchy_raw [list] |
|
if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { |
|
error "tomlish::get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" |
|
} |
|
set compoundkeylist [lindex $dottedkeyrecord 1] |
|
set expect_sep 0 |
|
foreach part $compoundkeylist { |
|
set parttag [lindex $part 0] |
|
if {$parttag eq "WS"} { |
|
continue |
|
} |
|
if {$expect_sep} { |
|
if {$parttag ne "DOTSEP"} { |
|
error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" |
|
} |
|
set expect_sep 0 |
|
} else { |
|
set val [lindex $part 1] |
|
switch -exact -- $parttag { |
|
KEY { |
|
lappend key_hierarchy $val |
|
lappend key_hierarchy_raw $val |
|
} |
|
DQKEY { |
|
#REVIEW unescape or not? |
|
#JJJJ |
|
lappend key_hierarchy [::tomlish::utils::unescape_string $val] |
|
lappend key_hierarchy_raw \"$val\" |
|
} |
|
SQKEY { |
|
lappend key_hierarchy $val |
|
lappend key_hierarchy_raw "'$val'" |
|
} |
|
default { |
|
error "tomlish::get_dottedkey_info DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$compoundkeylist'" |
|
} |
|
} |
|
set expect_sep 1 |
|
} |
|
} |
|
return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] |
|
} |
|
|
|
#helper function for tomlish::dict::from_tomlish |
|
proc _get_keyval_value {keyval_element} { |
|
#e.g |
|
#DOTTEDKEY {{KEY a} {WS { }}} = {WS { }} {ARRAY {INT 1} SEP {ITABLE {DOTTEDKEY {{KEY x}} = {INT 1} SEP} {DOTTEDKEY {{KEY y}} = {INT 2}}}} |
|
|
|
log::notice ">>> _get_keyval_value from '$keyval_element'<<<" |
|
#find the value (or 2 values if space separated datetime - and stitch back into one) |
|
# 3 is the earliest index at which the value could occur (depending on whitespace) |
|
if {[lindex $keyval_element 2] ne "="} { |
|
error "tomlish _get_keyval_value keyval_element doesn't seem to be a properly structured {<keytype> <keydata> = <valuedata>} list\n $keyval_element" |
|
} |
|
|
|
#review |
|
if {[uplevel 1 [list info exists tablenames_info]]} { |
|
upvar tablenames_info tablenames_info |
|
} else { |
|
set tablenames_info [dict create] ;#keys are @@ paths {@@parenttable @@arrayable <index> @@etc} corresponding to parenttable.arraytable[<idx>].etc |
|
#value is a dict with keys such as ttype, tdefined |
|
} |
|
set sublist [lrange $keyval_element 3 end] ;# rhs of = |
|
|
|
set values [list] |
|
set value_posns [list] |
|
set posn 0 |
|
foreach sub $sublist { |
|
#note that a barekey/dquotedkey won't occur directly inside a barekey/dquotedkey |
|
#DDDD |
|
switch -exact -- [lindex $sub 0] { |
|
STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - TABLE - ARRAY - ITABLE { |
|
lappend values $sub |
|
lappend value_posns $posn |
|
} |
|
DOTTEDKEY { |
|
#we should never see DOTTEDKEY as a toplevel element on RHS |
|
#sanity check in case manually manipulated tomlish - or something went very wrong |
|
set msg "tomlish::_get_keyval_value Unexpected toplevel value element DOTTEDKEY after =" |
|
return -code error -errorcode {TOMLISH SYNTAX UNEXPECTEDDOTTEDKEYRHS} $msg |
|
} |
|
WS - NEWLINE - COMMENT {} |
|
SEP {} |
|
default { |
|
set msg "tomlish::_get_keyval_value Unexpected toplevel value element [lindex $sub 0] after =" |
|
return -code error -errorcode {TOMLISH SYNTAX UNEXPECTED} $msg |
|
} |
|
} |
|
incr posn |
|
} |
|
switch -- [llength $values] { |
|
0 { |
|
error "tomlish Failed to find value element in KEY. '$keyval_element'" |
|
} |
|
1 { |
|
lassign [lindex $values 0] type value |
|
} |
|
2 { |
|
#we generally expect a single 'value' item on RHS of = |
|
#(ignoring WS,NEWLINE,SEP |
|
#(either a simple type, or a container which has multiple values inside) |
|
#exception for space separated datetime which is two toplevel values |
|
|
|
#validate than exactly single space was between the two values |
|
lassign $value_posns p1 p2 |
|
if {$p2 != $p1 +2} { |
|
#sanity check |
|
#can probably only get here through manual manipulation of the tomlish list to an unprocessable form |
|
error "tomlish KEY appears to have more than one part - but not separated by whitespace - invalid '$keyval_element'" |
|
} |
|
set between_token [lindex $sublist $p1+1] |
|
if {[lindex $between_token 1] ne " "} { |
|
error "tomlish KEY in 2 parts is not separated by a single space - cannot consider for datetime '$keyval_element'" |
|
} |
|
lassign [lindex $values 0] type_d1 value_d1 |
|
lassign [lindex $values 1] type_d2 value_d2 |
|
#DDDD |
|
if {$type_d1 ne "DATE-LOCAL" || $type_d2 ni {DATETIME TIME-LOCAL}} { |
|
#we reuse DATETIME tag for standalone time with tz offset (or zZ) |
|
error "tomlish KEY in 2 parts does not appear to be datetime '$keyval_element'" |
|
} |
|
if {$type_d2 eq "TIME-LOCAL"} { |
|
set type DATETIME-LOCAL |
|
} else { |
|
#extra check that 2nd part is actually a time |
|
if {![tomlish::utils::is_timepart $value_d2]} { |
|
error "tomlish KEY in 2 parts does not appear to be datetime. (part 2 not a time value) '$keyval_element'" |
|
} |
|
set type DATETIME |
|
} |
|
set value "${value_d1}T${value_d2}" |
|
} |
|
default { |
|
error "tomlish Found multiple value elements in KEY, expected one. (or 2 for space-separated datetime) '$keyval_element'" |
|
} |
|
} |
|
set sub_tablenames_info [dict create] |
|
switch -exact -- $type { |
|
INT - FLOAT - BOOL - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL { |
|
#DDDD |
|
#simple (non-container, no-substitution) datatype |
|
set result [list type $type value $value] |
|
} |
|
STRING - STRINGPART { |
|
#JJJ |
|
#!!! review |
|
#set result [list type $type value [::tomlish::utils::unescape_string $value]] |
|
set result [list type $type value $value] |
|
} |
|
LITERAL - LITERALPART { |
|
#REVIEW |
|
set result [list type $type value $value] |
|
} |
|
TABLE { |
|
#invalid? |
|
error "tomlish _get_keyval_value invalid to have type TABLE on rhs of =" |
|
} |
|
ITABLE { |
|
#This one should not be returned as a type <tag> value <something> structure! |
|
# |
|
set prev_tablenames_info $tablenames_info |
|
set tablenames_info [dict create] |
|
set result [::tomlish::dict::from_tomlish [ list [lindex $values 0] ]] |
|
set sub_tablenames_info $tablenames_info |
|
set tablenames_info $prev_tablenames_info |
|
} |
|
ARRAY { |
|
#we need to recurse to get the corresponding dict for the contained item(s) |
|
#pass in the whole [lindex $values 0] (type val) - not just the $value! |
|
set prev_tablenames_info $tablenames_info |
|
set tablenames_info [dict create] |
|
set result [list type $type value [ ::tomlish::dict::from_tomlish [ list [lindex $values 0] ] ]] |
|
set sub_tablenames_info $tablenames_info |
|
set tablenames_info $prev_tablenames_info |
|
} |
|
MULTISTRING - MULTILITERAL { |
|
#review - mapping these to STRING might make some conversions harder? |
|
#if we keep the MULTI<type> - we know we have to look for newlines for example when converting to json |
|
#without specific types we'd have to check every STRING - and lose info about how best to map chars within it |
|
set result [list type $type value [ ::tomlish::dict::from_tomlish [ list [lindex $values 0] ] ]] |
|
} |
|
default { |
|
error "tomlish Unexpected value type '$type' found in keyval '$keyval_element'" |
|
} |
|
} |
|
return [dict create result $result tablenames_info $sub_tablenames_info] |
|
} |
|
|
|
|
|
proc to_dict {tomlish} { |
|
tomlish::dict::from_tomlish $tomlish |
|
} |
|
|
|
|
|
|
|
proc _from_dictval_tomltype {parents tablestack keys typeval} { |
|
set type [dict get $typeval type] |
|
set val [dict get $typeval value] |
|
#These are the restricted sets of typed used in the tomlish::dict representation |
|
#They are a subset of the types in tomlish: data types plus ARRAY, arranged in a dictionary form. |
|
#The container types: ITABLE, TABLE, TABLEARRAY are not used as they are represented as dictionary keys and ARRAY items. |
|
#The WS, COMMENT, and NEWLINE elements are also unrepresented in the dict structure. |
|
switch -- $type { |
|
ARRAY { |
|
set subitems [list] |
|
foreach item $val { |
|
lappend subitems [_from_dictval [list {*}$parents ARRAY] $tablestack $keys $item] SEP |
|
} |
|
if {[lindex $subitems end] eq "SEP"} { |
|
set subitems [lrange $subitems 0 end-1] |
|
} |
|
return [list ARRAY {*}$subitems] |
|
} |
|
ITABLE { |
|
error "not applicable" |
|
if {$val eq ""} { |
|
return ITABLE |
|
} else { |
|
return [_from_dictval [list {*}$parents ITABLE] $tablestack $keys $val] |
|
} |
|
} |
|
STRING { |
|
#JSJS |
|
#if our dict came from json - we have already decided what type of STRING/LITERAL etc to use when building the dict |
|
|
|
#do not validate like this - important that eg json val\\ue -> dict val\ue -> tomlish/toml val\\ue |
|
#see toml-tests |
|
#if {![tomlish::utils::rawstring_is_valid_tomlstring $val]} { |
|
# #todo? |
|
# return -code error -errorcode {TOML SYNTAX INVALIDSTRING} "Unescaped controls in string or invalid escapes" |
|
#} |
|
return [list STRING [tomlish::utils::rawstring_to_Bstring_with_escaped_controls $val]] |
|
} |
|
MULTISTRING { |
|
#value is a raw string that isn't encoded as tomlish |
|
#create a valid toml snippet with the raw value and decode it to the proper tomlish MULTISTRING format |
|
#We need to convert controls in $val to escape sequences - except for newlines |
|
# |
|
#consider an *option* to reformat for long lines? (perhaps overcomplex - byte equiv - but may fold in ugly places) |
|
#we could use a line-length limit to decide when to put in a "line ending backslash" |
|
#and even format it with a reasonable indent so that proper CONT and WS entries are made (?) REVIEW |
|
# |
|
#TODO |
|
set tomlpart "x=\"\"\"\\\n" |
|
append tomlpart [tomlish::utils::rawstring_to_MultiBstring_with_escaped_controls $val] |
|
append tomlpart "\"\"\"" |
|
set tomlish [tomlish::from_toml $tomlpart] |
|
#e.g if val = " etc\nblah" |
|
#TOMLISH {DOTTEDKEY {{KEY x}} = {MULTISTRING CONT {NEWLINE LF} {WS { }} {STRINGPART etc} {NEWLINE lf} {STRINGPART blah} } } |
|
#lindex 1 3 is the MULTISTRING tomlish list |
|
return [lindex $tomlish 1 3] |
|
} |
|
MULTILITERAL { |
|
#MLL string can contain newlines - but still no control chars |
|
#todo - validate - e.g val can't contain more than 2 squotes in a row |
|
if {[string first ''' $val] >=0} { |
|
set msg "_from_dictval_tomltype error: more than 2 single quotes in a row found in MULTILITERAL - cannot encode dict to TOML-VALID TOMLISH" |
|
return -code error -errorcode {TOML SYNTAX INVALIDLITERAL} $msg |
|
} |
|
|
|
#rawstring_is_valid_multiliteral - allow newlines as lf or crlf - but not bare cr |
|
if {![tomlish::utils::rawstring_is_valid_multiliteral $val]} { |
|
return -code error -errorcode {TOML SYNTAX INVALIDMULTILITERAL} "Controls other than tab or newlines found in multiliteral" |
|
} |
|
|
|
set tomlpart "x='''\n" |
|
append tomlpart $val ''' |
|
set tomlish [tomlish::from_toml $tomlpart] |
|
return [lindex $tomlish 1 3] |
|
} |
|
LITERAL { |
|
#from v1.0 spec - "Control characters other than tab are not permitted in a literal string" |
|
#(This rules out raw ANSI SGR - which is somewhat restrictive - but perhaps justified for a config format |
|
# as copy-pasting ansi to a config value is probably not always wise, and it's not something that can be |
|
# easily input via a text editor. ANSI can go in Basic strings using the \e escape if that's accepted v1.1?) |
|
#we could choose to change the type to another format here when encountering invalid chars - but that seems |
|
#like too much magic. We elect to error out and require the dict to have valid data for the types it specifies. |
|
if {[string first ' $val] >=0} { |
|
set msg "_from_dictval_tomltype error: single quote found in LITERAL - cannot encode dict to TOML-VALID TOMLISH" |
|
return -code error -errorcode {TOML SYNTAX INVALIDLITERAL} $msg |
|
} |
|
#JJJJ |
|
if {![tomlish::utils::rawstring_is_valid_literal $val]} { |
|
#has controls other than tab |
|
#todo - squote? |
|
return -code error -errorcode {TOML SYNTAX INVALIDLITERAL} "Controls other than tab found in literal" |
|
} |
|
return [list LITERAL $val] |
|
} |
|
INT { |
|
if {![::tomlish::utils::is_int $val]} { |
|
error "_from_dictval_tomltype error: bad INT value '$val' - cannot encode dict to TOML-VALID TOMLISH" |
|
} |
|
return [list INT $val] |
|
} |
|
FLOAT { |
|
if {![::tomlish::utils::is_float $val]} { |
|
error "_from_dictval_tomltype error: bad FLOAT value '$val' - cannot encode dict to TOML-VALID TOMLISH" |
|
} |
|
return [list FLOAT $val] |
|
} |
|
default { |
|
if {$type ni [::tomlish::tags]} { |
|
error "_from_dictval_tomltype error: Unrecognised typename '$type' in {type <typename> value <value>} - cannot encode dict to TOML-VALID TOMLISH" |
|
} |
|
return [list $type $val] |
|
} |
|
} |
|
} |
|
|
|
proc _from_dictval {parents tablestack keys vinfo} { |
|
set k [lindex $keys end] |
|
set K_PART [tomlish::dict::classify_rawkey $k] ;#get [list SQKEY <val] [list DQKEY <val] or [list KEY <val>] |
|
#puts stderr "---parents:'$parents' keys:'$keys' vinfo: $vinfo---" |
|
#puts stderr "---tablestack: $tablestack---" |
|
set result [list] |
|
set lastparent [lindex $parents end] |
|
if {$lastparent in [list "" do_inline]} { |
|
if {[tomlish::dict::is_typeval $vinfo]} { |
|
set type [dict get $vinfo type] |
|
#treat ITABLE differently? |
|
set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] |
|
lappend result DOTTEDKEY [list $K_PART {WS { }}] = {WS { }} $sublist {NEWLINE lf} |
|
} else { |
|
if {$vinfo ne ""} { |
|
|
|
#set result [list DOTTEDKEY [list [list KEY $k]] = ] |
|
#set records [list ITABLE] |
|
|
|
set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] |
|
|
|
if {$lastparent eq "do_inline"} { |
|
set result [list DOTTEDKEY [list $K_PART] =] |
|
set records [list ITABLE] |
|
} else { |
|
set tname [tomlish::dict::join_and_quote_rawkey_list [list $k]] |
|
set result [list TABLE $tname {NEWLINE lf}] |
|
set tablestack [list {*}$tablestack [list T $k]] |
|
set records [list] |
|
} |
|
|
|
|
|
|
|
set lastidx [expr {[dict size $vinfo] -1}] |
|
set dictidx 0 |
|
dict for {vk vv} $vinfo { |
|
set VK_PART [tomlish::dict::classify_rawkey $vk] ;#get [list SQKEY <val] [list DQKEY <val] or [list KEY <val>] |
|
#(SQKEY & DQKEY do not have the enclosing quotes in their returned val) |
|
#if {[regexp {\s} $vk] || [string first . $vk] >= 0} { |
|
# set VK_PART [list SQKEY $vk] |
|
#} else { |
|
# set VK_PART [list KEY $vk] |
|
#} |
|
if {[tomlish::dict::is_typeval $vv]} { |
|
#type x value y |
|
#REVIEW - we could detect if value is an array of objects, |
|
#and depending on parent context - emit a series of TABLEARRAY records instead of a DOTTEDKEY record containing an ARRAY of objects |
|
set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] |
|
set record [list DOTTEDKEY [list $VK_PART {WS { }}] = {WS { }} $sublist] |
|
} else { |
|
if {$vv eq ""} { |
|
#experimental |
|
if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { |
|
::tomlish::log::notice "_from_dictval could uninline KEY $vk (tablestack:$tablestack)" |
|
#set tname [tomlish::dict::name_from_tablestack [list {*}$tablestack [list T $vk]]] |
|
|
|
#we can't just join normalized keys - need keys with appropriate quotes and escapes |
|
#set tname [join [list {*}$keys $vk] .] ;#WRONG |
|
set tq [tomlish::dict::join_and_quote_rawkey_list [list {*}$keys $vk]] |
|
|
|
|
|
##wrong? results in TABLE within TABLE record?? todo pop? |
|
#set record [list TABLE $tq {NEWLINE lf}] |
|
#set tablestack [list {*}$tablestack [list T $vk]] |
|
|
|
#REVIEW!!! |
|
|
|
set record [list DOTTEDKEY [list $VK_PART] = ITABLE] |
|
set tablestack [list {*}$tablestack [list I $vk]] |
|
|
|
} else { |
|
set record [list DOTTEDKEY [list $VK_PART] = ITABLE] |
|
set tablestack [list {*}$tablestack [list I $vk]] |
|
} |
|
} else { |
|
if { 0 } { |
|
#experiment.. sort of getting there. |
|
if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { |
|
::tomlish::log::notice "_from_dictval could uninline2 KEYS [list {*}$keys $vk] (tablestack:$tablestack)" |
|
set tq [tomlish::dict::join_and_quote_rawkey_list [list {*}$keys $vk]] |
|
set record [list TABLE $tq {NEWLINE lf}] |
|
set tablestack [list {*}$tablestack [list T $vk]] |
|
|
|
#review - todo? |
|
set dottedkey_value [_from_dictval [list {*}$parents TABLE] $tablestack [list {*}$keys $vk] $vv] |
|
lappend record {*}$dottedkey_value |
|
|
|
} else { |
|
set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] |
|
set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] |
|
} |
|
} else { |
|
set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] |
|
set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] |
|
} |
|
} |
|
} |
|
if {$dictidx != $lastidx} { |
|
#lappend record SEP |
|
if {$lastparent eq "do_inline"} { |
|
lappend record SEP |
|
} else { |
|
lappend record {NEWLINE lf} |
|
} |
|
} |
|
if {[llength $record]} { |
|
lappend records $record |
|
} |
|
incr dictidx |
|
} |
|
if {$lastparent eq "do_inline"} { |
|
lappend result $records {NEWLINE lf} |
|
} else { |
|
lappend result {*}$records {NEWLINE lf} |
|
} |
|
} else { |
|
if {$lastparent eq "do_inline"} { |
|
lappend result DOTTEDKEY [list $K_PART] = ITABLE {NEWLINE lf} |
|
} else { |
|
set tname [tomlish::dict::join_and_quote_rawkey_list [list $k]] |
|
#REVIEW |
|
lappend result TABLE $tname {NEWLINE lf} |
|
} |
|
} |
|
} |
|
} else { |
|
#lastparent is not toplevel "" or "do_inline" |
|
if {[tomlish::dict::is_typeval $vinfo]} { |
|
#type x value y |
|
set sublist [_from_dictval_tomltype $parents $tablestack $keys $vinfo] |
|
lappend result {*}$sublist |
|
} else { |
|
if {$lastparent eq "TABLE"} { |
|
#review |
|
dict for {vk vv} $vinfo { |
|
set VK_PART [tomlish::dict::classify_rawkey $vk] ;#get [list SQKEY <val] [list DQKEY <val] or [list KEY <val>] |
|
set dottedkey_value [_from_dictval [list {*}$parents DOTTEDKEY] $tablestack [list {*}$keys $vk] $vv] |
|
lappend result [list DOTTEDKEY [list $VK_PART] = $dottedkey_value {NEWLINE lf}] |
|
} |
|
} else { |
|
if {$vinfo ne ""} { |
|
set lastidx [expr {[dict size $vinfo] -1}] |
|
set dictidx 0 |
|
set sub [list] |
|
#REVIEW |
|
#set result $lastparent ;#e.g sets ITABLE |
|
set result ITABLE |
|
set last_tomltype_posn [tomlish::dict::last_tomltype_posn $vinfo] |
|
dict for {vk vv} $vinfo { |
|
set VK_PART [tomlish::dict::classify_rawkey $vk] ;#get [list SQKEY <val] [list DQKEY <val] or [list KEY <val>] |
|
if {[tomlish::dict::is_typeval $vv]} { |
|
#type x value y |
|
set sublist [_from_dictval_tomltype $parents $tablestack $keys $vv] |
|
set record [list DOTTEDKEY [list $VK_PART] = $sublist] |
|
} else { |
|
if {$vv eq ""} { |
|
#can't just uninline at this level |
|
#we need a better method to query main dict for uninlinability at each level |
|
# (including what's been inlined already) |
|
#if {[lindex $parents 0] eq "" && $dictidx > $last_tomltype_posn} { |
|
# puts stderr "_from_dictval uninline2 KEY $keys" |
|
# set tname [tomlish::dict::join_and_quote_rawkey_list [list {*}$keys $vk]] |
|
# set record [list TABLE $tname {NEWLINE lf}] |
|
# set tablestack [list {*}$tablestack [list T $vk]] |
|
#} else { |
|
set record [list DOTTEDKEY [list $VK_PART] = ITABLE] |
|
#} |
|
} else { |
|
#set sub [_from_dictval ITABLE $vk $vv] |
|
set dottedkey_value [_from_dictval [list {*}$parents ITABLE] $tablestack [list {*}$keys $vk] $vv] |
|
#set record [list DOTTEDKEY [list $VK_PART] = ITABLE $dottedkey_value] |
|
set record [list DOTTEDKEY [list $VK_PART] = $dottedkey_value] |
|
} |
|
} |
|
if {$dictidx != $lastidx} { |
|
lappend record SEP |
|
} |
|
lappend result $record |
|
incr dictidx |
|
} |
|
} else { |
|
#e.g x=[{}] |
|
log::debug "---> _from_dictval empty ITABLE x-1" |
|
#lappend result DOTTEDKEY [list $K_PART] = ITABLE ;#wrong |
|
lappend result ITABLE |
|
} |
|
} |
|
} |
|
} |
|
return $result |
|
} |
|
|
|
|
|
proc from_dict {d} { |
|
#consider: |
|
# t1={a=1,b=2} |
|
# x = 1 |
|
|
|
# from_dict gives us: t1 {a {type INT value 1} b {type INT value 2}} x {type INT value 1} |
|
|
|
#If we represent t1 as an expanded table we get |
|
# [t1] |
|
# a=1 |
|
# b=2 |
|
# x=1 |
|
# --- which is incorrect - as x was a toplevel key like t1! |
|
#This issue doesn't occur if x is itself an inline table |
|
# t1={a=1,b=2} |
|
# x= {no="problem"} |
|
# |
|
# (or if we were to reorder x to come before t1) |
|
|
|
#ie the order of the dict elements influences how the toml can be represented. |
|
|
|
#As the dictionary form doesn't distinguish the structure used to create tables {[table1]\nk=v} vs inline {table1={k=v}} |
|
#Without a solution, from_dict would have to always produce the inline form for toplevel tables unless we allowed re-ordering, |
|
#which is unpreferred here. |
|
|
|
#A possible solution: |
|
#scan the top level to see if all (trailing) elements are themselves dicts |
|
# (ie not of form {type XXX value yyy}) |
|
# |
|
# A further point is that if all root level values are at the 'top' - we can treat lower table-like structures as {[table]} elements |
|
#ie we don't need to force do_inline if all the 'simple' keys are before any compound keys |
|
|
|
#set root_has_values 0 |
|
#approach 1) - the naive approach - forces inline when not always necessary |
|
#dict for {k v} $d { |
|
# if {[llength $v] == 4 && [lindex $v 0] eq "type"} { |
|
# set root_has_values 1 |
|
# break |
|
# } |
|
#} |
|
|
|
|
|
#approach 2) - track the position of last {type x value y} in the dictionary built by dict::from_tomlish |
|
# - still not perfect. Inlines dotted tables unnecessarily |
|
#This means from_dict doesn't produce output optimal for human editing. |
|
set last_simple [tomlish::dict::last_tomltype_posn $d] |
|
|
|
|
|
## set parent "do_inline" ;#a value used in _from_dictval to distinguish from "" or other context based parent values |
|
#Any keys that are themselves tables - will need to be represented inline |
|
#to avoid reordering, or incorrect assignment of plain values to the wrong table. |
|
|
|
## set parent "" |
|
#all toplevel keys in the dict structure can represent subtables. |
|
#we are free to use {[tablename]\n} syntax for toplevel elements. |
|
|
|
|
|
set tomlish [list TOMLISH] |
|
set dictposn 0 |
|
set tablestack [list [list T root]] ;#todo |
|
dict for {t tinfo} $d { |
|
if {$last_simple > $dictposn} { |
|
set parents [list do_inline] |
|
} else { |
|
set parents [list ""] |
|
} |
|
set keys [list $t] |
|
#review - where to make decision on |
|
# DOTTEDKEY containing array of objs |
|
#vs |
|
# list of TABLEARRAY records |
|
#At least for the top |
|
set trecord [_from_dictval $parents $tablestack $keys $tinfo] |
|
lappend tomlish $trecord |
|
incr dictposn |
|
} |
|
return $tomlish |
|
} |
|
|
|
proc typedjson_to_toml {json} { |
|
#*** !doctools |
|
#[call [fun typedjson_to_toml] [arg json]] |
|
#[para] |
|
|
|
set tomlish [::tomlish::from_dict_from_typedjson $json] |
|
lappend tomlish [list NEWLINE lf] |
|
set toml [::tomlish::to_toml $tomlish] |
|
} |
|
|
|
set json1 {{ "a": {"type": "integer", "value": "42"}}} |
|
set json2 {{ |
|
"a": {"type": "integer", "value": "42"}, |
|
"b": {"type": "string", "value": "test"} |
|
}} |
|
set json3 { |
|
{ |
|
"best-day-ever": {"type": "datetime", "value": "1987-07-05T17:45:00Z"}, |
|
"numtheory": { |
|
"boring": {"type": "bool", "value": "false"}, |
|
"perfection": [ |
|
{"type": "integer", "value": "6"}, |
|
{"type": "integer", "value": "28"}, |
|
{"type": "integer", "value": "496"} |
|
] |
|
} |
|
} |
|
} |
|
|
|
set json4 { |
|
{ |
|
"best-day-ever": {"type": "datetime", "value": "1987-07-05T17:45:00Z"}, |
|
"numtheory": { |
|
"boring": {"type": "bool", "value": "false"}, |
|
"perfection": [ |
|
{"type": "integer", "value": "6"}, |
|
{"type": "integer", "value": "28"}, |
|
{"type": "integer", "value": "496"} |
|
] |
|
}, |
|
"emptyobj": {}, |
|
"emptyarray": [] |
|
} |
|
} |
|
|
|
set json5 { |
|
{ |
|
"a": { |
|
" x ": {}, |
|
"b.c": {}, |
|
"d.e": {}, |
|
"b": { |
|
"c": {} |
|
} |
|
} |
|
} |
|
} |
|
|
|
#surrogate pair face emoji |
|
set json6 { |
|
{ |
|
"surrogatepair": {"type": "string", "value": "\uD83D\uDE10"} |
|
} |
|
} |
|
|
|
|
|
set json7 { |
|
{ |
|
"escapes": {"type": "string", "value": "val\\ue"} |
|
} |
|
} |
|
|
|
|
|
proc from_dict_from_typedjson {json} { |
|
set d [tomlish::dict::from_typedjson $json] |
|
tomlish::from_dict $d ;#return tomlish |
|
} |
|
|
|
|
|
proc toml_to_typedjson {toml} { |
|
set tomlish [::tomlish::from_toml $toml] |
|
set d [tomlish::dict::from_tomlish $tomlish] |
|
#full validation only occurs by re-encoding dict to tomlish |
|
set test [tomlish::from_dict $d] |
|
|
|
set h [tomlish::typedhuddle::from_dict $d] |
|
#huddle jsondump $h |
|
tomlish::huddle::jsondumpraw $h |
|
} |
|
|
|
#proc get_json {tomlish} { |
|
# package require fish::json |
|
# set d [::tomlish::dict::from_tomlish $tomlish] |
|
|
|
# #return [::tomlish::dict_to_json $d] |
|
# return [fish::json::from "struct" $d] |
|
#} |
|
|
|
#return a Tcl list of tomlish tokens |
|
#i.e get a standard list of all the toml terms in string $s |
|
#where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. |
|
#(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) |
|
# ---------------------------------------------------------------------------------------------- |
|
# NOTE: the production of tomlish from toml source doesn't indicate the toml source was valid!!! |
|
# e.g we deliberately don't check certain things such as duplicate table declarations here. |
|
# ---------------------------------------------------------------------------------------------- |
|
#Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. |
|
# (e.g perhaps a toml editor to highlight violations for fixing) |
|
# A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. |
|
# e.g dicts or an object oriented structure |
|
#Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage |
|
#e.g dict::from_tomlish will substitute \r \n \uHHHH \UHHHHHHH etc |
|
#This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. |
|
# (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) |
|
#If we were to unescape a tab character for example |
|
# - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. |
|
# For this reason, we also do absolutely no line-ending transformations based on platform. |
|
# All line-endings are maintained as is, and even a file with mixed lf crlf line-endings will be correctly interpreted and can be 'roundtripped' |
|
|
|
proc from_toml {args} { |
|
|
|
namespace upvar ::tomlish::parse s s |
|
set s [join $args \n] |
|
namespace upvar ::tomlish::parse i i |
|
set i 0 ;#index into s |
|
|
|
namespace upvar ::tomlish::parse is_parsing is_parsing |
|
set is_parsing 1 |
|
|
|
if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { |
|
tomlish::parse::spacestack destroy |
|
} |
|
struct::stack ::tomlish::parse::spacestack |
|
|
|
namespace upvar ::tomlish::parse last_space_action last_space_action |
|
namespace upvar ::tomlish::parse last_space_type last_space_type |
|
|
|
namespace upvar ::tomlish::parse tok tok |
|
set tok "" |
|
|
|
namespace upvar ::tomlish::parse type type |
|
namespace upvar ::tomlish::parse tokenType tokenType |
|
::tomlish::parse::set_tokenType "" |
|
namespace upvar ::tomlish::parse tokenType_list tokenType_list |
|
set tokenType [list] ;#Flat (un-nested) list of tokentypes found |
|
|
|
namespace upvar ::tomlish::parse lastChar lastChar |
|
set lastChar "" |
|
|
|
|
|
set result "" |
|
namespace upvar ::tomlish::parse nest nest |
|
set nest 0 |
|
|
|
namespace upvar ::tomlish::parse v v ;#array keyed on nest level |
|
|
|
|
|
set v(0) {TOMLISH} |
|
array set s0 [list] ;#whitespace data to go in {SPACE {}} element. |
|
set parentlevel 0 |
|
|
|
|
|
namespace upvar ::tomlish::parse state state |
|
|
|
namespace upvar ::tomlish::parse braceCount braceCount |
|
set barceCount 0 |
|
namespace upvar ::tomlish::parse bracketCount bracketCount |
|
set bracketCount 0 |
|
|
|
set sep 0 |
|
set r 1 |
|
namespace upvar ::tomlish::parse token_waiting token_waiting |
|
set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. |
|
|
|
|
|
|
|
set state "table-space" |
|
::tomlish::parse::spacestack push {type space state table-space} |
|
namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) |
|
set linenum 1 |
|
|
|
set ::tomlish::parse::state_list [list] |
|
try { |
|
while {$r} { |
|
set r [::tomlish::parse::tok] |
|
#puts stdout "got tok: '$tok' while parsing string '$s' " |
|
set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' |
|
|
|
|
|
#puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" |
|
#puts "-->tok: $tok tokenType='$tokenType'" |
|
set prevstate $state |
|
set transition_info [::tomlish::parse::goNextState $tokenType $tok $state] |
|
#review goNextState could perform more than one space_action |
|
set space_action [dict get $transition_info space_action] |
|
set newstate [dict get $transition_info newstate] ;#use of 'newstate' vs 'state' makes code clearer below |
|
|
|
if {[tcl::string::match "err-*" $state]} { |
|
::tomlish::log::warn "---- State error in state $prevstate for tokenType: $tokenType token value: $tok. $state aborting parse. [tomlish::parse::report_line]" |
|
lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] |
|
return $v(0) |
|
} |
|
# --------------------------------------------------------- |
|
#NOTE there may already be a token_waiting at this point |
|
#set_token_waiting can raise an error here, |
|
# in which case the space_action branch needs to be rewritten to handle the existing token_waiting |
|
# --------------------------------------------------------- |
|
|
|
if {$space_action eq "pop"} { |
|
#pop_trigger_tokens: newline tablename endarray endinlinetable |
|
#note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. |
|
set parentlevel [expr {$nest -1}] |
|
set do_append_to_parent 1 ;#most tokens will leave this alone - but some like tentative_accum_squote need to do their own append |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote { |
|
#should only apply within a multiliteral |
|
#### |
|
set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed |
|
#Without this - we would get extraneous empty list entries in the parent |
|
# - as the xxx-squote-space isn't a space level from the toml perspective |
|
# - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop |
|
#assert prevstate always trailing-squote-space |
|
#dev guardrail - remove? assertion lib? |
|
switch -exact -- $prevstate { |
|
trailing-squote-space { |
|
} |
|
default { |
|
error "--- unexpected popped due to tentative_accum_squote but came from state '$prevstate' should have been trailing-squote-space" |
|
} |
|
} |
|
switch -- $tok { |
|
' { |
|
tomlish::parse::set_token_waiting type single_squote value $tok complete 1 startindex [expr {$i -1}] |
|
} |
|
'' { |
|
#review - we should perhaps return double_squote instead? |
|
#tomlish::parse::set_token_waiting type literal value "" complete 1 |
|
tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] |
|
} |
|
''' { |
|
#### |
|
#if already an eof in token_waiting - set_token_waiting will insert before it |
|
tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] |
|
} |
|
'''' { |
|
tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] |
|
#todo integrate left squote with nest data at this level |
|
set lastpart [lindex $v($parentlevel) end] |
|
switch -- [lindex $lastpart 0] { |
|
LITERALPART { |
|
set newval "[lindex $lastpart 1]'" |
|
set parentdata $v($parentlevel) |
|
lset parentdata end [list LITERALPART $newval] |
|
set v($parentlevel) $parentdata |
|
} |
|
NEWLINE { |
|
lappend v($parentlevel) [list LITERALPART "'"] |
|
} |
|
MULTILITERAL { |
|
#empty |
|
lappend v($parentlevel) [list LITERALPART "'"] |
|
} |
|
default { |
|
error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" |
|
} |
|
} |
|
} |
|
''''' { |
|
tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] |
|
#todo integrate left 2 squotes with nest data at this level |
|
set lastpart [lindex $v($parentlevel) end] |
|
switch -- [lindex $lastpart 0] { |
|
LITERALPART { |
|
set newval "[lindex $lastpart 1]''" |
|
set parentdata $v($parentlevel) |
|
lset parentdata end [list LITERALPART $newval] |
|
set v($parentlevel) $parentdata |
|
} |
|
NEWLINE { |
|
lappend v($parentlevel) [list LITERALPART "''"] |
|
} |
|
MULTILITERAL { |
|
lappend v($parentlevel) [list LITERALPART "''"] |
|
} |
|
default { |
|
error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" |
|
} |
|
} |
|
} |
|
} |
|
} |
|
triple_squote { |
|
#presumably popping multiliteral-space |
|
::tomlish::log::debug "---- triple_squote for last_space_action pop leveldata: $v($nest)" |
|
set merged [list] |
|
set lasttype "" |
|
foreach part $v($nest) { |
|
switch -exact -- [lindex $part 0] { |
|
MULTILITERAL { |
|
lappend merged $part |
|
} |
|
LITERALPART { |
|
if {$lasttype eq "LITERALPART"} { |
|
set prevpart [lindex $merged end] |
|
lset prevpart 1 [lindex $prevpart 1][lindex $part 1] |
|
lset merged end $prevpart |
|
} else { |
|
lappend merged $part |
|
} |
|
} |
|
NEWLINE { |
|
#note that even though first newline ultimately gets stripped from multiliterals - that isn't done here |
|
#we still need the first one for roundtripping. The datastructure stage is where it gets stripped. |
|
lappend merged $part |
|
} |
|
default { |
|
error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($nest)" |
|
} |
|
} |
|
set lasttype [lindex $part 0] |
|
} |
|
set v($nest) $merged |
|
} |
|
tentative_accum_dquote { |
|
#should only apply within a multistring |
|
#### |
|
set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed |
|
#Without this - we would get extraneous empty list entries in the parent |
|
# - as the trailing-dquote-space isn't a space level from the toml perspective |
|
# - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop |
|
#assert prevstate always trailing-dquote-space |
|
#dev guardrail - remove? assertion lib? |
|
switch -exact -- $prevstate { |
|
trailing-dquote-space { |
|
} |
|
default { |
|
error "--- unexpected popped due to tentative_accum_dquote but came from state '$prevstate' should have been trailing-dquote-space" |
|
} |
|
} |
|
switch -- $tok { |
|
{"} { |
|
tomlish::parse::set_token_waiting type single_dquote value $tok complete 1 startindex [expr {$i -1}] |
|
} |
|
{""} { |
|
#review - we should perhaps return double_dquote instead? |
|
#tomlish::parse::set_token_waiting type literal value "" complete 1 |
|
tomlish::parse::set_token_waiting type double_dquote value "" complete 1 startindex [expr {$i - 2}] |
|
} |
|
{"""} { |
|
#### |
|
#if already an eof in token_waiting - set_token_waiting will insert before it |
|
tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 3}] |
|
} |
|
{""""} { |
|
tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 4}] |
|
#todo integrate left dquote with nest data at this level |
|
set lastpart [lindex $v($parentlevel) end] |
|
switch -- [lindex $lastpart 0] { |
|
STRINGPART { |
|
set newval "[lindex $lastpart 1]\"" |
|
set parentdata $v($parentlevel) |
|
lset parentdata end [list STRINGPART $newval] |
|
set v($parentlevel) $parentdata |
|
} |
|
NEWLINE - CONT - WS { |
|
lappend v($parentlevel) [list STRINGPART {"}] |
|
} |
|
MULTISTRING { |
|
#empty |
|
lappend v($parentlevel) [list STRINGPART {"}] |
|
} |
|
default { |
|
error "--- don't know how to integrate extra trailing dquote with data $v($parentlevel)" |
|
} |
|
} |
|
} |
|
{"""""} { |
|
tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i-5}] |
|
#todo integrate left 2 dquotes with nest data at this level |
|
set lastpart [lindex $v($parentlevel) end] |
|
switch -- [lindex $lastpart 0] { |
|
STRINGPART { |
|
set newval "[lindex $lastpart 1]\"\"" |
|
set parentdata $v($parentlevel) |
|
lset parentdata end [list STRINGPART $newval] |
|
set v($parentlevel) $parentdata |
|
} |
|
NEWLINE - CONT - WS { |
|
lappend v($parentlevel) [list STRINGPART {""}] |
|
} |
|
MULTISTRING { |
|
lappend v($parentlevel) [list STRINGPART {""}] |
|
} |
|
default { |
|
error "--- don't know how to integrate extra trailing 2 dquotes with data $v($parentlevel)" |
|
} |
|
} |
|
} |
|
} |
|
} |
|
triple_dquote { |
|
#presumably popping multistring-space |
|
::tomlish::log::debug "---- triple_dquote for last_space_action pop leveldata: $v($nest)" |
|
set merged [list] |
|
set lasttype "" |
|
foreach part $v($nest) { |
|
switch -exact -- [lindex $part 0] { |
|
MULTISTRING { |
|
lappend merged $part |
|
} |
|
STRINGPART { |
|
if {$lasttype eq "STRINGPART"} { |
|
set prevpart [lindex $merged end] |
|
lset prevpart 1 [lindex $prevpart 1][lindex $part 1] |
|
lset merged end $prevpart |
|
} else { |
|
lappend merged $part |
|
} |
|
} |
|
CONT - WS { |
|
lappend merged $part |
|
} |
|
NEWLINE { |
|
#note that even though first newline ultimately gets stripped from multiliterals - that isn't done here |
|
#we still need the first one for roundtripping. The datastructure stage is where it gets stripped. |
|
lappend merged $part |
|
} |
|
default { |
|
error "---- triple_dquote unhandled part type [lindex $part 0] unable to merge leveldata: $v($nest)" |
|
} |
|
} |
|
set lasttype [lindex $part 0] |
|
} |
|
set v($nest) $merged |
|
} |
|
equal { |
|
#pop caused by = |
|
switch -exact -- $prevstate { |
|
dottedkey-space { |
|
tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" |
|
#re-emit for parent space |
|
tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] |
|
} |
|
dottedkey-space-tail { |
|
#experiment? |
|
tomlish::log::debug "---- equal ending dottedkey-space-tail for last_space_action pop" |
|
#re-emit for parent space |
|
tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] |
|
} |
|
} |
|
} |
|
newline { |
|
incr linenum |
|
lappend v($nest) [list NEWLINE $tok] |
|
} |
|
tablename { |
|
#note: a tablename only 'pops' if we are greater than zero |
|
error "---- tablename pop should already have been handled as special case zeropoppushspace in goNextState" |
|
} |
|
tablearrayname { |
|
#!review - tablearrayname different to tablename regarding push/pop? |
|
#note: a tablename only 'pops' if we are greater than zero |
|
error "---- tablearrayname pop should already have been handled as special case zeropoppushspace in goNextState" |
|
} |
|
endarray { |
|
#nothing to do here. |
|
} |
|
comma { |
|
#comma for inline table will pop the keyvalue space |
|
lappend v($nest) "SEP" |
|
} |
|
endinlinetable { |
|
::tomlish::log::debug "---- endinlinetable for last_space_action pop" |
|
} |
|
default { |
|
error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" |
|
} |
|
} |
|
if {$do_append_to_parent} { |
|
#e.g tentative_accum_squote does it's own appends as necessary - so won't get here |
|
lappend v($parentlevel) [set v($nest)] |
|
} |
|
|
|
incr nest -1 |
|
|
|
} elseif {$last_space_action eq "push"} { |
|
set prevnest $nest |
|
incr nest 1 |
|
set v($nest) [list] |
|
# push_trigger_tokens: barekey dquotedkey startinlinetable startarray tablename tablearrayname |
|
|
|
|
|
switch -exact -- $tokenType { |
|
tentative_trigger_squote - tentative_trigger_dquote { |
|
#### this startok will always be tentative_accum_squote/tentative_accum_dquote starting with one accumulated squote/dquote |
|
if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { |
|
lassign [dict get $transition_info starttok] starttok_type starttok_val |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType $starttok_type |
|
set tok $starttok_val |
|
} |
|
} |
|
single_squote { |
|
#JMN - REVIEW |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "squotedkey" |
|
set tok "" |
|
} |
|
triple_squote { |
|
::tomlish::log::debug "---- push trigger tokenType triple_squote" |
|
set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERALPART |
|
} |
|
squotedkey { |
|
switch -exact -- $prevstate { |
|
table-space - itable-space { |
|
set v($nest) [list DOTTEDKEY] |
|
} |
|
} |
|
#todo - check not something already waiting? |
|
tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space |
|
} |
|
triple_dquote { |
|
set v($nest) [list MULTISTRING] ;#container for NEWLINE,STRINGPART,CONT |
|
} |
|
dquotedkey { |
|
switch -exact -- $prevstate { |
|
table-space - itable-space { |
|
set v($nest) [list DOTTEDKEY] |
|
} |
|
} |
|
#todo - check not something already waiting? |
|
tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space |
|
} |
|
barekey { |
|
switch -exact -- $prevstate { |
|
table-space - itable-space { |
|
set v($nest) [list DOTTEDKEY] |
|
} |
|
} |
|
#todo - check not something already waiting? |
|
set waiting [tomlish::parse::get_token_waiting] |
|
if {[llength $waiting]} { |
|
set i [dict get $waiting startindex] |
|
tomlish::parse::clear_token_waiting |
|
tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space |
|
} else { |
|
tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space |
|
} |
|
} |
|
tablename { |
|
#note: we do not use the output of tablename_trim to produce a tablename for storage in the tomlish list! |
|
#The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish |
|
# back to toml file will be identical. |
|
#It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. |
|
# we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, |
|
# so we can raise an error at this point rather than create a tomlish list with obviously invalid table names from |
|
# a structural perspective. |
|
|
|
#todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, |
|
# so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the |
|
# tomlish list? |
|
|
|
#set trimtable [tablename_trim $tok] |
|
#::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$trimtable'" |
|
set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name |
|
#note also that equivalent tablenames may have different toml representations even after being trimmed! |
|
#e.g ["x\t\t"] & ["x "] (tab escapes vs literals) |
|
#These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. |
|
} |
|
tablearrayname { |
|
#set trimtable [tablename_trim $tok] |
|
#::tomlish::log::debug "---- trimmed (but not normalized) tablearrayname: '$trimtable'" |
|
set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name |
|
} |
|
startarray { |
|
set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. |
|
} |
|
startinlinetable { |
|
set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. |
|
} |
|
default { |
|
error "---- push trigger tokenType '$tokenType' not yet implemented" |
|
} |
|
} |
|
|
|
} else { |
|
#no space level change |
|
switch -exact -- $tokenType { |
|
squotedkey { |
|
#puts "---- squotedkey in state $prevstate (no space level change)" |
|
lappend v($nest) [list SQKEY $tok] |
|
} |
|
dquotedkey { |
|
#puts "---- dquotedkey in state $prevstate (no space level change)" |
|
lappend v($nest) [list DQKEY $tok] |
|
} |
|
barekey { |
|
lappend v($nest) [list KEY $tok] |
|
} |
|
dotsep { |
|
lappend v($nest) [list DOTSEP] |
|
} |
|
starttablename { |
|
#$tok is triggered by the opening bracket and sends nothing to output |
|
} |
|
starttablearrayname { |
|
#$tok is triggered by the double opening brackets and sends nothing to output |
|
} |
|
tablename - tablenamearray { |
|
error "---- did not expect 'tablename/tablearrayname' without space level change (no space level change)" |
|
#set v($nest) [list TABLE $tok] |
|
} |
|
endtablename - endtablearrayname { |
|
#no output into the tomlish list for this token |
|
} |
|
startinlinetable { |
|
puts stderr "---- decode::toml error. did not expect startinlinetable without space level change (no space level change)" |
|
} |
|
single_dquote { |
|
switch -exact -- $newstate { |
|
string-state { |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "string" |
|
set tok "" |
|
} |
|
dquoted-key { |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "dquotedkey" |
|
set tok "" |
|
} |
|
multistring-space { |
|
lappend v($nest) [list STRINGPART {"}] |
|
#may need to be joined on pop if there are neighbouring STRINGPARTS |
|
} |
|
default { |
|
error "---- single_dquote switch case not implemented for nextstate: $newstate (no space level change)" |
|
} |
|
} |
|
} |
|
double_dquote { |
|
#leading extra quotes - test: toml_multistring_startquote2 |
|
switch -exact -- $prevstate { |
|
itable-keyval-value-expected - keyval-value-expected { |
|
puts stderr "tomlish::decode::toml double_dquote TEST" |
|
#empty string |
|
lappend v($nest) [list STRINGPART ""] |
|
} |
|
multistring-space { |
|
#multistring-space to multistring-space |
|
lappend v($nest) [list STRINGPART {""}] |
|
} |
|
default { |
|
error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" |
|
} |
|
} |
|
|
|
} |
|
single_squote { |
|
switch -exact -- $newstate { |
|
literal-state { |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "literal" |
|
set tok "" |
|
} |
|
squoted-key { |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "squotedkey" |
|
set tok "" |
|
} |
|
multiliteral-space { |
|
#false alarm squote returned from tentative_accum_squote pop |
|
::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" |
|
#(single squote - not terminating space) |
|
lappend v($nest) [list LITERALPART '] |
|
#may need to be joined on pop if there are neighbouring LITERALPARTs |
|
} |
|
default { |
|
error "---- single_squote switch case not implemented for nextstate: $newstate (no space level change)" |
|
} |
|
} |
|
} |
|
double_squote { |
|
switch -exact -- $prevstate { |
|
keyval-value-expected { |
|
lappend v($nest) [list LITERAL ""] |
|
} |
|
multiliteral-space { |
|
#multiliteral-space to multiliteral-space |
|
lappend v($nest) [list LITERALPART ''] |
|
} |
|
default { |
|
error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" |
|
} |
|
} |
|
} |
|
enddquote { |
|
#nothing to do? |
|
set tok "" |
|
} |
|
endsquote { |
|
set tok "" |
|
} |
|
string { |
|
#JJJJ |
|
set tok [tomlish::from_Bstring $tok] |
|
lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes |
|
} |
|
literal { |
|
lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes |
|
} |
|
multistring { |
|
#review |
|
#JJJJ ? |
|
lappend v($nest) [list MULTISTRING $tok] |
|
} |
|
stringpart { |
|
#JJJJ |
|
set tok [tomlish::from_Bstring $tok] |
|
lappend v($nest) [list STRINGPART $tok] ;#will not get wrapped in dquotes directly |
|
} |
|
multiliteral { |
|
lappend v($nest) [LIST MULTILITERAL $tok] |
|
} |
|
literalpart { |
|
lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly |
|
} |
|
untyped_value { |
|
#would be better termed unclassified_value |
|
#we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. |
|
unset -nocomplain tag |
|
if {$tok in {true false}} { |
|
set tag BOOL |
|
} else { |
|
if {[::tomlish::utils::is_int $tok]} { |
|
set tag INT |
|
} else { |
|
if {[string is integer -strict $tok]} { |
|
#didn't qualify as a toml int - but still an int |
|
#probably means is_int is limiting size and not accepting bigints (configurable?) |
|
#or it didn't qualify due to more than 1 leading zero |
|
#or other integer format issue such as repeated underscores |
|
error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. (looks close to being an int. Formatting or range issue?) [tomlish::parse::report_line] (no space level change)" |
|
} else { |
|
#DDDD |
|
if {[::tomlish::utils::is_float $tok]} { |
|
set tag FLOAT |
|
} elseif {[::tomlish::utils::is_localtime $tok]} { |
|
set tag TIME-LOCAL |
|
} elseif {[::tomlish::utils::is_timepart $tok]} { |
|
#Note we must allow lone timepart here (not just is_localtime which doesn't allow tz offsets) in case it followed a previous localdate |
|
set tag DATETIME ;#?? review standalone time with tz - no specific tag - only allowed as followup value from DATETIME-LOCAL |
|
} elseif {[::tomlish::utils::is_datepart $tok]} { |
|
set tag DATE-LOCAL |
|
} elseif {[::tomlish::utils::is_datetime $tok]} { |
|
#not just a date or just a time |
|
#could be either local or have tz offset |
|
#DDDD JJJ |
|
set norm [string map {" " T} $tok];#prob unneeded - we won't get here if there was a space - would arrive as 2 separate tokens review. |
|
lassign [split $norm T] dp tp |
|
if {[::tomlish::utils::is_localtime $tp]} { |
|
set tag DATETIME-LOCAL |
|
} else { |
|
set tag DATETIME |
|
} |
|
} elseif {[::tomlish::utils::is_datetime X$tok] || [::tomlish::utils::is_timepart X$tok]} { |
|
# obsolete |
|
#Note we must allow lone timepart here (not just is_localtime which doesn't allow tz offsets) in case it followed a previous localdate |
|
#e.g x= 2025-01-01 02:34Z |
|
#The dict::from_tomlish validation will catch an invalid standaline timepart, or combine with leading date if applicable. |
|
set tag DATETIME |
|
} else { |
|
error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line] (no space level change)" |
|
} |
|
} |
|
} |
|
} |
|
#assert either tag is set, or we errored out. |
|
lappend v($nest) [list $tag $tok] |
|
|
|
} |
|
comment { |
|
#puts stdout "----- comment token returned '$tok'------" |
|
#JJJJ |
|
set tok [tomlish::from_comment $tok] |
|
lappend v($nest) [list COMMENT "$tok"] |
|
} |
|
equal { |
|
#we append '=' to the nest so that any surrounding whitespace is retained. |
|
lappend v($nest) = |
|
} |
|
comma { |
|
lappend v($nest) SEP |
|
} |
|
newline { |
|
incr linenum |
|
lappend v($nest) [list NEWLINE $tok] |
|
} |
|
whitespace { |
|
lappend v($nest) [list WS $tok] |
|
} |
|
continuation { |
|
lappend v($nest) CONT |
|
} |
|
bom { |
|
lappend v($nest) BOM |
|
} |
|
eof { |
|
#ok - nothing more to add to the tomlish list. |
|
#!todo - check previous tokens are complete/valid? |
|
} |
|
default { |
|
error "--- unknown tokenType '$tokenType' during state $prevstate [::tomlish::parse::report_line] (no space level change)" |
|
} |
|
} |
|
} |
|
|
|
if {!$next_tokenType_known} { |
|
::tomlish::log::notice "---- tomlish::decode::toml - current tokenType:$tokenType Next token type not known" |
|
::tomlish::parse::set_tokenType "" |
|
set tok "" |
|
} |
|
|
|
if {$state eq "end-state"} { |
|
break |
|
} |
|
|
|
|
|
} |
|
|
|
#while {$nest > 0} { |
|
# lappend v([expr {$nest -1}]) [set v($nest)] |
|
# incr nest -1 |
|
#} |
|
while {[::tomlish::parse::spacestack size] > 1} { |
|
::tomlish::parse::spacestack pop |
|
lappend v([expr {$nest -1}]) [set v($nest)] |
|
incr nest -1 |
|
|
|
#set parent [spacestack peek] ;#the level being appended to |
|
#lassign $parent type state |
|
#if {$type eq "space"} { |
|
# |
|
#} elseif {$type eq "buffer"} { |
|
# lappend v([expr {$nest -1}]) {*}[set v($nest)] |
|
#} else { |
|
# error "invalid spacestack item: $parent" |
|
#} |
|
} |
|
|
|
} finally { |
|
set is_parsing 0 |
|
} |
|
return $v(0) |
|
} |
|
|
|
#toml dquoted string to tomlish STRING |
|
# - only allow specified escape sequences |
|
# - allow any unicode except those that must be escaped: dquote, bsl, and control chars(except tab) |
|
proc from_Bstring {bstr} { |
|
#JJJJ |
|
if {[catch { |
|
tomlish::utils::unescape_string $bstr |
|
} errM]} { |
|
return -code error -errorcode {TOML SYNTAX INVALIDESCAPE} "tomlish::from_Bstring toml Bstring contains invalid escape sequence\n$errM" ;#review |
|
} |
|
#assert: all escapes are now valid |
|
|
|
if {[regexp {[\u0000-\u0008\u000A-\u001F\u007f]} $bstr]} { |
|
set msg "tomlish::from_Bstring toml Bstring contains controls that must be escaped" |
|
return -code error -errorcode {TOML SYNTAX BSTRINGUNESCAPEDCONTROLS} $msg ;#review |
|
} |
|
return $bstr |
|
} |
|
#validate toml comment |
|
# - disallow controls that must be escaped |
|
#from spec: |
|
# "Control characters other than tab (U+0000 to U+0008, U+000A to U+001F, U+007F) are not permitted in comments." |
|
proc from_comment {comment} { |
|
if {[regexp {[\u0000-\u0008\u000A-\u001F\u007f]} $comment]} { |
|
set msg "tomlish::from_comment toml comment contains controls that must be escaped" |
|
return -code error -errorcode {TOML SYNTAX COMMENTUNESCAPEDCONTROLS} $msg ;#review |
|
} |
|
return $comment |
|
} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
namespace eval tomlish::build { |
|
#STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness |
|
# take a value of the appropriate type and wrap as a tomlish tagged item |
|
proc STRING {s} { |
|
return [list STRING [::tomlish::utils::rawstring_to_Bstring_with_escaped_controls $s]] |
|
} |
|
proc LITERAL {litstring} { |
|
error todo |
|
} |
|
|
|
proc INT {i} { |
|
#whole numbers, may be prefixed with a + or - |
|
#Leading zeros are not allowed |
|
#Hex,octal binary forms are allowed (toml 1.0) |
|
#We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) |
|
#!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. |
|
# - We should probably raise an error for number larger than this and suggest the user supply it as a string? |
|
if {[tcl::string::last , $i] > -1} { |
|
error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" |
|
} |
|
if {![::tomlish::utils::int_validchars $i]} { |
|
error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" |
|
} |
|
|
|
if {[::tomlish::utils::is_int $i]} { |
|
return [list INT $i] |
|
} else { |
|
error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" |
|
} |
|
|
|
} |
|
|
|
proc FLOAT {f} { |
|
#convert any non-lower case variants of special values to lowercase for Toml |
|
if {[::tcl::string::tolower $f] in {nan +nan -nan inf +inf -inf}} { |
|
return [list FLOAT [tcl::string::tolower $f]] |
|
} |
|
if {[::tomlish::utils::is_float $f]} { |
|
return [list FLOAT $f] |
|
} else { |
|
error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" |
|
} |
|
} |
|
|
|
proc DATETIME {str} { |
|
if {[::tomlish::utils::is_datetime $str]} { |
|
return [list DATETIME $str] |
|
} else { |
|
error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" |
|
} |
|
} |
|
proc DATETIME-LOCAL {str} { |
|
error "build::DATETIME-LOCAL todo" |
|
} |
|
|
|
proc BOOLEAN {b} { |
|
#convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false |
|
if {![tcl::string::is boolean -strict $b]} { |
|
error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" |
|
} else { |
|
if {$b && 1} { |
|
return [::list BOOL true] |
|
} else { |
|
return [::list BOOL false] |
|
} |
|
} |
|
} |
|
|
|
#REVIEW |
|
#Take tablename followed by |
|
# a) *tomlish* name-value pairs e.g table mydata [list KEY item11 = [list STRING "test"]] {KEY item2 = [list INT 1]} |
|
# (accept also key <keyname> value {STRING <value>}) |
|
# b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types |
|
proc _table {name args} { |
|
set pairs [list] |
|
foreach t $args { |
|
if {[llength $t] == 4} { |
|
if {[tcl::string::tolower [lindex $t 0]] ne "key" || [tcl::string::tolower [lindex $t 2]] ni "= value"} { |
|
error "Only items tagged as KEY <keyname> = <value> currently accepted as name-value pairs for table command" |
|
} |
|
lassign $t _k keystr _eq valuepart |
|
if {[llength $valuepart] != 2} { |
|
error "supplied value must be typed. e.g {INT 1} or {STRING test}" |
|
} |
|
lappend pairs [list KEY $keystr = $valuepart] |
|
} elseif {[llength $t] == 2} { |
|
#!todo - type heuristics |
|
lassign $t n v |
|
lappend pairs [list KEY $n = [list STRING $v]] |
|
} else { |
|
error "'KEY <name> = {<type> <val}' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" |
|
} |
|
} |
|
set result [list TABLE $name {NEWLINE lf}] |
|
foreach p $pairs { |
|
lappend result $p {NEWLINE lf} |
|
} |
|
return $result |
|
#return [list TABLE $name $pairs] |
|
} |
|
|
|
|
|
#REVIEW - root & table are not correct |
|
#the tomlish root is basically a nameless table representing the root of the document |
|
proc _root {args} { |
|
set table [::tomlish::encode::table TOMLISH {*}$args] |
|
set result [lrange $table 2 end] |
|
} |
|
|
|
} |
|
|
|
namespace eval tomlish::encode { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::encode}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
|
|
|
|
|
|
#WS = whitepace, US = underscore |
|
#--------------------------------------------------------------------------------------------------------- |
|
#NOTE - this DELIBERATELY does not validate the data, or process escapes etc |
|
#It encodes the tomlish records as they are. |
|
#ie it only produces toml shaped data from a tomlish list. |
|
# |
|
#It is part of the roundtripability of data from toml to tomlish |
|
#!! ie - it is not the place to do formatting of inline vs multiline !! |
|
# That needs to be encoded in the tomlish data that is being passed in |
|
# (e.g from_dict could make formatting decisions in the tomlish it produces) |
|
# |
|
#e.g duplicate keys etc can exist in the toml output. |
|
#The dict::from_tomlish tomlish::from_dict (or any equivalent processor pair) is responsible for validation and conversion |
|
#back and forth of escape sequences where appropriate. |
|
#--------------------------------------------------------------------------------------------------------- |
|
proc tomlish {list {context ""}} { |
|
if {![tcl::string::is list $list]} { |
|
error "Supplied 'tomlish' is not a valid Tcl list. Expected a tagged list (parsed Toml)" |
|
} |
|
set toml "" ;#result string |
|
|
|
foreach item $list { |
|
set tag [lindex $item 0] |
|
#puts "tomlish::encode::tomlish processing item '$item', tag '$tag'" |
|
#during recursion, some tags require different error checking in different contexts. |
|
set nextcontext $tag ; |
|
|
|
#Handle invalid tag nestings |
|
switch -- $context { |
|
DQKEY - |
|
SQKEY - |
|
KEY { |
|
if {$tag in {KEY DQKEY SQKEY}} { |
|
error "Invalid tag '$tag' encountered within '$context'" |
|
} |
|
} |
|
MULTISTRING { |
|
#explicitly list the valid child tags |
|
set ms_parts {STRING STRINGPART WS NEWLINE CONT} |
|
if {$tag ni $ms_parts} { |
|
error "Invalid tag '$tag' encountered within a MULTISTRING must belong to: $ms_parts" |
|
} |
|
} |
|
MULTILITERAL { |
|
set ml_parts {LITERALPART NEWLINE} |
|
if {$tag ni $ml_parts} { |
|
error "Invalid tag '$tag' encountered within a MULTILITERAL must belong to: $ml_parts" |
|
} |
|
} |
|
default { |
|
#no context, or no defined nesting error for this context |
|
} |
|
} |
|
|
|
switch -- $tag { |
|
TOMLISH { |
|
#optional root tag. Ignore. |
|
} |
|
DQKEY - |
|
SQKEY - |
|
KEY { |
|
# |
|
if {$tag eq "KEY"} { |
|
append toml [lindex $item 1] ;#Key |
|
} elseif {$tag eq "SQKEY"} { |
|
append toml '[lindex $item 1]' ;#SQuoted Key |
|
} else { |
|
append toml \"[lindex $item 1]\" ;#DQuoted Key |
|
} |
|
#= could be at various positions depending on WS |
|
foreach part [lrange $item 2 end] { |
|
if {$part eq "="} { |
|
append toml "=" |
|
} else { |
|
append toml [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
} |
|
} |
|
DOTTEDKEY { |
|
#DQKEY, SQKEY, BAREKEY, WS, DOTSEP |
|
foreach part [lindex $item 1] { |
|
append toml [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
#whitespace at tail of dotted key elements is within the list in element 1 |
|
#generally we expect the = to be at element 2 - review |
|
foreach part [lrange $item 2 end] { |
|
if {$part eq "="} { |
|
append toml "=" |
|
} else { |
|
#puts "encoding [list $part] (context:$nextcontext)" |
|
append toml [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
} |
|
} |
|
TABLE { |
|
append toml "\[[lindex $item 1]\]" ;#table name |
|
foreach part [lrange $item 2 end] { |
|
append toml [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
} |
|
ITABLE { |
|
#inline table - e.g within array or on RHS of keyval/qkeyval |
|
set data "" |
|
foreach part [lrange $item 1 end] { |
|
append data [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml "\{$data\}" |
|
} |
|
TABLEARRAY { |
|
append toml "\[\[[lindex $item 1]\]\]" ;#tablearray name |
|
foreach part [lrange $item 2 end] { |
|
append toml [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
} |
|
ARRAY { |
|
set arraystr "" |
|
foreach part [lrange $item 1 end] { |
|
append arraystr [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml "\[$arraystr\]" |
|
} |
|
WS { |
|
append toml [lindex $item 1] |
|
} |
|
SEP { |
|
append toml "," |
|
} |
|
DOTSEP { |
|
append toml "." |
|
} |
|
NEWLINE { |
|
set chartype [lindex $item 1] |
|
if {$chartype eq "lf"} { |
|
append toml \n |
|
} elseif {$chartype eq "crlf"} { |
|
append toml \r\n |
|
} else { |
|
error "Unrecognized newline type '$chartype'" |
|
} |
|
} |
|
CONT { |
|
#line continuation character "\" |
|
append toml "\\" |
|
} |
|
STRING { |
|
#Basic string (Bstring) |
|
#simple double quoted strings only |
|
# |
|
append toml \"[lindex $item 1]\" |
|
} |
|
STRINGPART { |
|
append toml [lindex $item 1] |
|
} |
|
MULTISTRING { |
|
#Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts |
|
set multistring "" ;#variable to build up the string |
|
foreach part [lrange $item 1 end] { |
|
append multistring [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml "\"\"\"$multistring\"\"\"" |
|
} |
|
LITERAL { |
|
#Single Quoted string(literal string) |
|
append toml '[lindex $item 1]' |
|
} |
|
LITERALPART { |
|
append toml [lindex $item 1] |
|
} |
|
MULTILITERAL { |
|
#multiliteral could be handled as a single literal if we allowed literal to contain newlines |
|
#- except that the first newline must be retained for roundtripping tomlish <-> toml but |
|
# the first newline is not part of the data. |
|
# we elect instead to maintain a basic LITERALPART that must not contain newlines.. |
|
# and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, |
|
#with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. |
|
set literal "" |
|
foreach part [lrange $item 1 end] { |
|
append literal [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml '''$literal''' |
|
} |
|
INT - |
|
BOOL - |
|
FLOAT - |
|
DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL { |
|
#DDDD |
|
append toml [lindex $item 1] |
|
} |
|
INCOMPLETE { |
|
error "cannot process tomlish term tagged as INCOMPLETE" |
|
} |
|
COMMENT { |
|
append toml "#[lindex $item 1]" |
|
} |
|
BOM { |
|
#Byte Order Mark may appear at beginning of a file. Needs to be preserved. |
|
append toml "\uFEFF" |
|
} |
|
default { |
|
error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." |
|
} |
|
} |
|
|
|
} |
|
return $toml |
|
} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::encode ---}] |
|
} |
|
#fish toml from tomlish |
|
|
|
#(encode tomlish as toml) |
|
interp alias {} tomlish::to_toml {} tomlish::encode::tomlish |
|
|
|
# |
|
|
|
|
|
namespace eval tomlish::decode { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::decode}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::decode ---}] |
|
} |
|
#decode toml to tomlish |
|
#interp alias {} tomlish::from_toml {} tomlish::decode::toml |
|
|
|
namespace eval tomlish::utils { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::utils}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
|
|
|
|
#basic generic quote matching for single and double quotes |
|
#note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes |
|
proc tok_in_quotedpart {tok} { |
|
set sLen [tcl::string::length $tok] |
|
set quote_type "" |
|
set had_slash 0 |
|
for {set i 0} {$i < $sLen} {incr i} { |
|
set c [tcl::string::index $tok $i] |
|
if {$quote_type eq ""} { |
|
if {$had_slash} { |
|
#don't enter quote mode |
|
#leave slash_mode because even if current char is slash - it is escaped |
|
set had_slash 0 |
|
} else { |
|
set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] |
|
switch -- $ctype { |
|
dq { |
|
set quote_type dq |
|
} |
|
sq { |
|
set quote_type sq |
|
} |
|
bsl { |
|
set had_slash 1 |
|
} |
|
} |
|
} |
|
} else { |
|
if {$had_slash} { |
|
#don't leave quoted mode |
|
#leave slash_mode because even if current char is slash - it is escaped |
|
set had_slash 0 |
|
} else { |
|
set ctype [tcl::string::map [list {"} dq {'} sq \\ bsl] $c] |
|
switch -- $ctype { |
|
dq { |
|
if {$quote_type eq "dq"} { |
|
set quote_type "" |
|
} |
|
} |
|
sq { |
|
if {$quote_type eq "sq"} { |
|
set quote_type "" |
|
} |
|
} |
|
bsl { |
|
set had_slash 1 |
|
} |
|
} |
|
} |
|
} |
|
} |
|
return $quote_type ;#dq | sq |
|
} |
|
|
|
proc hex_escape_info {slashx} { |
|
set exp {^\\x([0-9a-fA-F]{2}$)} |
|
if {[regexp $exp $slashx match hex]} { |
|
return [list ok [list char [subst -nocommand -novariable $slashx]]] |
|
} else { |
|
return [list err [list reason "Supplied string not of the form \\xHH where H in \[0-9a-fA-F\]"]] |
|
} |
|
} |
|
proc unicode_escape_info {slashu} { |
|
#!todo |
|
# validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and |
|
# is a valid 'unicode scalar value' (any Unicode code point except high-surrogate and low-surrogate code points) |
|
# ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive |
|
#expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} |
|
if {[tcl::string::match {\\u*} $slashu]} { |
|
set exp {^\\u([0-9a-fA-F]{4}$)} |
|
if {[regexp $exp $slashu match hex]} { |
|
if {[scan $hex %4x dec] != 1} { |
|
#why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? |
|
return [list err [list reason "Failed to convert '$hex' to decimal"]] |
|
} else { |
|
return [list ok [list char [subst -nocommand -novariable $slashu]]] |
|
} |
|
} else { |
|
return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] |
|
} |
|
} elseif {[tcl::string::match {\\U*} $slashu]} { |
|
set exp {^\\U([0-9a-fA-F]{8}$)} |
|
if {[regexp $exp $slashu match hex]} { |
|
if {[scan $hex %8x dec] != 1} { |
|
#why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? |
|
return [list err [list reason "Failed to convert '$hex' to decimal"]] |
|
} else { |
|
if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { |
|
return [list ok [list char [subst -nocommand -novariable $slashu]]] |
|
} else { |
|
return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] |
|
} |
|
} |
|
} else { |
|
return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] |
|
} |
|
} else { |
|
return [list err [list reason "Supplied string did not start with \\u or \\U" ]] |
|
} |
|
|
|
} |
|
|
|
#Note that unicode characters don't *have* to be escaped. |
|
#So if we provide a function named 'escape_string', the name implies the inverse of unescape_string which unescapes unicode \u \U values. |
|
#- an inverse of unescape_string would encode all unicode chars unnecessarily. |
|
#- as toml accepts a compact escape sequence for common chars such as tab,backspace,linefeed etc but also allows the full form \u009 etc |
|
#- escape_string and unescape_string would not be reliably roundtrippable inverses anyway. |
|
#REVIEW - provide it anyway? When would it be desirable to use? |
|
|
|
variable Bstring_control_map [dict create] |
|
dict set Bstring_control_map \b {\b} |
|
dict set Bstring_control_map \n {\n} |
|
dict set Bstring_control_map \r {\r} |
|
dict set Bstring_control_map \" {\"} |
|
dict set Bstring_control_map \x1b {\e} ;#In spec it's included in the list of 'must be escaped', as well as the 'convenience' escapes - so we make it go both ways. |
|
dict set Bstring_control_map \\ "\\\\" |
|
|
|
#\e for \x1b seems like it might be included - v1.1?? hard to find current state of where toml is going :/ |
|
#for a Bstring (Basic string) tab is explicitly mentioned as not being one that must be escaped. |
|
#8 = \b - already in list. |
|
#built the remainder whilst checking for entries already hardcoded above -in case more are added to the hardcoded list |
|
for {set cdec 0} {$cdec <= 7} {incr cdec} { |
|
set hhhh [format %.4X $cdec] |
|
set char [format %c $cdec] |
|
if {![dict exists $Bstring_control_map $char]} { |
|
dict set Bstring_control_map $char \\u$hhhh |
|
} |
|
} |
|
for {set cdec [expr {0x0A}]} {$cdec <= 0x1F} {incr cdec} { |
|
set hhhh [format %.4X $cdec] |
|
set char [format %c $cdec] |
|
if {![dict exists $Bstring_control_map $char]} { |
|
dict set Bstring_control_map $char \\u$hhhh |
|
} |
|
} |
|
# \u007F = 127 |
|
dict set Bstring_control_map [format %c 127] \\u007F |
|
|
|
# ------------------------------------------------------------------ |
|
variable Literal_control_map [dict create] |
|
#controls other than tab |
|
for {set cdec 0} {$cdec <= 8} {incr cdec} { |
|
set hhhh [format %.4X $cdec] |
|
set char [format %c $cdec] |
|
if {![dict exists $Literal_control_map $char]} { |
|
dict set Literal_control_map $char \\u$hhhh |
|
} |
|
} |
|
for {set cdec [expr {0x0A}]} {$cdec <= 0x1F} {incr cdec} { |
|
set hhhh [format %.4X $cdec] |
|
set char [format %c $cdec] |
|
if {![dict exists $Literal_control_map $char]} { |
|
dict set Literal_control_map $char \\u$hhhh |
|
} |
|
} |
|
# \u007F = 127 |
|
dict set Literal_control_map [format %c 127] \\u007F |
|
# ------------------------------------------------------------------ |
|
variable Multiliteral_control_map |
|
set Multiliteral_control_map [dict remove $Literal_control_map \n] |
|
|
|
variable String_control_map |
|
set String_control_map [dict remove $Literal_control_map \\] |
|
|
|
|
|
variable MultiBstring_totoml_map |
|
#'minimally' escaped sequences of double quotes. |
|
#e.g {""\"""\"} vs {\"\"\"\"\"} |
|
#This produces easier to read toml - and in many cases may be more likely to match original format when roundtripped from dict datastructure |
|
# REVIEW - should this be configurable? |
|
set MultiBstring_totoml_map [dict remove $Bstring_control_map {"} \r \n] |
|
dict set MultiBstring_totoml_map {"""} {""\"} ;#" editor hack: commented quote for dumb syntax highlighers |
|
|
|
#Note the inclusion of backslash in the list of controls makes this non idempotent - subsequent runs would keep encoding the backslashes! |
|
#escape only those chars that must be escaped in a Bstring (e.g not tab which can be literal or escaped) |
|
#for example - can be used by from_dict to produce valid Bstring data for a tomlish record |
|
proc rawstring_to_Bstring_with_escaped_controls {str} { |
|
#for the well known chars that have compact escape sequences allowed by toml - we choose that form over the full \u form. |
|
#we'll use a string map with an explicit list rather than algorithmic at runtime |
|
# - the string map is probably more performant than splitting a string, especially if it's large |
|
|
|
upvar ::tomlish::utils::Bstring_control_map map |
|
|
|
return [string map $map $str] |
|
} |
|
proc rawstring_to_MultiBstring_with_escaped_controls {str} { |
|
#for the well known chars that have compact escape sequences allowed by toml - we choose that form over the full \u form. |
|
#we'll use a string map with an explicit list rather than algorithmic at runtime |
|
# - the string map is probably more performant than splitting a string, especially if it's large |
|
|
|
upvar ::tomlish::utils::MultiBstring_totoml_map map |
|
|
|
return [string map $map $str] |
|
} |
|
|
|
proc rawstring_is_valid_tomlstring {str} { |
|
#controls are allowed in this direction dict -> toml (they get quoted) |
|
|
|
#check any existing escapes are valid |
|
if {[catch { |
|
unescape_string $str |
|
} errM]} { |
|
return 0 |
|
} |
|
return 1 |
|
} |
|
|
|
proc rawstring_is_valid_literal {str} { |
|
#detect control chars other than tab |
|
variable Literal_control_map |
|
set testval [string map $Literal_control_map $str] |
|
return [expr {$testval eq $str}] |
|
} |
|
proc rawstring_is_valid_multiliteral {str} { |
|
#detect control chars other than tab |
|
variable Multiliteral_control_map |
|
|
|
set teststr [string map [list \r\n ok] $str] |
|
|
|
set testval [string map $Multiliteral_control_map $teststr] |
|
return [expr {$testval eq $teststr}] |
|
} |
|
|
|
#review - unescape what string? Bstring vs MLBstring? |
|
#we should be specific in the function naming here |
|
#used by dict::from_tomlish - so part of validation? - REVIEW |
|
proc unescape_string {str} { |
|
#note we can't just use Tcl subst because: |
|
# it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. |
|
# it would strip out backslashes inappropriately: e.g "\j" becomes just j |
|
# it recognizes other escapes which aren't approprite e.g octal \nnn |
|
# it replaces \<newline><whitespace> with a single whitespace (trailing backslash) |
|
#This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh |
|
#plus \e for \x1b? |
|
|
|
set buffer "" |
|
set buffer2 "" ;#buffer for 2 hex characters following a \x |
|
set buffer4 "" ;#buffer for 4 hex characters following a \u |
|
set buffer8 "" ;#buffer for 8 hex characters following a \u |
|
|
|
set sLen [tcl::string::length $str] |
|
|
|
#we need to handle arbitrarily long sequences of backslashes. \\\\\ etc |
|
set slash_active 0 |
|
set unicode2_active 0 |
|
set unicode4_active 0 |
|
set unicode8_active 0 |
|
|
|
::tomlish::log::debug "unescape_string. got len [string length str] str $str" |
|
|
|
#!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? |
|
set i 0 |
|
for {} {$i < $sLen} {} { |
|
if {$i > 0} { |
|
set lastChar [tcl::string::index $str [expr {$i - 1}]] |
|
} else { |
|
set lastChar "" |
|
} |
|
|
|
set c [tcl::string::index $str $i] |
|
#::tomlish::log::debug "unescape_string. got char $c" ;#too much? |
|
|
|
##---------------------- |
|
##as we are 'unescaping' - should we really be testing here for existing values that should have been escaped? |
|
##The answer is probably no - keep this function to a single purpose - test elsewhere for raw controls. |
|
##this test looks incomplete anyway REVIEW |
|
#scan $c %c n |
|
#if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { |
|
# #we don't expect unescaped unicode characters from 0000 to 001F - |
|
# #*except* for raw tab (which is whitespace) and newlines |
|
# error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" |
|
#} |
|
##---------------------- |
|
|
|
incr i ;#must incr here because we do'returns'inside the loop |
|
if {$c eq "\\"} { |
|
if {$slash_active} { |
|
append buffer "\\" |
|
set slash_active 0 |
|
} elseif {$unicode2_active} { |
|
error "unescape_string. unexpected case slash during unicode2 not yet handled" |
|
} elseif {$unicode4_active} { |
|
error "unescape_string. unexpected case slash during unicode4 not yet handled" |
|
} elseif {$unicode8_active} { |
|
error "unescape_string. unexpected case slash during unicode8 not yet handled" |
|
} else { |
|
# don't output anything (yet) |
|
set slash_active 1 |
|
} |
|
} else { |
|
if {$unicode2_active} { |
|
if {[tcl::string::length $buffer2] < 2} { |
|
append buffer2 $c |
|
} |
|
if {[tcl::string::length $buffer2] == 2} { |
|
#we have a \xHH to test |
|
set unicode2_active 0 |
|
set result [tomlish::utils::hex_escape_info "\\x$buffer2"] |
|
if {[lindex $result 0] eq "ok"} { |
|
append buffer [dict get $result ok char] |
|
} else { |
|
error "unescape_string error: [lindex $result 1]" |
|
} |
|
} |
|
} elseif {$unicode4_active} { |
|
if {[tcl::string::length $buffer4] < 4} { |
|
append buffer4 $c |
|
} |
|
if {[tcl::string::length $buffer4] == 4} { |
|
#we have a \uHHHH to test |
|
set unicode4_active 0 |
|
set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] |
|
if {[lindex $result 0] eq "ok"} { |
|
append buffer [dict get $result ok char] |
|
} else { |
|
error "unescape_string error: [lindex $result 1]" |
|
} |
|
} |
|
} elseif {$unicode8_active} { |
|
if {[tcl::string::length $buffer8] < 8} { |
|
append buffer8 $c |
|
} |
|
if {[tcl::string::length $buffer8] == 8} { |
|
#we have a \UHHHHHHHH to test |
|
set unicode8_active 0 |
|
set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] |
|
if {[lindex $result 0] eq "ok"} { |
|
append buffer [dict get $result ok char] |
|
} else { |
|
error "unescape_string error: [lindex $result 1]" |
|
} |
|
} |
|
} elseif {$slash_active} { |
|
set slash_active 0 |
|
set ctest [tcl::string::map {{"} dq} $c] |
|
switch -exact -- $ctest { |
|
dq { |
|
append buffer {"} |
|
} |
|
b - t - n - f - r { |
|
append buffer [subst -nocommand -novariable "\\$c"] |
|
} |
|
e { |
|
append buffer \x1b |
|
} |
|
x { |
|
#introduced in 1.1.0 \xHH |
|
set unicode2_active 1 |
|
set buffer2 "" |
|
} |
|
u { |
|
set unicode4_active 1 |
|
set buffer4 "" |
|
} |
|
U { |
|
set unicode8_active 1 |
|
set buffer8 "" |
|
} |
|
default { |
|
set slash_active 0 |
|
#review - toml spec says all other escapes are reserved |
|
#and if they are used TOML should produce an error. |
|
#append buffer "\\$c" |
|
set msg "Invalid escape sequence \\ followed by '$c'" |
|
return -code error -errorcode {TOMLISH SYNTAX INVALIDESCAPE} $msg |
|
} |
|
} |
|
} else { |
|
append buffer $c |
|
} |
|
} |
|
} |
|
#puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" |
|
if {$unicode2_active} { |
|
error "End of string reached before complete hex escape sequence \xHH" |
|
} |
|
if {$unicode4_active} { |
|
error "End of string reached before complete unicode escape sequence \uHHHH" |
|
} |
|
if {$unicode8_active} { |
|
error "End of string reached before complete unicode escape sequence \UHHHHHHHH" |
|
} |
|
if {$slash_active} { |
|
append buffer "\\" |
|
} |
|
try { |
|
encoding convertto utf-8 $buffer |
|
} trap {} {emsg eopts} { |
|
return -code error -errorcode {TOMLISH SYNTAX ENCODINGERROR} $emsg |
|
} |
|
return $buffer |
|
} |
|
|
|
#This does not have to do with unicode normal forms - which it seems toml has decided against regarding use in keys (review/references?) |
|
#This is meant for internal use regarding ensuring we match equivalent keys which may have just been specified with different string mechanisms, |
|
#e.g squoted vs dquoted vs barekey. |
|
proc normalize_key {rawkey} { |
|
set c1 [tcl::string::index $rawkey 0] |
|
set c2 [tcl::string::index $rawkey end] |
|
if {($c1 eq "'") && ($c2 eq "'")} { |
|
#single quoted segment. No escapes allowed within it. |
|
set key [tcl::string::range $rawkey 1 end-1] |
|
} elseif {($c1 eq "\"") && ($c2 eq "\"")} { |
|
#double quoted segment. Unapply escapes. |
|
# |
|
set keydata [tcl::string::range $rawkey 1 end-1] ;#strip outer quotes only |
|
#e.g key could have mix of \UXXXXXXXX escapes and unicode chars |
|
#or mix of \t and literal tabs. |
|
#unescape to convert all to literal versions for comparison |
|
set key [::tomlish::utils::unescape_string $keydata] |
|
#set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. |
|
} else { |
|
set key $rawkey |
|
} |
|
return $key |
|
} |
|
|
|
proc string_to_slashu {string} { |
|
set rv {} |
|
foreach c [split $string {}] { |
|
scan $c %c cdec |
|
if {$cdec > 65535} { |
|
append rv {\U} [format %.8X $cdec] |
|
} else { |
|
append rv {\u} [format %.4X $cdec] |
|
} |
|
} |
|
return $rv |
|
} |
|
|
|
#'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. |
|
#This is used for display purposes only (error msgs) |
|
proc nonprintable_to_slashu {s} { |
|
set res "" |
|
foreach i [split $s ""] { |
|
scan $i %c cdec |
|
|
|
set printable 0 |
|
if {($cdec>31) && ($cdec<127)} { |
|
set printable 1 |
|
} |
|
if {$printable} { |
|
append res $i |
|
} else { |
|
if {$cdec > 65535} { |
|
append res \\U[format %.8X $cdec] |
|
} else { |
|
append res \\u[format %.4X $cdec] |
|
} |
|
} |
|
} |
|
set res |
|
} ;# initial version from tcl wiki RS |
|
|
|
proc rawstring_to_jsonstring {s} { |
|
#like nonprintable_to_slashu |
|
# - also escape every dquote |
|
# - escape newlines |
|
set res "" |
|
foreach i [split $s ""] { |
|
scan $i %c cdec |
|
switch -- $cdec { |
|
34 { |
|
#double quote |
|
append res \\\" |
|
} |
|
13 { |
|
#carriage return |
|
append res \\r |
|
} |
|
8 { |
|
append res \\b |
|
} |
|
9 { |
|
append res \\t |
|
} |
|
10 { |
|
#linefeed |
|
append res \\n |
|
} |
|
92 { |
|
append res \\\\ |
|
} |
|
default { |
|
set printable 0 |
|
if {($cdec>31) && ($cdec<127)} { |
|
set printable 1 |
|
} |
|
if {$printable} { |
|
append res $i |
|
} else { |
|
if {$cdec > 65535} { |
|
#append res $i |
|
#append res \\U[format %.8X $cdec] ;#wrong |
|
#append res "\\U{[format %.8x $cdec]}" ;#some variation of json? |
|
package require punk::cesu |
|
#e.g \U0001f610 emoticon face |
|
#surrogate pair: \uD83D\uDE10 |
|
set surrogatepair [punk::cesu::to_surrogatestring -format escape $i] |
|
append res $surrogatepair |
|
} else { |
|
append res \\u[format %.4X $cdec] |
|
} |
|
} |
|
} |
|
} |
|
} |
|
set res |
|
|
|
} |
|
|
|
#check if str is valid for use as a toml bare key |
|
#Early toml versions only allowed letters + underscore + dash |
|
proc is_basic_barekey {str} { |
|
if {[tcl::string::length $str] == 0} { |
|
return 0 |
|
} else { |
|
set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] |
|
if {[tcl::string::length $str] == $matches} { |
|
#all characters match the regexp |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
} |
|
|
|
#from toml.abnf in github.com/toml-lang/toml |
|
#unquoted-key = 1*unquoted-key-char |
|
#unquoted-key-char = ALPHA / DIGIT / %x2D / %x5F ; a-z A-Z 0-9 - _ |
|
#unquoted-key-char =/ %xB2 / %xB3 / %xB9 / %xBC-BE ; superscript digits, fractions |
|
#unquoted-key-char =/ %xC0-D6 / %xD8-F6 / %xF8-37D ; non-symbol chars in Latin block |
|
#unquoted-key-char =/ %x37F-1FFF ; exclude GREEK QUESTION MARK, which is basically a semi-colon |
|
#unquoted-key-char =/ %x200C-200D / %x203F-2040 ; from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ |
|
#unquoted-key-char =/ %x2070-218F / %x2460-24FF ; include super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics |
|
#unquoted-key-char =/ %x2C00-2FEF / %x3001-D7FF ; skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces |
|
#unquoted-key-char =/ %x2070-21FF / %x2300-24FF ; skip math operators |
|
#unquoted-key-char =/ %x25A0-268B / %x2690-2757 ; skip box drawing, block elements, and some yin-yang symbols |
|
#unquoted-key-char =/ %x2762-2767 / %x2776-27E5 ; skip some Dingbat punctuation |
|
#unquoted-key-char =/ %x2801-297F ; skip some math brackets and arrows, and braille blank |
|
#unquoted-key-char =/ %x2B00-2FFF / %x3001-D7FF ; skip various math operators and symbols, and ideographic space |
|
#unquoted-key-char =/ %xF900-FDCF / %xFDF0-FFFD ; skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode) |
|
#unquoted-key-char =/ %x10000-EFFFF ; all chars outside BMP range, excluding Private Use planes (F0000-10FFFF) |
|
variable re_barekey |
|
set ranges [list] |
|
lappend ranges {a-zA-Z0-9\_\-} |
|
lappend ranges {\u00B2} {\u00B3} {\u00B9} {\u00BC-\u00BE} ;# superscript digits, fractions |
|
lappend ranges {\u00C0-\u00D6} {\u00D8-\u00F6} {\u00F8-\u037D} ;# non-symbol chars in Latin block |
|
lappend ranges {\u037f-\u1FFF} ;# exclude GREEK QUESTION MARK, which is basically a semi-colon |
|
lappend ranges {\u200C-\u200D} {\u203F-\u2040} ;# from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ |
|
lappend ranges {\u2070-\u218f} {\u2460-\u24FF} ;# include super-subscripts, letterlike/numberlike forms, enclosed alphanumerics |
|
lappend ranges {\u2C00-\u2FEF} {\u3001-\uD7FF} ;# skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces |
|
lappend ranges {\u2070-\u21FF} {\u2300-\u24FF} ;# skip math operators |
|
lappend ranges {\u25A0-\u268B} {\u2690-\u2757} ;# skip box drawing, block elements, and some yin-yang symbols |
|
lappend ranges {\u2762-\u2767} {\u2776-\u27E5} ;# skip some Dingbat punctuation |
|
lappend ranges {\u2801-\u297F} ;# skip some math brackets and arrows, and braille blank |
|
lappend ranges {\u2B00-\u2FFF} {\u3001-\uD7FF} ;# skip various math operators and symbols, and ideographic space |
|
lappend ranges {\uF900-\uFDCF} {\uFDF0-\uFFFD} ;# skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode) |
|
lappend ranges {\U10000-\UEFFFF} ;# all chars outside BMP range, excluding Private Use planes (F0000-10FFFF) |
|
set re_barekey {^[} |
|
foreach r $ranges { |
|
append re_barekey $r |
|
} |
|
append re_barekey {]+$} |
|
|
|
proc is_barekey {str} { |
|
if {[tcl::string::length $str] == 0} { |
|
return 0 |
|
} |
|
variable re_barekey |
|
return [regexp $re_barekey $str] |
|
} |
|
|
|
#test only that the characters in str are valid for the toml specified type 'integer'. |
|
proc int_validchars1 {str} { |
|
set numchars [tcl::string::length $str] |
|
if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
#add support for hex,octal,binary 0x.. 0o.. 0b... |
|
proc int_validchars {str} { |
|
set numchars [tcl::string::length $str] |
|
if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
|
|
proc is_int {str} { |
|
set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] ;#0b101 etc covered by a-f |
|
|
|
if {[tcl::string::length $str] == $matches} { |
|
#all characters in legal range |
|
|
|
# --------------------------------------- |
|
#check for leading zeroes in non 0x 0b 0o |
|
#first strip any +, - or _ (just for this test) |
|
#(but still allowing 0 -0 +0) |
|
set check [tcl::string::map {+ "" - "" _ ""} $str] |
|
if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { |
|
return 0 |
|
} |
|
# --------------------------------------- |
|
|
|
#check +,- only occur in the first position. (excludes also +++1 etc) |
|
if {[tcl::string::last - $str] > 0} { |
|
return 0 |
|
} |
|
if {[tcl::string::last + $str] > 0} { |
|
return 0 |
|
} |
|
|
|
#------------------------------------------- |
|
#unclear if a 'digit' includes the type specifiers x b o |
|
#we assume the 0x 0b 0o are NOT counted as digits - as underscores here would seem |
|
#to be likely to cause interop issues with other systems |
|
#(e.g tcl allows 0b1_1 but not 0b_11) |
|
#Most of this structure would be unnecessary if we could rely on string::is::integer understanding underscores (9+?) |
|
#we still need to support earlier Tcl for now though. |
|
|
|
#first rule out any case with more than one underscore in a row |
|
if {[regexp {__} $str]} { |
|
return 0 |
|
} |
|
if {[string index $str 0] eq "_"} { |
|
return 0 |
|
} |
|
set utest [string trimleft $str +-] |
|
#test again for further trick like _+_0xFF |
|
if {[string index $utest 0] eq "_"} { |
|
return 0 |
|
} |
|
if {[string range $utest 0 1] in {0x 0b 0o}} { |
|
set testnum [string range $utest 2 end] |
|
#spec says *non-negative* integers may *also* be expressed in hex, octal or binary |
|
#and also explicitly states + not allowed |
|
#presumed to mean negative not allowed. |
|
if {[string index $str 0] in {- +}} { |
|
return 0 |
|
} |
|
} else { |
|
set testnum $utest |
|
#exclude also things like 0_x 0___b that snuck past our prefix test |
|
if {![string is digit -strict [string map {_ ""} $testnum]]} { |
|
return 0 |
|
} |
|
#assert - only digits and underscores in testnum |
|
#still may have underscores at each end |
|
} |
|
#assert testnum is now the 'digits' portion of a <digits>, 0x<digits> 0b<digits> 0o<digits> number |
|
#(+ and - already stripped) |
|
#It may still have chars unsuitable for its type - which will be caught by the string::is::integer test below |
|
if {[string length $testnum] != [string length [string trim $testnum _]]} { |
|
#had non-inner underscores in 'digit' part |
|
return 0 |
|
} |
|
#assert str only has solo inner underscores (if any) between 'digits' |
|
#------------------------------------------- |
|
|
|
set numeric_value [tcl::string::map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores |
|
#use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) |
|
if {![tcl::string::is integer -strict $numeric_value]} { |
|
return 0 |
|
} |
|
|
|
|
|
|
|
#!todo - check bounds only based on some config value |
|
#even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements by default (for now) |
|
#presumably very large numbers would have to be supplied in a toml file as strings. |
|
#Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max |
|
#some question around implementations allowed to use lower values such as 2^31 on some systems? |
|
if {$::tomlish::max_int ne "" && $numeric_value > $::tomlish::max_int} { |
|
return 0 |
|
} |
|
if {$::tomlish::min_int ne "" && $numeric_value < $::tomlish::min_int} { |
|
return 0 |
|
} |
|
} else { |
|
return 0 |
|
} |
|
#Got this far - didn't find anything wrong with it. |
|
return 1 |
|
} |
|
|
|
#test only that the characters in str are valid for the toml specified type 'float'. |
|
proc float_validchars {str} { |
|
set numchars [tcl::string::length $str] |
|
if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
#only allow lower case for these special values - as per Toml 1.0 spec |
|
if {$str ni {inf +inf -inf nan +nan -nan}} { |
|
return 0 |
|
} else { |
|
return 1 |
|
} |
|
} |
|
} |
|
|
|
#note - Tcl's string is double will return true also for the subset of float values which are integers |
|
#This function is to determine whether it matches the Toml float concept - so requires a . or e or E |
|
proc is_float {str} { |
|
#vip greenlight known literals, don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) |
|
if {$str in {inf +inf -inf nan +nan -nan}} { |
|
return 1 |
|
} |
|
#doorcheck the basics for floatiness vs members of that rival gang - ints |
|
if {![regexp {[.eE]} $str]} { |
|
#could be an integer - which isn't specifically a float for Toml purposes. |
|
return 0 |
|
} |
|
|
|
|
|
#patdown for any contraband chars |
|
set matches [regexp -all {[eE0-9\_\-\+\.]} $str] |
|
if {[tcl::string::length $str] != $matches} { |
|
return 0 |
|
} |
|
|
|
#all characters in legal range |
|
|
|
#A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) |
|
|
|
#Early Toml spec also disallowed leading zeros in the exponent part(?) |
|
#... this seems less interoperable anyway (some libraries generate leading zeroes in exponents) |
|
#we allow leading zeros in exponents here. |
|
|
|
#Check for leading zeros in main part |
|
#first strip any +, - or _ (just for this test) |
|
set check [tcl::string::map {+ "" - "" _ ""} $str] |
|
set r {([0-9])*} |
|
regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E |
|
#leading zero only if exactly one zero |
|
if {$intpart ne "0" && [string match 0* $intpart]} { |
|
return 0 |
|
} |
|
|
|
#for floats, +,- may occur in multiple places |
|
#e.g -2E-22 +3e34 |
|
#!todo - check bounds ? |
|
|
|
#----------------------------------------- |
|
if {[regexp {__} $str]} { |
|
return 0 |
|
} |
|
if {[string index $str 0] eq "_" || [string index $str end] eq "_"} { |
|
return 0 |
|
} |
|
set utest [string trimleft $str +-] |
|
#test again for further trick like _+_ |
|
if {[string index $utest 0] eq "_"} { |
|
return 0 |
|
} |
|
#----------------------------------------- |
|
|
|
#decimal point, if used must be surrounded by at least one digit on each side |
|
#e.g 3.e+20 also illegal |
|
set dposn [string first . $str] |
|
if {$dposn > -1 } { |
|
set d3 [string range $str $dposn-1 $dposn+1] |
|
if {![string is integer -strict [string index $d3 0]] || ![string is integer -strict [string index $d3 2]]} { |
|
return 0 |
|
} |
|
} |
|
#we've already eliminated leading/trailing underscores |
|
#now ensure each inner underscore is surrounded by digits |
|
if {[regexp {_[^0-9]|[^0-9]_} $str]} { |
|
return 0 |
|
} |
|
|
|
#strip underscores for tcl double check so we can support < tcl 9 versions which didn't allow underscores |
|
set check [tcl::string::map {_ ""} $str] |
|
#string is double accepts inf nan +NaN etc. |
|
if {![tcl::string::is double $check]} { |
|
return 0 |
|
} |
|
|
|
#All good - seems to be a toml-approved float and not an int. |
|
return 1 |
|
} |
|
|
|
#test only that the characters in str are valid for the toml specified type 'datetime'. |
|
proc datetime_validchars {str} { |
|
set numchars [tcl::string::length $str] |
|
if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
|
|
proc is_datepart {str} { |
|
set matches [regexp -all {[0-9\-]} $str] |
|
if {[tcl::string::length $str] != $matches} { |
|
return 0 |
|
} |
|
#seems to require yyyy-mm-dd (e.g not allowing just yyyy-mm) |
|
if {![regexp {^([0-9]{4})-([0-9]{2})-([0-9]{2})$} $str _match y m d]} { |
|
return 0 |
|
} |
|
if {$m > 12 || $m == 0} { |
|
return 0 |
|
} |
|
switch -- [expr {$m}] { |
|
1 - 3 - 5 - 7 - 8 - 10 - 12 { |
|
if {$d > 31 || $d == 0} { |
|
return 0 |
|
} |
|
} |
|
2 { |
|
if {$d > 29 || $d == 0} { |
|
return 0 |
|
} |
|
if {$d == 29} { |
|
#leapyear check |
|
if {[catch {clock scan $str -format %Y-%m-%d} errM]} { |
|
return 0 |
|
} |
|
} |
|
} |
|
4 - 6 - 9 - 11 { |
|
if {$d > 30 || $d == 0} { |
|
return 0 |
|
} |
|
} |
|
} |
|
return 1 |
|
} |
|
proc is_localdate {str} { |
|
is_datepart $str |
|
} |
|
|
|
#allow only hh:mm:ss or hh:mm (no subseconds) |
|
proc _is_hms_or_hm_time {val} { |
|
set numchars [tcl::string::length $val] |
|
if {[regexp -all {[0-9:]} $val] != $numchars} { |
|
return 0 |
|
} |
|
#assert now digits and colons only |
|
set hms_cparts [split $val :] |
|
#2 or 3 parts only are valid - check contents of each part |
|
if {[llength $hms_cparts] == 2} { |
|
lassign $hms_cparts hr min |
|
if {[string length $hr] != 2 || [string length $min] != 2} { |
|
return 0 |
|
} |
|
if {$hr > 23 || $min > 59} { |
|
return 0 |
|
} |
|
} elseif {[llength $hms_cparts] == 3} { |
|
lassign $hms_cparts hr min sec |
|
if {[string length $hr] != 2 || [string length $min] != 2 || [string length $sec] !=2} { |
|
return 0 |
|
} |
|
#possible for sec to be 60 - leap second RFC 3339 |
|
if {$hr > 23 || $min > 59 || $sec > 60} { |
|
return 0 |
|
} |
|
} else { |
|
return 0 |
|
} |
|
return 1 |
|
} |
|
proc is_timepart {str} { |
|
#validate the part after the T (or space) |
|
#we receive only that trailing part here. |
|
|
|
#odt1 = 1979-05-27T07:32:00Z |
|
#odt2 = 1979-05-27T00:32:00-07:00 |
|
#odt3 = 1979-05-27T00:32:00.5-07:00 |
|
#odt4 = 1979-05-27T00:32:00.999999-07:00 |
|
|
|
set numchars [tcl::string::length $str] |
|
#timepart can have negative or positive offsets so - and + must be accepted |
|
if {[regexp -all {[zZt0-9\-\+\.:]} $str] == $numchars} { |
|
#todo |
|
#basic check that we have leading 2dig hr and 2dig min separated by colon |
|
if {![regexp {^[0-9]{2}:[0-9]{2}$|^[0-9]{2}:[0-9]{2}[^0-9]{1}.*$} $str]} { |
|
#nn:nn or nn:nnX.* where X is non digit |
|
return 0 |
|
} |
|
set dotparts [split $str .] |
|
if {[llength $dotparts] ni {1 2}} { |
|
return 0 |
|
} |
|
if {[llength $dotparts] == 2} { |
|
lassign $dotparts hms tail |
|
#validate tail - which might have +- offset |
|
if {[string index $tail end] ni {z Z}} { |
|
#from hh:mm:??.<tail> |
|
#check for +/- something |
|
if {[regexp {(.*)[+-](.*)} $tail _match fraction offset]} { |
|
if {![string is digit -strict $fraction]} { |
|
return 0 |
|
} |
|
if {![_is_hms_or_hm_time $offset]} { |
|
return 0 |
|
} |
|
} |
|
} else { |
|
set tail [string range $tail 0 end-1] |
|
#expect tail nnnn (from hh:mm::ss.nnnnZ) |
|
#had a dot and a zZ - no other offset valid (?) |
|
if {![string is digit -strict $tail]} { |
|
return 0 |
|
} |
|
} |
|
|
|
} else { |
|
#no dot (fraction of second) |
|
if {[regexp {(.*)[+-](.*)} $str _match hms offset]} { |
|
#validate offset |
|
if {![_is_hms_or_hm_time $offset]} { |
|
return 0 |
|
} |
|
} else { |
|
set hms $str |
|
set offset "" |
|
#trim a *single* z or Z off hms if present - multiple should error later |
|
if {[string index $hms end] in {z Z}} { |
|
set hms [string range $hms 0 end-1] |
|
} |
|
} |
|
} |
|
#hms is allowed in toml to be hh:mm:ss or hh:mm |
|
#validate we have hh:mm:ss or hh:mm - exactly 2 digits each |
|
if {![_is_hms_or_hm_time $hms]} { |
|
return 0 |
|
} |
|
|
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
proc is_localtime {str} { |
|
#time of day without any relation to a specific day or any offset or timezone |
|
set numchars [tcl::string::length $str] |
|
if {[regexp -all {[0-9\.:]} $str] == $numchars} { |
|
#todo |
|
if {![regexp {^[0-9]{2}:[0-9]{2}$|^[0-9]{2}:[0-9]{2}:[0-9]{2}([.][0-9]+){0,1}$} $str]} { |
|
#hh:mm or hh:mm:ss or hh:mm::ss.nnn |
|
return 0 |
|
} |
|
set dotparts [split $str .] |
|
if {[llength $dotparts] ni {1 2}} { |
|
return 0 |
|
} |
|
if {[llength $dotparts] == 2} { |
|
lassign $dotparts hms _tail |
|
#validate tail - just fractional seconds - regex has confirmed at least one digit and only digits |
|
#nothing todo? max length? |
|
} else { |
|
#no fractional seconds |
|
set hms $str |
|
} |
|
if {![_is_hms_or_hm_time $hms]} { |
|
return 0 |
|
} |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
|
|
#review |
|
proc is_datetime {str} { |
|
#Essentially RFC3339 formatted date-time - but: |
|
#1) allowing seconds to be omitted (:00 assumed) |
|
#2) T may be replaced with a single space character TODO - parser support for space in datetime! |
|
# (RFC 3339 allows space instead of T also - but doesn't specify it *must* be a single space) |
|
|
|
#toml-lint @2025-04 doesn't accept t for T or z for Z - but RFC3339 does |
|
#toml spec doesn't clarify - we will accept |
|
|
|
#e.g 1979-05-27 |
|
#e.g 1979-05-27T00:32:00Z |
|
#e.g 1979-05-27 00:32:00-07:00 |
|
#e.g 1979-05-27 00:32:00+10:00 |
|
#e.g 1979-05-27 00:32:00.999999-07:00 |
|
|
|
#review |
|
#minimal datetimes? |
|
# 2024 not ok - 2024T not accepted by tomlint why? |
|
# 02:00 ok |
|
# 02:00:00.5 ok |
|
# 1:00 - not ok - RFC3339 requires 2-digit hr,min,sec |
|
|
|
#toml-lint.com accepts 2025-01 |
|
|
|
if {[string length $str] < 5} { |
|
return 0 |
|
} |
|
|
|
set matches [regexp -all {[zZtT0-9\ \-\+\.:]} $str] |
|
if {[tcl::string::length $str] == $matches} { |
|
#all characters in legal range |
|
if {[regexp -all {\ } $str] > 1} { |
|
#only a single space is allowed. |
|
return 0 |
|
} |
|
#If we get a space - it is only valid as a convience to represent the T separator |
|
#we can normalize by converting to T here before more tests |
|
set str [string map {" " T t T} $str] |
|
#a further sanity check on T |
|
if {[regexp -all {T} $str] > 1} { |
|
return 0 |
|
} |
|
|
|
#!todo - use full RFC 3339 parser? |
|
#!todo - what if the value is 'time only'? |
|
|
|
if {[string first T $str] > -1} { |
|
lassign [split $str T] datepart timepart |
|
if {![is_datepart $datepart]} { |
|
return 0 |
|
} |
|
if {![is_timepart $timepart]} { |
|
return 0 |
|
} |
|
} else { |
|
#either a datepart or a localtime |
|
#spec: "If you include only the time portion of an RFC 3339 formatted date-time, it will represent that time of day |
|
# without any relation to a specific day or any offset or timezone." |
|
if {!([is_datepart $str] || [is_localtime $str])} { |
|
return 0 |
|
} |
|
} |
|
|
|
|
|
#Tcl's free-form clock scan (no -format option) is deprecated |
|
# |
|
#if {[catch {clock scan $datepart} err]} { |
|
# puts stderr "tcl clock scan failed err:'$err'" |
|
# return 0 |
|
#} |
|
|
|
} else { |
|
return 0 |
|
} |
|
return 1 |
|
} |
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::utils ---}] |
|
} |
|
|
|
namespace eval tomlish::parse { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::parse}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
#This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. |
|
#The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: |
|
# - e.g some kind of backtracking required if using an ABNF parser? |
|
#I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" |
|
#More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' |
|
|
|
#It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? |
|
|
|
#A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) |
|
|
|
|
|
variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text |
|
|
|
variable state |
|
# states: |
|
# table-space, itable-space, array-space |
|
# array-value-expected,keyval-value-expected,itable-keyval-value-expected, keyval-syntax, |
|
# dquoted-key, squoted-key |
|
# string-state, literal-state, multistring... |
|
# |
|
# notes: |
|
# only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack |
|
|
|
# |
|
# xxx_value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keyval-tail or array-syntax |
|
# |
|
#stateMatrix defines for each state, actions to take for each possible token. |
|
#single-element actions are the name of the next state into which to transition, or a 'POPSPACE' instruction to pop a level off the spacestack and add the data to the parent container. |
|
#dual-element actions are a push instruction and the name of the space to push on the stack. |
|
# - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) |
|
|
|
# -- --- --- --- --- --- |
|
#token/state naming guide |
|
# -- --- --- --- --- --- |
|
#tokens : underscore separated or bare name e.g newline, start_quote, start_squote |
|
#private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence |
|
#states : always contain at least one dash e.g err-state, table-space |
|
#instructions |
|
# -- --- --- --- --- --- |
|
|
|
|
|
#stateMatrix dict of elements mapping current state to next state based on returned tokens |
|
# current-state {token-encountered next-state ... } |
|
# where next-state can be a 1 or 2 element list. |
|
#If 2 element - the first item is an instruction (ucase) |
|
#If 1 element - it is either a lowercase dashed state name or an ucase instruction |
|
#e.g {PUSHSPACE <state-space>} or POPSPACE or SAMESPACE |
|
|
|
|
|
#SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases |
|
|
|
variable stateMatrix |
|
set stateMatrix [dict create] |
|
#--------------------------------------------------------- |
|
#WARNING |
|
#The stateMatrix implementation here is currently messy. |
|
#The code is a mixture of declarative via the stateMatrix and imperative via switch statements during PUSH/POP/SAMESPACE transitions. |
|
#This means the state behaviour has to be reasoned about by looking at both in conjuction. |
|
#--------------------------------------------------------- |
|
|
|
#xxx-space vs xxx-syntax inadequately documented - TODO |
|
|
|
#review - out of date? |
|
# --------------------------------------------------------------------------------------------------------------# |
|
# incomplete example of some state starting at table-space |
|
# --------------------------------------------------------------------------------------------------------------# |
|
# ( = -> keyval-value-expected) |
|
# keyval-syntax (popped -> keyval-space -> keyval-tail) (autotransition on pop) |
|
# keyval-space (autotransition on push ^) |
|
# table-space (barekey^) (startdquote -> dquoted-key ^) |
|
# --------------------------------------------------------------------------------------------------------------# |
|
|
|
dict set stateMatrix\ |
|
table-space { |
|
bom "table-space"\ |
|
whitespace "table-space"\ |
|
newline "table-space"\ |
|
barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ |
|
squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ |
|
dquotedkey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ |
|
XXXsingle_dquote "quoted-key"\ |
|
XXXsingle_squote "squoted-key"\ |
|
comment "table-space"\ |
|
starttablename "tablename-state"\ |
|
starttablearrayname "tablearrayname-state"\ |
|
enddquote "err-state"\ |
|
endsquote "err-state"\ |
|
comma "err-state"\ |
|
eof "end-state"\ |
|
equal "err-state"\ |
|
cr "err-lonecr"\ |
|
} |
|
|
|
|
|
|
|
dict set stateMatrix\ |
|
keyval-space {\ |
|
whitespace "keyval-syntax"\ |
|
equal "keyval-value-expected"\ |
|
} |
|
|
|
# ' = ' portion of keyval |
|
dict set stateMatrix\ |
|
keyval-syntax {\ |
|
whitespace "keyval-syntax"\ |
|
barekey {PUSHSPACE "dottedkey-space"}\ |
|
squotedkey {PUSHSPACE "dottedkey-space"}\ |
|
dquotedkey {PUSHSPACE "dottedkey-space"}\ |
|
equal "keyval-value-expected"\ |
|
comma "err-state"\ |
|
newline "err-state"\ |
|
eof "err-state"\ |
|
} |
|
#### |
|
dict set stateMatrix\ |
|
keyval-value-expected {\ |
|
whitespace "keyval-value-expected"\ |
|
untyped_value {TOSTATE "keyval-untyped-sequence" note "possible datetime datepart"}\ |
|
literal {TOSTATE "keyval-tail" note "required for empty literal at EOF"}\ |
|
string {TOSTATE "keyval-tail" note "required for empty string at EOF"}\ |
|
single_dquote {TOSTATE "string-state" returnstate keyval-tail}\ |
|
triple_dquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ |
|
single_squote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ |
|
triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ |
|
startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ |
|
startarray {PUSHSPACE array-space returnstate keyval-tail}\ |
|
} |
|
#double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"} |
|
|
|
#untyped_value sequences without intervening comma are allowed for datepart timepart |
|
#we will produce tomlish with missing SEPS and to_dict must validate whether 2 adjacent barekeys are valid |
|
dict set stateMatrix\ |
|
keyval-untyped-sequence {\ |
|
whitespace "keyval-untyped-sequence"\ |
|
untyped_value {TOSTATE "keyval-tail"}\ |
|
literal {TOSTATE "keyval-tail" note "required for empty literal at EOF"}\ |
|
string {TOSTATE "keyval-tail" note "required for empty string at EOF"}\ |
|
single_dquote {TOSTATE "string-state" returnstate keyval-tail}\ |
|
triple_dquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ |
|
single_squote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ |
|
triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ |
|
startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ |
|
startarray {PUSHSPACE array-space returnstate keyval-tail}\ |
|
newline "POPSPACE"\ |
|
comment "keyval-tail"\ |
|
eof "end-state"\ |
|
} |
|
|
|
#2025 - no leading-squote-space - only trailing-squote-space. |
|
|
|
dict set stateMatrix\ |
|
keyval-tail {\ |
|
whitespace "keyval-tail"\ |
|
newline "POPSPACE"\ |
|
comment "keyval-tail"\ |
|
eof "end-state"\ |
|
} |
|
|
|
|
|
#itable-space/ curly-syntax : itables |
|
# x={<here>y=1,<here>} |
|
dict set stateMatrix\ |
|
itable-space {\ |
|
whitespace "itable-space"\ |
|
newline "itable-space"\ |
|
barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ |
|
squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ |
|
dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ |
|
endinlinetable "POPSPACE"\ |
|
comma "err-state"\ |
|
comment "itable-space"\ |
|
eof "err-state"\ |
|
} |
|
#we don't get single_squote etc here - instead we get the resulting squotedkey token |
|
|
|
|
|
# ??? review - something like this |
|
# <dottedkeyspace-tail><here> |
|
# x={y =1,} |
|
dict set stateMatrix\ |
|
itable-keyval-syntax {\ |
|
whitespace {TOSTATE "itable-keyval-syntax"}\ |
|
barekey {PUSHSPACE "dottedkey-space"}\ |
|
squotedkey {PUSHSPACE "dottedkey-space"}\ |
|
dquotedkey {PUSHSPACE "dottedkey-space"}\ |
|
equal {TOSTATE "itable-keyval-value-expected"}\ |
|
newline "err-state"\ |
|
eof "err-state"\ |
|
} |
|
|
|
# x={y<here>=1} |
|
dict set stateMatrix\ |
|
itable-keyval-space {\ |
|
whitespace "itable-keyval-syntax"\ |
|
equal {TOSTATE "itable-keyval-value-expected" note "required"}\ |
|
} |
|
|
|
dict set stateMatrix\ |
|
itable-keyval-value-expected {\ |
|
whitespace "itable-keyval-value-expected"\ |
|
untyped_value {TOSTATE "itable-val-tail" note ""}\ |
|
single_dquote {TOSTATE "string-state" returnstate itable-val-tail}\ |
|
triple_dquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ |
|
single_squote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ |
|
triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ |
|
startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ |
|
startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ |
|
} |
|
#double_squote not currently generated by _start_squote_sequence - '' processed as single_squote to literal-state just like 'xxx' |
|
# review |
|
# double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"} |
|
|
|
|
|
|
|
# x={y=1<here>,z="x"<here>} |
|
#POPSPACE is transition from itable-keyval-space to parent itable-space |
|
dict set stateMatrix\ |
|
itable-val-tail {\ |
|
whitespace "itable-val-tail"\ |
|
endinlinetable "POPSPACE"\ |
|
comma "POPSPACE"\ |
|
newline {TOSTATE "itable-val-tail" note "itable-space ??"}\ |
|
comment "itable-val-tail"\ |
|
eof "err-state"\ |
|
} |
|
# XXXnewline "POPSPACE" |
|
# We shouldn't popspace on newline - as if there was no comma we need to stay in itable-val-tail |
|
# This means the newline and subsequent whitespace, comments etc become part of the preceeding dottedkey record |
|
#e.g |
|
# x = { |
|
# j=1 |
|
# #comment within dottedkey j record |
|
# , # comment unattached |
|
# #comment unattached |
|
# k=2 , #comment unattached |
|
# l=3 #comment within l record |
|
# , m=4 |
|
# #comment associated with m record |
|
# |
|
# #still associated with m record |
|
# } |
|
## - This doesn't quite correspond to what a user might expect - but seems like a consistent mechanism. |
|
#The awkwardness is because there is no way to put in a comment that doesn't consume a trailing comma |
|
#so we cant do: j= 1 #comment for j1 , |
|
# and have the trailing comma recognised. |
|
# |
|
# To associate: j= 1, #comment for j1 |
|
# we would need some extra processing . (not popping until next key ? extra state itable-sep-tail?) REVIEW - worth doing? |
|
# |
|
# The same issue occurs with multiline arrays. The most natural assumption is that a comment on same line after a comma |
|
# is 'associated' with the previous entry. |
|
# |
|
# These comment issues are independent of the data dictionary being generated for conversion to json etc - as the comments don't carry through anyway, |
|
# but are a potential oddity for manipulating the intermediate tomlish structure whilst attempting to preserve 'associated' comments |
|
# (e.g reordering records within an itable) |
|
#The user's intention for 'associated' isn't always clear and the specs don't really guide on this. |
|
|
|
|
|
#dottedkey-space is not (currently) used within [tablename] or [[tablearrayname]] |
|
#it is for keyval ie x.y.z = value |
|
|
|
#this is the state after dot |
|
#we are expecting a complete key token or whitespace |
|
#(initial entry to the space is by one of the keys - which will immediately go to dottedkey-space-tail) |
|
dict set stateMatrix\ |
|
dottedkey-space {\ |
|
whitespace "dottedkey-space"\ |
|
dotsep "err-state"\ |
|
barekey "dottedkey-space-tail"\ |
|
squotedkey "dottedkey-space-tail"\ |
|
dquotedkey "dottedkey-space-tail"\ |
|
newline "err-state"\ |
|
comma "err-state"\ |
|
comment "err-state"\ |
|
equal "err-state"\ |
|
} |
|
|
|
#dottedkeyend "POPSPACE" |
|
#equal "POPSPACE"\ |
|
|
|
|
|
#jmn 2025 |
|
#we have 1 or more dottedkeys so far - need dotsep to add more, whitespace to maintain, equal to pop |
|
dict set stateMatrix\ |
|
dottedkey-space-tail {\ |
|
whitespace "dottedkey-space-tail" |
|
dotsep "dottedkey-space" |
|
equal "POPSPACE"\ |
|
eof "err-state"\ |
|
newline "err-state"\ |
|
} |
|
|
|
#-------------------------------------------------------------------------- |
|
#scratch area |
|
#from_toml {x=1} |
|
# barekey tok <x> |
|
# table-space PUSHSPACE keyval-space state keyval-syntax |
|
# |
|
|
|
|
|
#-------------------------------------------------------------------------- |
|
|
|
|
|
#REVIEW |
|
#toml spec looks like heading towards allowing newlines within inline tables |
|
#https://github.com/toml-lang/toml/issues/781 |
|
|
|
#2025 - multiline itables appear to be valid for 1.1 - which we are targeting. |
|
#https://github.com/toml-lang/toml/blob/main/toml.md#inline-table |
|
|
|
#JMN2025 |
|
#review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES |
|
#We currently allow multiline ITABLES (also with comments) in the tokenizer. |
|
#if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? |
|
|
|
|
|
#JMN REVIEW |
|
#dict set stateMatrix\ |
|
# array-space {\ |
|
# whitespace "array-space"\ |
|
# newline "array-space"\ |
|
# untyped_value "SAMESPACE"\ |
|
# startarray {PUSHSPACE "array-space"}\ |
|
# endarray "POPSPACE"\ |
|
# startinlinetable {PUSHSPACE itable-space}\ |
|
# single_dquote "string-state"\ |
|
# single_squote "literal-state"\ |
|
# triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ |
|
# comma "array-space"\ |
|
# comment "array-space"\ |
|
# eof "err-state-array-space-got-eof"\ |
|
# } |
|
|
|
## array-space ## |
|
set aspace [dict create] |
|
dict set aspace whitespace "array-space" |
|
dict set aspace newline "array-space" |
|
#dict set aspace untyped_value "SAMESPACE" |
|
dict set aspace untyped_value "array-syntax" |
|
dict set aspace startarray {PUSHSPACE "array-space"} |
|
dict set aspace endarray "POPSPACE" |
|
dict set aspace single_dquote {TOSTATE "string-state" returnstate array-syntax} |
|
dict set aspace triple_dquote {PUSHSPACE "multistring-space" returnstate array-syntax} |
|
dict set aspace single_squote {TOSTATE "literal-state" returnstate array-syntax} |
|
dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax} |
|
dict set aspace startinlinetable {PUSHSPACE itable-space} |
|
#dict set aspace comma "array-space" |
|
dict set aspace comment "array-space" |
|
dict set aspace eof "err-state-array-space-got-eof" |
|
dict set stateMatrix array-space $aspace |
|
|
|
#when we pop from an inner array we get to array-syntax |
|
#e.g {x=[[]<array-syntax]} |
|
#when there is leading space before a value we get to array-syntax |
|
|
|
## array-syntax ## |
|
set asyntax [dict create] |
|
dict set asyntax whitespace "array-syntax" |
|
dict set asyntax newline "array-syntax" |
|
#dict set asyntax untyped_value "SAMESPACE" |
|
#dict set asyntax startarray {PUSHSPACE array-space} |
|
dict set asyntax endarray "POPSPACE" |
|
#dict set asyntax single_dquote "string-state" |
|
#dict set asyntax single_squote "literal-state" |
|
dict set asyntax comma "array-space" |
|
dict set asyntax comment "array-syntax" |
|
dict set stateMatrix array-syntax $asyntax |
|
|
|
|
|
|
|
|
|
#dquotedkey is a token - dquoted-key is a state |
|
dict set stateMatrix\ |
|
dquoted-key {\ |
|
whitespace "NA"\ |
|
dquotedkey "dquoted-key"\ |
|
newline "err-state"\ |
|
} |
|
dict set stateMatrix\ |
|
squoted-key {\ |
|
whitespace "NA"\ |
|
squotedkey "squoted-key"\ |
|
newline "err-state"\ |
|
} |
|
# endsquote {PUSHSPACE "keyval-space"} |
|
|
|
dict set stateMatrix\ |
|
string-state {\ |
|
whitespace "NA"\ |
|
string "string-state"\ |
|
enddquote "SAMESPACE"\ |
|
newline "err-state"\ |
|
eof "err-state"\ |
|
} |
|
dict set stateMatrix\ |
|
literal-state {\ |
|
whitespace "NA"\ |
|
literal "literal-state"\ |
|
endsquote "SAMESPACE"\ |
|
newline "err-state"\ |
|
eof "err-state"\ |
|
} |
|
|
|
|
|
dict set stateMatrix\ |
|
multistring-space {\ |
|
whitespace "multistring-space"\ |
|
continuation "multistring-space"\ |
|
stringpart "multistring-space"\ |
|
newline "multistring-space"\ |
|
tentative_trigger_dquote {PUSHSPACE "trailing-dquote-space" returnstate multistring-space starttok {tentative_accum_dquote {"}}}\ |
|
single_dquote {TOSTATE multistring-space}\ |
|
double_dquote {TOSTATE multistring-space}\ |
|
triple_dquote {POPSPACE}\ |
|
eof "err-state"\ |
|
} |
|
dict set stateMatrix\ |
|
trailing-dquote-space { |
|
tentative_accum_dquote "POPSPACE" |
|
} |
|
|
|
|
|
#only valid subparts are literalpart and newline. other whitespace etc is within literalpart |
|
#todo - treat sole cr as part of literalpart but crlf and lf as newline |
|
dict set stateMatrix\ |
|
multiliteral-space {\ |
|
literalpart "multiliteral-space"\ |
|
newline "multiliteral-space"\ |
|
tentative_trigger_squote {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {tentative_accum_squote "'"}}\ |
|
single_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: false alarm this squote is part of data"}\ |
|
double_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ |
|
triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ |
|
eof "err-premature-eof-in-multiliteral-space"\ |
|
} |
|
|
|
#trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes |
|
dict set stateMatrix\ |
|
trailing-squote-space { |
|
tentative_accum_squote "POPSPACE" |
|
} |
|
|
|
|
|
#dict set stateMatrix\ |
|
# tablename-state {\ |
|
# whitespace "NA"\ |
|
# tablename {zeropoppushspace table-space}\ |
|
# tablename2 {PUSHSPACE table-space}\ |
|
# endtablename "tablename-tail"\ |
|
# comma "err-state"\ |
|
# newline "err-state"\ |
|
# } |
|
|
|
set tnamestate [dict create] |
|
dict set tnamestate whitespace "NA" |
|
dict set tnamestate tablename {zeropoppushspace table-space} |
|
#dict set tnamestate tablename2 {PUSHSPACE table-space returnstate tablearrayname-tail} |
|
dict set tnamestate tablename2 {PUSHSPACE table-space returnstate tablename-tail} |
|
dict set tnamestate endtablename "tablename-tail" |
|
dict set tnamestate endtablearrayname "tablearrayname-tail" |
|
dict set tnamestate comma "err-state" |
|
dict set tnamestate newline "err-state" |
|
dict set stateMatrix tablename-state $tnamestate |
|
|
|
|
|
#dict set stateMatrix\ |
|
# tablearrayname-state {\ |
|
# whitespace "NA"\ |
|
# tablearrayname {zeropoppushspace table-space}\ |
|
# tablearrayname2 {PUSHSPACE table-space}\ |
|
# endtablearray "tablearrayname-tail"\ |
|
# comma "err-state"\ |
|
# newline "err-state"\ |
|
# } |
|
set tarrnamestate [dict create] |
|
dict set tarrnamestate whitespace "NA" |
|
dict set tarrnamestate tablearrayname {zeropoppushspace table-space} |
|
dict set tarrnamestate tablearrayname2 {PUSHSPACE table-space} |
|
dict set tarrnamestate endtablearrayname "tablearrayname-tail" |
|
dict set tarrnamestate comma "err-state" |
|
dict set tarrnamestate newline "err-state" |
|
dict set stateMatrix tablearrayname-state $tarrnamestate |
|
|
|
|
|
dict set stateMatrix\ |
|
tablename-tail {\ |
|
whitespace "tablename-tail"\ |
|
newline "table-space"\ |
|
comment "tablename-tail"\ |
|
eof "end-state"\ |
|
} |
|
#dict set stateMatrix\ |
|
# tablearrayname-tail {\ |
|
# whitespace "tablearrayname-tail"\ |
|
# newline "table-space"\ |
|
# comment "tablearrayname-tail"\ |
|
# eof "end-state"\ |
|
# } |
|
|
|
|
|
# [[xxx]<here>] ??? |
|
set tarntail [dict create] |
|
dict set tarntail whitespace "err-state" ;#"tablearrayname-tail" ;#spec doesn't allow whitespace here |
|
dict set tarntail newline "err-state" |
|
dict set tarntail comment "err-state" |
|
dict set tarntail eof "err-state" |
|
dict set tarntail endtablename "tablearray-tail" |
|
dict set stateMatrix tablearrayname-tail $tarntail |
|
|
|
#review - somewhat counterintuitive...? |
|
# [(starttablearrayname) (endtablearrayname] |
|
# [(starttablename) (endtablename)] |
|
|
|
# [[xxx]]<here> ??? |
|
set tartail [dict create] |
|
dict set tartail whitespace "tablearray-tail" |
|
dict set tartail newline "table-space" |
|
dict set tartail comment "tablearray-tail" |
|
dict set tartail eof "end-state" |
|
dict set stateMatrix tablearray-tail $tartail |
|
|
|
|
|
|
|
|
|
|
|
|
|
dict set stateMatrix\ |
|
end-state {} |
|
|
|
set knowntokens [list] |
|
set knownstates [list] |
|
dict for {state transitions} $stateMatrix { |
|
if {$state ni $knownstates} {lappend knownstates $state} |
|
dict for {tok instructions} $transitions { |
|
if {$tok ni $knowntokens} {lappend knowntokens $tok} |
|
} |
|
} |
|
dict set stateMatrix nostate {} |
|
foreach tok $knowntokens { |
|
dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" |
|
} |
|
|
|
|
|
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- |
|
#purpose - debugging? remove? |
|
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- |
|
#build a list of 'push triggers' from the stateMatrix |
|
# ie tokens which can push a new space onto spacestack |
|
set push_trigger_tokens [list] |
|
tcl::dict::for {s transitions} $stateMatrix { |
|
tcl::dict::for {token transition_to} $transitions { |
|
set instruction [lindex $transition_to 0] |
|
switch -exact -- $instruction { |
|
PUSHSPACE - zeropoppushspace { |
|
if {$token ni $push_trigger_tokens} { |
|
lappend push_trigger_tokens $token |
|
} |
|
} |
|
} |
|
} |
|
} |
|
::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" |
|
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- |
|
|
|
|
|
|
|
#This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) |
|
#spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE |
|
|
|
#mainly for the -space states: |
|
#redirect to another state $c based on a state transition from $whatever to $b |
|
# e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. |
|
#this is useful as we often don't know state $b. e.g when it is decided by 'POPSPACE' |
|
|
|
#use dict set to add values so we can easily add/remove/comment lines |
|
|
|
#Push to, next |
|
#default first states when we push to these spaces |
|
variable spacePushTransitions [dict create] |
|
dict set spacePushTransitions keyval-space keyval-syntax |
|
dict set spacePushTransitions itable-keyval-space itable-keyval-syntax |
|
dict set spacePushTransitions array-space array-space |
|
dict set spacePushTransitions table-space tablename-state |
|
#dict set spacePushTransitions #itable-space itable-space |
|
|
|
#Pop to, next |
|
variable spacePopTransitions [dict create] |
|
dict set spacePopTransitions array-space array-syntax |
|
|
|
|
|
#itable-keyval-space itable-val-tail |
|
#review |
|
#we pop to keyval-space from dottedkey-space or from keyval-value-expected? we don't always want to go to keyval-tail |
|
#leave it out and make the POPSPACE caller explicitly specify it |
|
#keyval-space keyval-tail |
|
|
|
variable spaceSameTransitions [dict create] |
|
#JMN test |
|
#dict set spaceSameTransitions array-space array-syntax |
|
|
|
#itable-keyval-space itable-val-tail |
|
|
|
|
|
variable state_list ;#reset every tomlish::decode::toml |
|
|
|
namespace export tomlish toml |
|
namespace ensemble create |
|
|
|
#goNextState has various side-effects e.g pushes and pops spacestack |
|
#REVIEW - setting nest and v elements here is ugly |
|
#todo - make neater, more single-purpose? |
|
proc goNextState {tokentype tok currentstate} { |
|
variable state |
|
variable nest |
|
variable v |
|
|
|
set prevstate $currentstate |
|
|
|
|
|
variable spacePopTransitions |
|
variable spacePushTransitions |
|
variable spaceSameTransitions |
|
|
|
variable last_space_action "none" |
|
variable last_space_type "none" |
|
variable state_list |
|
|
|
set result "" |
|
set starttok "" |
|
|
|
if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { |
|
set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] |
|
::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" |
|
switch -exact -- [lindex $transition_to 0] { |
|
POPSPACE { |
|
set popfromspace_info [spacestack peek] |
|
set popfromspace_state [dict get $popfromspace_info state] |
|
spacestack pop |
|
set parent_info [spacestack peek] |
|
set type [dict get $parent_info type] |
|
set parentspace [dict get $parent_info state] |
|
|
|
set last_space_action "pop" |
|
set last_space_type $type |
|
|
|
if {[dict exists $parent_info returnstate]} { |
|
set next [dict get $parent_info returnstate] |
|
#clear the returnstate on current level |
|
set existing [spacestack pop] |
|
dict unset existing returnstate |
|
spacestack push $existing ;#re-push modification |
|
::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected to stored returnstate $next <<---" |
|
} else { |
|
### |
|
#review - do away with spacePopTransitions - which although useful to provide a default.. |
|
# - involve error-prone configurations distant to the main state transition configuration in stateMatrix |
|
if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { |
|
set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] |
|
::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" |
|
} else { |
|
set next $parentspace |
|
::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace<<---" |
|
} |
|
} |
|
set result $next |
|
} |
|
SAMESPACE { |
|
set currentspace_info [spacestack peek] |
|
::tomlish::log::debug "--->> SAMESPACE got current space entry: $currentspace_info <<<<<" |
|
set type [dict get $currentspace_info type] |
|
set currentspace [dict get $currentspace_info state] |
|
|
|
if {[dict exists $currentspace_info returnstate]} { |
|
set next [dict get $currentspace_info returnstate] |
|
#clear the returnstate on current level |
|
set existing [spacestack pop] |
|
dict unset existing returnstate |
|
spacestack push $existing ;#re-push modification |
|
::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected to stored returnstate $next" |
|
} else { |
|
if {[dict exists $::tomlish::parse::spaceSameTransitions $currentspace]} { |
|
set next [dict get $::tomlish::parse::spaceSameTransitions $currentspace] |
|
::tomlish::log::info "--->> SAMESPACE transition to space $currentspace redirected state to $next (spaceSameTransitions)" |
|
} else { |
|
set next $currentspace |
|
::tomlish::log::info "--->> SAMESPACE transition to space $currentspace" |
|
} |
|
} |
|
set result $next |
|
} |
|
zeropoppushspace { |
|
if {$nest > 0} { |
|
#pop back down to the root level (table-space) |
|
spacestack pop |
|
set parentinfo [spacestack peek] |
|
set type [dict get $parentinfo type] |
|
set target [dict get $parentinfo state] |
|
|
|
set last_space_action "pop" |
|
set last_space_type $type |
|
|
|
#----- |
|
#standard pop |
|
set parentlevel [expr {$nest -1}] |
|
lappend v($parentlevel) [set v($nest)] |
|
incr nest -1 |
|
#----- |
|
} |
|
#re-entrancy |
|
|
|
#set next [list PUSHSPACE [lindex $transition_to 1]] |
|
set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 |
|
::tomlish::log::debug "--->> zeropoppushspace goNextState RECURSE. calling goNextState $nexttokentype $currentstate" |
|
set transition_info [::tomlish::parse::goNextState $nexttokentype $tok $currentstate] |
|
set result [dict get $transition_info newstate] |
|
} |
|
PUSHSPACE { |
|
set original_target [dict get $transition_to PUSHSPACE] |
|
if {[dict exists $transition_to returnstate]} { |
|
#adjust the existing space record on the stack. |
|
#struct::stack doesn't really support that - so we have to pop and re-push |
|
#todo - investigate a custom stack implementation where we can efficiently lset the top of the stack |
|
set currentspace [spacestack pop] |
|
dict set currentspace returnstate [dict get $transition_to returnstate] |
|
spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. |
|
} |
|
if {[dict exists $transition_to starttok]} { |
|
set starttok [dict get $transition_to starttok] |
|
} |
|
spacestack push [dict create type space state $original_target] |
|
|
|
set last_space_action "push" |
|
set last_space_type "space" |
|
|
|
if {[dict exists $transition_to state]} { |
|
#an explicit state in the pushed space was requested in the stateMatrix - override the spacePushTransition (spacePushTransitions can be deprecated if we require explicitness?) |
|
set next [dict get $transition_to state] |
|
::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next by explicit 'state' entry" |
|
} else { |
|
#puts $::tomlish::parse::spacePushTransitions |
|
if {[dict exists $::tomlish::parse::spacePushTransitions $original_target]} { |
|
set next [dict get $::tomlish::parse::spacePushTransitions $original_target] |
|
::tomlish::log::info "--->> PUSHSPACE transition to space $original_target redirected state to $next (spacePushTransitions) " |
|
} else { |
|
set next $original_target |
|
::tomlish::log::info "--->> PUSHSPACE transition to space $original_target" |
|
} |
|
} |
|
set result $next |
|
} |
|
TOSTATE { |
|
if {[dict exists $transition_to returnstate]} { |
|
#adjust the existing space record on the stack. |
|
#struct::stack doesn't really support that - so we have to pop and re-push |
|
#todo - investigate a custom stack implementation where we can efficiently lset the top of the stack |
|
set currentspace [spacestack pop] |
|
dict set currentspace returnstate [dict get $transition_to returnstate] |
|
spacestack push $currentspace ;#return modified info to stack so when we POPSPACE the returnstate is available. |
|
} |
|
set result [dict get $transition_to TOSTATE] |
|
} |
|
default { |
|
#simplified version of TOSTATE |
|
set result [lindex $transition_to 0] ;#ignore everything but first word |
|
} |
|
} |
|
} else { |
|
::tomlish::log::error "--->> No state transition defined from state $currentstate when tokentype $tokentype received" |
|
set result "nostate" |
|
} |
|
lappend state_list [list tokentype $tokentype from $currentstate to $result] |
|
set state $result |
|
::tomlish::log::notice "--->> STATE TRANSITION tokenType: '$tokentype' tok:$tok triggering '$currentstate' -> '$result' last_space_action:$last_space_action " |
|
return [dict create prevstate $prevstate newstate $result space_action $last_space_action starttok $starttok] |
|
} |
|
|
|
proc report_line {{line ""}} { |
|
variable linenum |
|
variable is_parsing |
|
if {$is_parsing} { |
|
if {$line eq ""} { |
|
set line $linenum |
|
} |
|
return "Line Number: $line" |
|
} else { |
|
#not in the middle of parsing tomlish text - return nothing. |
|
return "" |
|
} |
|
} |
|
|
|
#produce a *slightly* more readable string rep of the nest for puts etc. |
|
proc nest_pretty1 {list} { |
|
set prettier "{" |
|
|
|
foreach el $list { |
|
if { [lindex $el 0] eq "NEWLINE"} { |
|
append prettier "[list $el]\n" |
|
} elseif {([llength $el] > 1) && ([lindex $el 0] in {KEY DQKEY SQKEY TABLE ARRAY})} { |
|
append prettier [nest_pretty1 $el] |
|
} else { |
|
append prettier "[list $el] " |
|
} |
|
} |
|
append prettier "}" |
|
return $prettier |
|
} |
|
|
|
proc set_tokenType {t} { |
|
variable tokenType |
|
variable tokenType_list |
|
if {![info exists tokenType]} { |
|
set tokenType "" |
|
} |
|
lappend tokenType_list $t |
|
set tokenType $t |
|
} |
|
|
|
proc switch_tokenType {t} { |
|
variable tokenType |
|
variable tokenType_list |
|
lset tokenType_list end $t |
|
set tokenType $t |
|
} |
|
|
|
proc get_tokenType {} { |
|
variable tokenType |
|
return $tokenType |
|
} |
|
|
|
|
|
proc get_token_waiting {} { |
|
variable token_waiting |
|
return [lindex $token_waiting 0] |
|
} |
|
proc clear_token_waiting {} { |
|
variable token_waiting |
|
set token_waiting [list] |
|
} |
|
|
|
#token_waiting is a list - but our standard case is to have only one |
|
#in certain circumstances such as near eof we may have 2 |
|
#the set_token_waiting function only allows setting when there is not already one waiting. |
|
#we want to catch cases of inadvertently trying to set multiple |
|
# - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. |
|
proc set_token_waiting {args} { |
|
if {[llength $args] %2 != 0} { |
|
error "tomlish set_token_waiting must have args of form: type <tokentype> value <value> complete 0|1" |
|
} |
|
variable token_waiting |
|
|
|
if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { |
|
#tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another |
|
#we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context |
|
#rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it |
|
set err "tomlish set_token_waiting already has token_waiting: [lindex $token_waiting 0]" |
|
append err \n " - cannot add token_waiting: $args" |
|
error $err |
|
#set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] |
|
#set token_waiting [list] |
|
} |
|
|
|
set waiting [dict create] |
|
dict for {k v} $args { |
|
switch -exact $k { |
|
type - complete { |
|
dict set waiting $k $v |
|
} |
|
value { |
|
dict set waiting tok $v |
|
} |
|
startindex { |
|
dict set waiting startindex $v |
|
} |
|
default { |
|
error "tomlish set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" |
|
} |
|
} |
|
} |
|
if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { |
|
error "tomlish set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" |
|
} |
|
if {![llength $token_waiting]} { |
|
set token_waiting [list $waiting] |
|
} else { |
|
#an extra sanity-check that we don't have more than just the eof.. |
|
if {[llength $token_waiting] > 1} { |
|
set err "tomlish Unexpected. Existing token_waiting count > 1.\n" |
|
foreach tw $token_waiting { |
|
append err " $tw" \n |
|
} |
|
append err " - cannot add token_waiting: $waiting" |
|
error $err |
|
} |
|
#last entry must be a waiting eof |
|
set token_waiting [list $waiting [lindex $token_waiting end]] |
|
} |
|
return |
|
} |
|
|
|
#returns 0 or 1 |
|
#tomlish::parse::tok |
|
#we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag |
|
# - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) |
|
# - interactive use? |
|
|
|
proc tok {} { |
|
variable nest |
|
variable s |
|
variable v |
|
variable i |
|
variable tok |
|
variable type ;#character type |
|
variable state ;#FSM |
|
|
|
|
|
variable tokenType |
|
variable tokenType_list |
|
|
|
|
|
variable endToken |
|
|
|
variable lastChar |
|
|
|
variable braceCount |
|
variable bracketCount |
|
|
|
|
|
#------------------------------ |
|
#Previous run found another (presumably single-char) token |
|
#The normal case is for there to be only one dict in the list |
|
#multiple is an exception - primarily for eof |
|
variable token_waiting |
|
if {[llength $token_waiting]} { |
|
set waiting [lindex $token_waiting 0] |
|
|
|
set tokenType [dict get $waiting type] |
|
set tok [dict get $waiting tok] |
|
#todo: dict get $token_waiting complete |
|
set token_waiting [lrange $token_waiting 1 end] |
|
return 1 |
|
} |
|
#------------------------------ |
|
|
|
set resultlist [list] |
|
set sLen [tcl::string::length $s] |
|
|
|
set slash_active 0 |
|
set quote 0 |
|
set c "" |
|
for {} {$i < $sLen} {} { |
|
if {$i > 0} { |
|
set lastChar [tcl::string::index $s [expr {$i - 1}]] |
|
set start_of_data h |
|
} else { |
|
set lastChar "" |
|
set start_of_data 1 |
|
#bom-handling |
|
if {[tcl::string::index $s 0] eq "\uFEFF"} { |
|
#bom (could be from various encodings - now decoded as single unicode char FEFF) |
|
#incr i 1 ;#skip over initial bom? |
|
} |
|
} |
|
|
|
|
|
set c [tcl::string::index $s $i] |
|
set cindex $i |
|
|
|
set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] |
|
tomlish::log::debug "- tokloop char <$ctest> index $i tokenType:$tokenType tok:<$tok>" |
|
#puts "got char $c during tokenType '$tokenType'" |
|
incr i ;#must incr here because we do returns inside the loop |
|
|
|
|
|
|
|
switch -exact -- $ctest { |
|
# { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
|
|
if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
newline { |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
#for multiliteral, multistring - data and/or end |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
#pseudo token beginning with underscore - never returned to state machine - review |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
barekey { |
|
error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] and a selection of letter-like chars allowed (see tomlish::utils::is_barekey). [tomlish::parse::report_line]" |
|
} |
|
whitespace { |
|
# hash marks end of whitespace token |
|
#do a return for the whitespace, set token_waiting |
|
#set_token_waiting type comment value "" complete 1 |
|
incr i -1 ;#leave comment for next run |
|
return 1 |
|
} |
|
untyped_value { |
|
#REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? |
|
#we will accept a comment marker as an immediate terminator of the untyped_value. |
|
incr i -1 |
|
return 1 |
|
} |
|
starttablename - starttablearrayname { |
|
#fix? |
|
error "tomlish Character '#' is invalid first character for $tokenType. [tomlish::parse::report_line]" |
|
} |
|
tablename - tablearrayname { |
|
#invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out |
|
append tok $c |
|
} |
|
default { |
|
#dquotedkey, string,literal, multistring |
|
append tok $c |
|
} |
|
} |
|
} else { |
|
switch -- $state { |
|
multistring-space { |
|
set_tokenType stringpart |
|
set tok "" |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok "#" |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok "#" |
|
} |
|
default { |
|
#start of token if we're not in a token |
|
set_tokenType comment |
|
set tok "" ;#The hash is not part of the comment data |
|
} |
|
} |
|
} |
|
} |
|
lc { |
|
#left curly brace |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i [tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal - literalpart - squotedkey { |
|
append tok $c |
|
} |
|
string - dquotedkey { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
stringpart { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
starttablename - starttablearrayname { |
|
#*bare* tablename can only contain letters,digits underscores |
|
error "tomlish Invalid tablename first character \{ [tomlish::parse::report_line]" |
|
} |
|
tablename - tablearrayname { |
|
#valid in quoted parts |
|
append tok $c |
|
} |
|
comment { |
|
if {$had_slash} {append tok "\\"} |
|
append tok "\{" |
|
} |
|
default { |
|
#end any other token. |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
itable-keyval-value-expected - keyval-value-expected { |
|
#switch last key to tablename?? |
|
set_tokenType "startinlinetable" |
|
set tok "\{" |
|
return 1 |
|
} |
|
array-space - array-syntax { |
|
#nested anonymous inline table |
|
set_tokenType "startinlinetable" |
|
set tok "\{" |
|
return 1 |
|
} |
|
table-space { |
|
#invalid - but allow parser statemachine to report it. ? |
|
set_tokenType "startinlinetable" |
|
set tok "\{" |
|
return 1 |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok "\{" |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok "\{" |
|
} |
|
default { |
|
error "tomlish state: '$state'. left brace case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
|
|
} |
|
rc { |
|
#right curly brace |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal - literalpart - squotedkey { |
|
append tok $c |
|
} |
|
string - dquotedkey - comment { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
stringpart { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
starttablename - tablename { |
|
if {$had_slash} {append tok "\\"} |
|
#invalid! - but leave for datastructure loading stage to catch |
|
set_token_waiting type endinlinetable value "" complete 1 startindex $cindex |
|
return 1 |
|
} |
|
starttablearrayname - tablearrayname { |
|
if {$had_slash} {append tok "\\"} |
|
#invalid! - but leave for datastructure loading stage to catch |
|
set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex |
|
return 1 |
|
} |
|
default { |
|
#end any other token |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
switch -exact -- $state { |
|
table-space { |
|
#invalid - but allow parser statemachine to report it. ? |
|
set_tokenType "endinlinetable" |
|
set tok "\}" |
|
return 1 |
|
} |
|
itable-space { |
|
set_tokenType "endinlinetable" |
|
set tok "\}" |
|
return 1 |
|
} |
|
tablename-state { |
|
#e.g [] - empty tablename - allowed or not? |
|
#empty tablename/tablearrayname ? |
|
#error "unexpected tablename problem" |
|
|
|
set_tokenType "endinlinetable" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
tablearrayname-state { |
|
error "tomlish unexpected tablearrayname-state problem" |
|
set_tokenType "endinlinetable" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
array-syntax - array-space { |
|
#invalid |
|
set_tokenType "endinlinetable" |
|
set tok "\}" |
|
return 1 |
|
} |
|
itable-val-tail { |
|
set_tokenType "endinlinetable" |
|
set tok "" |
|
#we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 |
|
incr i -1 |
|
return 1 |
|
} |
|
itable-keyval-syntax { |
|
error "tomlish endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok "\}" |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" ; #review |
|
set tok "\}" |
|
} |
|
default { |
|
#JMN2024b keyval-tail? |
|
error "tomlish state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
|
|
} |
|
lb { |
|
#left square bracket |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal - literalpart - squotedkey { |
|
append tok $c |
|
} |
|
string - dquotedkey { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
stringpart { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
starttablename { |
|
#change the tokenType |
|
switch_tokenType "starttablearrayname" |
|
set tok "" ;#no output into the tomlish list for this token |
|
#any following whitespace is part of the tablearrayname, so return now |
|
return 1 |
|
} |
|
tablename - tablearrayname { |
|
#e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token |
|
if {$had_slash} { |
|
#resultant tablename may be invalid - but leave for datastructure loading stage to catch |
|
#append tok "\\[" |
|
append tok {\[} |
|
} else { |
|
if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { |
|
#invalid at this point - state machine should disallow: |
|
# table -> starttablearrayname |
|
# tablearray -> starttablearrayname |
|
set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex |
|
return 1 |
|
} else { |
|
#we appear to still be in single or double quoted section |
|
append tok "\[" |
|
} |
|
} |
|
} |
|
comment { |
|
if {$had_slash} {append tok "\\"} |
|
append tok "\[" |
|
} |
|
default { |
|
#end any other token. |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
switch -exact -- $state { |
|
keyval-value-expected - itable-keyval-value-expected { |
|
set_tokenType "startarray" |
|
set tok "\[" |
|
return 1 |
|
} |
|
array-space - array-syntax { |
|
#nested array? |
|
set_tokenType "startarray" |
|
set tok "\[" |
|
return 1 |
|
#error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" |
|
} |
|
table-space { |
|
#table name |
|
#assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray |
|
#note that a starttablearrayname token may contain whitespace between the brackets |
|
# e.g \[ \[ |
|
set_tokenType "starttablename" |
|
set tok "" ;#there is no output into the tomlish list for this token |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok "\[" |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok "\[" |
|
} |
|
itable-space { |
|
#handle state just to give specific error msg |
|
error "tomlish state: '$state'. Left square bracket invalid. Cannot start array in inline table without key. Use key=\[<vals>\] syntax. [tomlish::parse::report_line]" |
|
} |
|
default { |
|
error "tomlish state: '$state'. startarray case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} |
|
rb { |
|
#right square bracket |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal - literalpart - squotedkey { |
|
append tok $c |
|
} |
|
string - dquotedkey { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
stringpart { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
comment { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
whitespace { |
|
if {$state eq "multistring-space"} { |
|
#???? |
|
incr i -1 |
|
if {$had_slash} {incr i -1} ;#reprocess |
|
return 1 |
|
} else { |
|
incr i -1 |
|
if {$had_slash} {incr i -1} ;#reprocess |
|
return 1 |
|
} |
|
} |
|
starttablename { |
|
#toml-test invalid/table/empty |
|
|
|
set_token_waiting type tablename value "" complete 1 startindex $cindex |
|
incr i -1 |
|
return 1 |
|
} |
|
tablename { |
|
#e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token |
|
if {$had_slash} { |
|
#resultant tablename may be invalid - but leave for datastructure loading stage to catch |
|
append tok "\\]" |
|
} else { |
|
if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { |
|
set_token_waiting type endtablename value "" complete 1 startindex $cindex |
|
return 1 |
|
} else { |
|
#we appear to still be in single or double quoted section |
|
append tok "]" |
|
} |
|
} |
|
} |
|
tablearrayname { |
|
#e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token |
|
if {$had_slash} { |
|
#resultant tablename may be invalid - but leave for datastructure loading stage to catch |
|
append tok "\\]" |
|
} else { |
|
if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { |
|
set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex |
|
return 1 |
|
} else { |
|
#we appear to still be in single or double quoted section |
|
append tok "]" |
|
} |
|
} |
|
} |
|
default { |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
switch -exact -- $state { |
|
array-syntax - array-space { |
|
#invalid - but allow parser statemachine to report it. |
|
set_tokenType "endarray" |
|
set tok "\]" |
|
return 1 |
|
} |
|
table-space { |
|
#invalid - but allow parser statemachine to report it. ? |
|
set_tokenType "endarray" |
|
set tok "\]" |
|
return 1 |
|
} |
|
tablename-state { |
|
#e.g [] - empty tablename |
|
#tomltest 1.1.0 invalid/table/empty |
|
#should be invalid |
|
#we parse it and let dict::from_tomlish error when it tries to split table |
|
|
|
set_tokenType "endtablename" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
tablearrayname-state { |
|
error "tomlish unexpected tablearrayname problem" |
|
set_tokenType "endtablearray" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
tablearrayname-tail { |
|
#[[xxx]<here> |
|
set_tokenType "endtablename" |
|
#sequence: starttablename -> starttablearrayname -> endtablearrayname -> endtablename |
|
return 1 |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok "\]" |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok "\]" |
|
} |
|
default { |
|
error "tomlish state '$state'. endarray case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} |
|
bsl { |
|
#backslash |
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
whitespace { |
|
if {$state eq "multistring-space"} { |
|
#end whitespace token |
|
incr i -1 ;#reprocess bsl in next run |
|
return 1 |
|
} else { |
|
error "tomlish Unexpected backslash during whitespace. [tomlish::parse::report_line]" |
|
} |
|
} |
|
literal - literalpart - squotedkey { |
|
#never need to set slash_active true when in single quoted tokens |
|
append tok "\\" |
|
set slash_active 0 |
|
} |
|
string - dquotedkey - comment { |
|
if {$slash_active} { |
|
set slash_active 0 |
|
append tok "\\\\" |
|
} else { |
|
set slash_active 1 |
|
} |
|
} |
|
stringpart { |
|
if {$slash_active} { |
|
set slash_active 0 |
|
append tok "\\\\" |
|
} else { |
|
set slash_active 1 |
|
} |
|
} |
|
starttablename - starttablearrayname { |
|
error "tomlish backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" |
|
} |
|
tablename - tablearrayname { |
|
if {$slash_active} { |
|
set slash_active 0 |
|
append tok "\\\\" |
|
} else { |
|
set slash_active 1 |
|
} |
|
} |
|
barekey { |
|
error "tomlish Unexpected backslash during barekey. [tomlish::parse::report_line]" |
|
} |
|
default { |
|
error "tomlish Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
if {$slash_active} { |
|
set_tokenType "stringpart" |
|
set tok "\\\\" |
|
set slash_active 0 |
|
} else { |
|
set slash_active 1 |
|
} |
|
} |
|
multiliteral-space { |
|
#nothing can be escaped in multiliteral-space - not even squotes (?) review |
|
set_tokenType "literalpart" |
|
set tok "\\" |
|
} |
|
default { |
|
error "tomlish tok error: Unexpected backslash when no token is active. [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} |
|
sq { |
|
#single quote |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote { |
|
#for within multiliteral |
|
#short tentative_accum_squote tokens are returned if active upon receipt of any other character |
|
#longest allowable for leading/trailing are returned here |
|
#### |
|
set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote |
|
#assert state = trailing-squote-space |
|
append tok $c |
|
if {$existingtoklen == 4} { |
|
#maxlen to be a tentative_accum_squote is multisquote + 2 = 5 |
|
#return tok with value ''''' |
|
return 1 |
|
} |
|
} |
|
tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
#pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space |
|
switch -- [tcl::string::length $tok] { |
|
1 { |
|
#no conclusion can yet be reached |
|
append tok $c |
|
} |
|
2 { |
|
#enter multiliteral |
|
#switch? |
|
append tok $c |
|
set_tokenType triple_squote |
|
return 1 |
|
} |
|
default { |
|
#if there are more than 3 leading squotes we also enter multiliteral space and the subsequent ones are handled |
|
#by the tentative_accum_squote check for ending sequence which can accept up to 5 and reintegrate the |
|
#extra 1 or 2 squotes as data. |
|
error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" |
|
} |
|
} |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
whitespace { |
|
#end whitespace |
|
incr i -1 ;#reprocess sq |
|
return 1 |
|
} |
|
literal { |
|
#slash_active always false |
|
#terminate the literal |
|
set_token_waiting type endsquote value "'" complete 1 startindex $cindex |
|
return 1 |
|
} |
|
literalpart { |
|
#ended by ''' - but final could be '''' or ''''' (up to 2 squotes allowed directly before ending triple squote sequence) |
|
#todo |
|
# idea: end this literalpart (possibly 'temporarily') |
|
# let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack |
|
# upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) |
|
incr i -1 ;#throw the "'" back to loop - will be added to a tentative_accum_squote token for later processing |
|
return 1 |
|
} |
|
XXXitablesquotedkey { |
|
set_token_waiting type endsquote value "'" complete 1 startindex $cindex |
|
return 1 |
|
} |
|
squotedkey { |
|
### |
|
#set_token_waiting type endsquote value "'" complete 1 |
|
return 1 |
|
} |
|
starttablename - starttablearrayname { |
|
#!!! |
|
incr i -1 |
|
return 1 |
|
} |
|
tablename - tablearrayname { |
|
append tok $c |
|
} |
|
barekey { |
|
#barekeys now support all sorts of unicode letter/number chars for other cultures |
|
#but not punctuation - not even for those of Irish heritage who don't object |
|
#to the anglicised form of some names. |
|
# o'shenanigan seems to not be a legal barekey |
|
#The Irish will have to use an earlier form Ó - which apparently many may prefer anyway. |
|
error "tomlish Unexpected single quote during barekey. [tomlish::parse::report_line]" |
|
} |
|
default { |
|
append tok $c |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
array-space - keyval-value-expected - itable-keyval-value-expected { |
|
#leading squote |
|
#pseudo-token _start_squote_sequence ss not received by state machine |
|
#This pseudotoken will trigger production of single_squote token or triple_squote token |
|
#It currently doesn't trigger double_squote token |
|
#(handle '' same as 'x' ie produce a single_squote and go into processing literal) |
|
#review - producing double_squote for empty literal may be slightly more efficient. |
|
#This token is not used to handle squote sequences *within* a multiliteral |
|
set_tokenType "_start_squote_sequence" |
|
set tok "'" |
|
} |
|
multiliteral-space { |
|
#each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row |
|
#we are building up a tentative_accum_squote to determine if |
|
#a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines |
|
#b) it is exactly ''' and we can terminate the whole multiliteral |
|
#c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space |
|
set_tokenType "tentative_trigger_squote" ;#trigger tentative_accum_squote |
|
set tok "'" |
|
return 1 |
|
} |
|
table-space - itable-space { |
|
#tests: squotedkey.test squotedkey_itable.test |
|
set_tokenType "squotedkey" |
|
set tok "" |
|
} |
|
XXXtable-space - XXXitable-space { |
|
#future - could there be multiline keys? MLLKEY, MLBKEY ? |
|
#this would (almost) allow arbitrary tcl dicts to be stored in toml (aside from escaping issues) |
|
#probably unlikely - as it's perhaps not very 'minimal' or ergonomic for config files |
|
#@2025 ABNF for toml mentions key, simple-key, unquoted-key, quoted-key and dotted-key |
|
#where key is simple-key or dotted-key - no MLL or MLB components |
|
#the spec states solution for arbitrary binary data is application specific involving encodings |
|
#such as hex, base64 |
|
set_tokenType "_start_squote_sequence" |
|
set tok "'" |
|
return 1 |
|
} |
|
tablename-state { |
|
#first char in tablename-state/tablearrayname-state |
|
set_tokenType "tablename" |
|
append tok "'" |
|
} |
|
tablearrayname-state { |
|
set_tokenType "tablearrayname" |
|
append tok "'" |
|
} |
|
literal-state { |
|
#shouldn't get here? review |
|
tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" |
|
set_tokenType "literal" |
|
incr -1 |
|
return 1 |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} {append tok "\\"} |
|
append tok "," |
|
#error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" |
|
} |
|
dottedkey-space { |
|
set_tokenType "squotedkey" |
|
} |
|
default { |
|
error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
|
|
} |
|
dq { |
|
#double quote |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
newline { |
|
#review |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
tentative_accum_dquote { |
|
#within multistring |
|
#short tentative_accum_dquote tokens are returned if active upon receipt of any other character |
|
#longest allowable for leading/trailing are returned here |
|
#### |
|
set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote |
|
#assert state = trailing-squote-space |
|
append tok $c |
|
if {$existingtoklen == 4} { |
|
#maxlen to be a tentative_accum_dquote is multidquote + 2 = 5 |
|
#return tok with value """"" |
|
return 1 |
|
} |
|
} |
|
_start_dquote_sequence { |
|
#pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space |
|
switch -- [tcl::string::length $tok] { |
|
1 { |
|
#no conclusion can yet be reached |
|
append tok $c |
|
} |
|
2 { |
|
#enter multistring |
|
#switch? |
|
append tok $c |
|
set_tokenType triple_dquote |
|
return 1 |
|
} |
|
default { |
|
#if there are more than 3 leading dquotes we also enter multistring space and the subsequent ones are handled |
|
#by the tentative_accum_dquote check for ending sequence which can accept up to 5 and reintegrate the |
|
#extra 1 or 2 dquotes as data. |
|
error "tomlish unexpected token length [tcl::string::length $tok] in '_start_dquote_sequence'" |
|
} |
|
} |
|
} |
|
literal - literalpart { |
|
append tok $c |
|
} |
|
string { |
|
if {$had_slash} { |
|
append tok "\\" $c |
|
} else { |
|
#unescaped quote always terminates a string |
|
set_token_waiting type enddquote value "\"" complete 1 startindex $cindex |
|
return 1 |
|
} |
|
} |
|
stringpart { |
|
#sub element of multistring |
|
if {$had_slash} { |
|
append tok "\\" $c |
|
} else { |
|
incr i -1 ;#throw the {"} back to loop - will be added to a tentative_accum_dquote token for later processing |
|
return 1 |
|
} |
|
} |
|
whitespace { |
|
#assert: had_slash will only ever be true in multistring-space |
|
if {$had_slash} { |
|
incr i -2 |
|
return 1 |
|
} else { |
|
#end whitespace token - throw dq back for reprocessing |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
comment { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
XXXdquotedkey { |
|
if {$had_slash} { |
|
append tok "\\" |
|
append tok $c |
|
} else { |
|
set_token_waiting type enddquote value "\"" complete 1 startindex $cindex |
|
return 1 |
|
} |
|
} |
|
dquotedkey { |
|
### |
|
if {$had_slash} { |
|
append tok "\\" |
|
append tok $c |
|
} else { |
|
#set_token_waiting type enddquote value {"} complete 1 |
|
return 1 |
|
} |
|
} |
|
squotedkey { |
|
append tok $c |
|
} |
|
tablename - tablearrayname { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
starttablename - starttablearrayname { |
|
incr i -1 ;## |
|
return 1 |
|
} |
|
default { |
|
error "tomlish got quote during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
#token is string only if we're expecting a value at this point |
|
switch -exact -- $state { |
|
array-space - keyval-value-expected - itable-keyval-value-expected { |
|
#leading dquote |
|
#pseudo-token _start_squote_sequence ss not received by state machine |
|
#This pseudotoken will trigger production of single_dquote token or triple_dquote token |
|
#It currently doesn't trigger double_dquote token |
|
#(handle "" same as "x" ie produce a single_dquote and go into processing string) |
|
#review - producing double_dquote for empty string may be slightly more efficient. |
|
#This token is not used to handle dquote sequences once *within* a multistring |
|
set_tokenType "_start_dquote_sequence" |
|
set tok {"} |
|
} |
|
multistring-space { |
|
if {$had_slash} { |
|
set_tokenType "stringpart" |
|
set tok "\\\"" |
|
} else { |
|
#each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row |
|
#we are building up a tentative_accum_squote to determine if |
|
#a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines |
|
#b) it is exactly ''' and we can terminate the whole multiliteral |
|
#c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space |
|
set_tokenType "tentative_trigger_dquote" ;#trigger tentative_accum_dquote |
|
set tok {"} |
|
return 1 |
|
} |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok "\"" |
|
} |
|
table-space - itable-space { |
|
set_tokenType "dquotedkey" |
|
set tok "" |
|
} |
|
dottedkey-space { |
|
set_tokenType dquotedkey |
|
set tok "" |
|
|
|
#only if complex keys become a thing |
|
#set_tokenType dquote_seq_begin |
|
#set tok $c |
|
} |
|
tablename-state { |
|
set_tokenType tablename |
|
set tok $c |
|
} |
|
tablearrayname-state { |
|
set_tokenType tablearrayname |
|
set tok $c |
|
} |
|
default { |
|
error "tomlish Unexpected dquote during state '$state' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} |
|
= { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
newline { |
|
#review |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal - literalpart - squotedkey { |
|
#assertion had_slash 0 |
|
append tok $c |
|
} |
|
string - comment - dquotedkey { |
|
#for these tokenTypes an = is just data. |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
stringpart { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
whitespace { |
|
if {$state eq "multistring-space"} { |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_token_waiting type equal value = complete 1 startindex $cindex |
|
return 1 |
|
} |
|
} |
|
barekey { |
|
#set_token_waiting type equal value = complete 1 |
|
incr i -1 |
|
return 1 |
|
} |
|
starttablename - starttablearrayname { |
|
error "tomlish Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" |
|
} |
|
tablename - tablearrayname { |
|
#invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out |
|
append tok $c |
|
} |
|
default { |
|
error "tomlish unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok = |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok "=" |
|
} |
|
dottedkey-space { |
|
set_tokenType "equal" |
|
set tok "=" |
|
return 1 |
|
} |
|
default { |
|
set_tokenType "equal" |
|
set tok = |
|
return 1 |
|
} |
|
} |
|
} |
|
} |
|
cr { |
|
#REVIEW! |
|
# \r carriage return |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
newline { |
|
#we have received a double cr |
|
::tomlish::log::warn "double cr - will generate cr token. needs testing" |
|
set_tokenType "cr" ;#lone cr token will generally raise an error - but let state machine handle it |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal { |
|
append tok $c |
|
} |
|
literalpart { |
|
#part of MLL string (multi-line literal string) |
|
#we need to split out crlf as a separate NEWLINE to be consistent |
|
::tomlish::log::warn "literalpart ended by cr - needs testing" |
|
#return literalpart temporarily - allow cr to be reprocessed from multiliteral-space |
|
incr i -1 |
|
return 1 |
|
} |
|
stringpart { |
|
#stringpart is a part of MLB string (multi-line basic string) |
|
#throw back the cr - if followed by lf it will become a {NEWLINE crlf} entry within the MULTISTRING list (e.g between STRINGPART entries) |
|
incr i -1 |
|
return 1 |
|
} |
|
starttablename - starttablearrayname { |
|
error "tomlish Character <carriage return> is invalid first character for $tokenType. [tomlish::parse::report_line]" |
|
} |
|
tablename - tablearrayname { |
|
#could in theory be valid in quoted part of name |
|
#review - might be better just to disallow here |
|
append tok $c |
|
} |
|
whitespace { |
|
#it should technically be part of whitespace if not followed by lf |
|
#but outside of values we are also free to map it to be another NEWLINE instead? REVIEW |
|
incr i -1 |
|
return 1 |
|
} |
|
untyped_value { |
|
incr i -1 |
|
return 1 |
|
} |
|
comment { |
|
#JJJJ |
|
#review |
|
incr i -1 |
|
return 1 |
|
} |
|
default { |
|
#!todo - error out if cr inappropriate for tokenType |
|
append tok $c |
|
} |
|
} |
|
} else { |
|
#lf may be appended if next |
|
#review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) |
|
set_tokenType "newline" |
|
set tok cr |
|
} |
|
} |
|
lf { |
|
# \n newline |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
newline { |
|
#review |
|
#this lf is the trailing part of a crlf |
|
append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
#multiliteral or multistring |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal { |
|
#nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' |
|
#even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error |
|
set_token_waiting type newline value lf complete 1 startindex $cindex |
|
return 1 |
|
} |
|
literalpart { |
|
#we allow newlines - but store them within the multiliteral as their own element |
|
#This is a legitimate end to the literalpart - but not the whole multiliteral |
|
set_token_waiting type newline value lf complete 1 startindex $cindex |
|
return 1 |
|
} |
|
stringpart { |
|
if {$had_slash} { |
|
#emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) |
|
set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_token_waiting type newline value lf complete 1 startindex $cindex |
|
return 1 |
|
} |
|
} |
|
starttablename - tablename - tablearrayname - starttablearrayname { |
|
error "tomlish Character <linefeed> is invalid in $tokenType. [tomlish::parse::report_line]" |
|
} |
|
default { |
|
#newline ends all other tokens. |
|
#note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) |
|
#note for whitespace: |
|
# we will use the convention that \n terminates the current whitespace even if whitespace follows |
|
# ie whitespace is split into separate whitespace tokens at each newline |
|
|
|
#puts "-------------- newline lf during tokenType $tokenType" |
|
set_token_waiting type newline value lf complete 1 startindex $cindex |
|
return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
if {$had_slash} { |
|
set_tokenType "continuation" |
|
set tok "\\" |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_tokenType "newline" |
|
set tok lf |
|
return 1 |
|
} |
|
} |
|
multiliteral-space { |
|
#assert had_slash 0 |
|
set_tokenType "newline" |
|
set tok "lf" |
|
return 1 |
|
} |
|
default { |
|
#ignore slash? error? |
|
set_tokenType "newline" |
|
set tok lf |
|
return 1 |
|
} |
|
} |
|
#if {$had_slash} { |
|
# #CONT directly before newline - allows strings_5_byteequivalent test to pass |
|
# set_tokenType "continuation" |
|
# set tok "\\" |
|
# incr i -1 |
|
# return 1 |
|
#} else { |
|
# set_tokenType newline |
|
# set tok lf |
|
# return 1 |
|
#} |
|
} |
|
} |
|
, { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
newline { |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
comment - tablename - tablearrayname { |
|
if {$had_slash} {append tok "\\"} |
|
append tok , |
|
} |
|
string - dquotedkey { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
stringpart { |
|
#stringpart can have up to 2 quotes too |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
literal - literalpart - squotedkey { |
|
#assert had_slash always 0 |
|
append tok $c |
|
} |
|
whitespace { |
|
if {$state eq "multistring-space"} { |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_token_waiting type comma value "," complete 1 startindex $cindex |
|
return 1 |
|
} |
|
} |
|
default { |
|
set_token_waiting type comma value "," complete 1 startindex $cindex |
|
if {$had_slash} {append tok "\\"} |
|
return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} {append tok "\\"} |
|
append tok "," |
|
} |
|
multiliteral-space { |
|
#assert had_slash 0 |
|
set_tokenType "literalpart" |
|
set tok "," |
|
} |
|
default { |
|
set_tokenType "comma" |
|
set tok "," |
|
return 1 |
|
} |
|
} |
|
} |
|
} |
|
. { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
newline { |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
comment - untyped_value { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
string - dquotedkey { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
stringpart { |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
literal - literalpart - squotedkey { |
|
#assert had_slash always 0 |
|
append tok $c |
|
} |
|
whitespace { |
|
switch -exact -- $state { |
|
multistring-space { |
|
#review |
|
if {$had_slash} { |
|
incr i -2 |
|
} else { |
|
incr i -1 |
|
} |
|
return 1 |
|
} |
|
xxxdottedkey-space { |
|
incr i -1 |
|
return 1 |
|
} |
|
dottedkey-space-tail { |
|
incr i -1 |
|
return 1 |
|
} |
|
default { |
|
error "tomlish Received period during tokenType 'whitespace' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
starttablename - starttablearrayname { |
|
#This would correspond to an empty table name |
|
error "tomlish Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" |
|
} |
|
tablename - tablearrayname { |
|
#subtable - split later - review |
|
append tok $c |
|
} |
|
barekey { |
|
#e.g x.y = 1 |
|
#we need to transition the barekey to become a structured table name ??? review |
|
#x is the tablename y is the key |
|
set_token_waiting type dotsep value "." complete 1 startindex $cindex |
|
return 1 |
|
} |
|
default { |
|
error "tomlish Received period during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
#set_token_waiting type period value . complete 1 |
|
#return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok "" |
|
if {$had_slash} {append tok "\\"} |
|
append tok "." |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok "." |
|
} |
|
XXXdottedkey-space { |
|
### obs? |
|
set_tokenType "dotsep" |
|
set tok "." |
|
return 1 |
|
} |
|
dottedkey-space-tail { |
|
### |
|
set_tokenType "dotsep" |
|
set tok "." |
|
return 1 |
|
} |
|
default { |
|
set_tokenType "untyped_value" |
|
set tok "." |
|
} |
|
} |
|
} |
|
|
|
} |
|
" " - tab { |
|
if {[tcl::string::length $tokenType]} { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
switch -exact -- $tokenType { |
|
newline { |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
barekey { |
|
#todo had_slash - emit token or error |
|
#whitespace is a terminator for bare keys |
|
#set_token_waiting type whitespace value $c complete 1 |
|
incr i -1 |
|
return 1 |
|
} |
|
untyped_value { |
|
#unquoted values (int,date,float etc) are terminated by whitespace |
|
#set_token_waiting type whitespace value $c complete 1 |
|
incr i -1 |
|
return 1 |
|
} |
|
comment { |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok $c |
|
} |
|
string - dquotedkey { |
|
if {$had_slash} { append tok "\\" } |
|
append tok $c |
|
} |
|
stringpart { |
|
#for stringpart we store WS separately for ease of processing continuations (CONT stripping) |
|
if {$had_slash} { |
|
#REVIEW |
|
#emit the stringpart - go back to the slash |
|
incr i -2 |
|
return 1 |
|
} else { |
|
#split into STRINGPART xxx WS " " |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
literal - literalpart - squotedkey { |
|
append tok $c |
|
} |
|
whitespace { |
|
if {$state eq "multistring-space"} { |
|
append tok $c |
|
} else { |
|
append tok $c |
|
} |
|
} |
|
starttablename - starttablearrayname { |
|
incr i -1 |
|
return 1 |
|
} |
|
tablename - tablearrayname { |
|
#include whitespace in the tablename/tablearrayname |
|
#Will need to be normalized upon interpreting the tomlish as a datastructure |
|
append tok $c |
|
} |
|
default { |
|
error "tomlish Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
switch -exact -- $state { |
|
tablename-state { |
|
#tablename can have leading,trailing and interspersed whitespace! |
|
#These will not be treated as whitespace tokens, instead forming part of the name. |
|
set_tokenType tablename |
|
set tok "" |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
tablearrayname-state { |
|
set_tokenType tablearrayname |
|
set tok "" |
|
if {$had_slash} {append tok "\\"} |
|
append tok $c |
|
} |
|
multistring-space { |
|
if {$had_slash} { |
|
set_tokenType "continuation" |
|
set tok "\\" |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_tokenType "whitespace" |
|
append tok $c |
|
} |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok $c |
|
} |
|
default { |
|
if {$had_slash} { |
|
error "tomlish unexpected backslash [tomlish::parse::report_line]" |
|
} |
|
set_tokenType "whitespace" |
|
append tok $c |
|
} |
|
} |
|
} |
|
} |
|
tabX { |
|
if {[tcl::string::length $tokenType]} { |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) |
|
set slash_active 0 |
|
switch -exact -- $tokenType { |
|
newline { |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
barekey { |
|
#whitespace is a terminator for bare keys |
|
incr i -1 |
|
#set_token_waiting type whitespace value $c complete 1 |
|
return 1 |
|
} |
|
untyped_value { |
|
#unquoted values (int,date,float etc) are terminated by whitespace |
|
#set_token_waiting type whitespace value $c complete 1 |
|
incr i -1 |
|
return 1 |
|
} |
|
squotedkey { |
|
append tok $c |
|
} |
|
dquotedkey - string - comment - whitespace { |
|
#REVIEW |
|
append tok $c |
|
} |
|
stringpart { |
|
#for stringpart we store WS separately for ease of processing continuations (CONT stripping) |
|
if {$had_slash} { |
|
#REVIEW |
|
#emit the stringpart - go back to the slash |
|
incr i -2 |
|
return 1 |
|
} else { |
|
#split into STRINGPART aaa WS " " |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
literal - literalpart { |
|
append tok $c |
|
} |
|
starttablename - starttablearrayname { |
|
incr i -1 |
|
return 1 |
|
} |
|
tablename - tablearrayname { |
|
#include whitespace in the tablename/tablearrayname |
|
#Will need to be normalized upon interpreting the tomlish as a datastructure |
|
append tok $c |
|
} |
|
default { |
|
error "tomlish Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
set had_slash $slash_active |
|
if {$slash_active} { |
|
set slash_active 0 |
|
} |
|
switch -exact -- $state { |
|
tablename-state { |
|
#tablename can have leading,trailing and interspersed whitespace! |
|
#These will not be treated as whitespace tokens, instead forming part of the name. |
|
set_tokenType tablename |
|
set tok $c |
|
} |
|
tablearrayname-state { |
|
set_tokenType tablearrayname |
|
set tok $c |
|
} |
|
multistring-space { |
|
if {$had_slash} { |
|
set_tokenType "continuation" |
|
set tok "\\" |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_tokenType whitespace |
|
append tok $c |
|
} |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok $c |
|
} |
|
default { |
|
set_tokenType "whitespace" |
|
append tok $c |
|
} |
|
} |
|
} |
|
} |
|
bom { |
|
#bom encoded as single unicode codepoint \uFFEF |
|
#BOM (Byte Order Mark) - ignored by token consumer |
|
if {[tcl::string::length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
#assert - tok will be one or two squotes only |
|
#A toml literal probably isn't allowed to contain this |
|
#but we will parse and let the validator sort it out. |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
literal - literalpart { |
|
append tok $c |
|
} |
|
string - stringpart { |
|
append tok $c |
|
} |
|
default { |
|
#state machine will generally not have entry to accept bom - let it crash |
|
set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex |
|
return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok $c |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
set tok $c |
|
} |
|
default { |
|
set_tokenType "bom" |
|
set tok "\uFEFF" |
|
return 1 |
|
} |
|
} |
|
} |
|
} |
|
default { |
|
|
|
if {[tcl::string::length $tokenType]} { |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
switch -exact -- $tokenType { |
|
newline { |
|
#incomplete newline |
|
set_tokenType "cr" |
|
incr i -1 |
|
return 1 |
|
} |
|
tentative_accum_squote - tentative_accum_dquote { |
|
incr i -1 |
|
return 1 |
|
} |
|
_start_squote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_squote" |
|
return 1 |
|
} |
|
_start_dquote_sequence { |
|
incr i -[tcl::string::length $tok] |
|
set_tokenType "single_dquote" |
|
return 1 |
|
} |
|
whitespace { |
|
if {$state eq "multistring-space"} { |
|
incr i -1 |
|
return 1 |
|
} else { |
|
#review |
|
incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. |
|
return 1 |
|
} |
|
} |
|
barekey { |
|
if {[tomlish::utils::is_barekey $c]} { |
|
append tok $c |
|
} else { |
|
error "tomlish Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] and a selection of letter-like chars allowed. (see tomlish::utils::is_barekey) [tomlish::parse::report_line]" |
|
} |
|
} |
|
starttablename - starttablearrayname { |
|
incr i -1 |
|
#allow statemachine to set context for subsequent chars |
|
return 1 |
|
} |
|
string - stringpart { |
|
append tok $c |
|
} |
|
default { |
|
#e.g comment/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname |
|
append tok $c |
|
} |
|
} |
|
} else { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
switch -exact -- $state { |
|
table-space - itable-space { |
|
#if no currently active token - assume another key value pair |
|
if {[tomlish::utils::is_barekey $c]} { |
|
set_tokenType "barekey" |
|
append tok $c |
|
} else { |
|
error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" |
|
} |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
if {$had_slash} { |
|
set tok \\$c |
|
} else { |
|
set tok $c |
|
} |
|
} |
|
multiliteral-space { |
|
set_tokenType "literalpart" |
|
set tok $c |
|
} |
|
tablename-state { |
|
set_tokenType "tablename" |
|
set tok $c |
|
} |
|
tablearrayname-state { |
|
set_tokenType "tablearrayname" |
|
set tok $c |
|
} |
|
dottedkey-space { |
|
set_tokenType barekey |
|
set tok $c |
|
} |
|
default { |
|
#todo - something like ansistring VIEW to show control chars? |
|
set cshow [string map [list \t tab \v vt] $c] |
|
tomlish::log::debug "- tokloop char '$cshow' setting to untyped_value while state:$state [tomlish::parse::report_line]" |
|
set_tokenType "untyped_value" |
|
set tok $c |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
} |
|
|
|
#run out of characters (eof) |
|
if {[tcl::string::length $tokenType]} { |
|
#check for invalid ending tokens |
|
#if {$state eq "err-state"} { |
|
# error "Reached end of data whilst tokenType = '$tokenType'. INVALID" |
|
#} |
|
switch -exact -- $tokenType { |
|
_start_squote_sequence { |
|
set toklen [tcl::string::length $tok] |
|
switch -- $toklen { |
|
1 { |
|
#invalid eof with open literal |
|
error "tomlish eof reached without closing single quote for string literal. [tomlish::parse::report_line]" |
|
} |
|
2 { |
|
set_tokenType "literal" |
|
set tok "" |
|
return 1 |
|
|
|
##review |
|
#set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] |
|
#set_tokenType "literal" |
|
#set tok "" |
|
#return 1 |
|
} |
|
} |
|
} |
|
_start_dquote_sequence { |
|
set toklen [tcl::string::length $tok] |
|
switch -- $toklen { |
|
1 { |
|
#invalid eof with open string |
|
error "tomlish eof reached without closing double quote for string. [tomlish::parse::report_line]" |
|
} |
|
2 { |
|
set_tokenType "string" |
|
set tok "" |
|
return 1 |
|
} |
|
} |
|
} |
|
newline { |
|
#The only newline token that has still not been returned should have a tok value of "cr" |
|
puts "tomlish eof reached - with incomplete newline token '$tok'" |
|
if {$tok eq "cr"} { |
|
#we convert lone cr to it's own "cr" token elsewhere in the document to allow statemachine to handle it. |
|
#(which it should generally do by not handling it ie raising an error - or emitting an ERROR list in the tomlish) |
|
#if trailing char is a lone cr - we should encode it the same way as elsewhere that is outside of values |
|
# ie as it's own token. |
|
switch_tokenType "cr" |
|
return 1 |
|
} else { |
|
#should be unreachable |
|
error "tomlish eof reached - with invalid newline token. value: $tok" |
|
} |
|
} |
|
} |
|
set_token_waiting type eof value eof complete 1 startindex $i ;#review |
|
return 1 |
|
} else { |
|
::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" |
|
set tokenType "eof" |
|
set tok "eof" |
|
} |
|
return 0 |
|
} |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::parse ---}] |
|
} |
|
namespace eval tomlish::huddle { |
|
proc from_json {json} { |
|
package require huddle |
|
package require huddle::json |
|
#note - huddle may now contain raw surrogate pair - which cannot be emitted to stdout |
|
set h [huddle::json::json2huddle parse $json] |
|
} |
|
proc from_dict {d} { |
|
|
|
} |
|
|
|
#raw - strings must already be processed into values suitable for json e.g surrogate pair escaping |
|
proc jsondumpraw {huddle_object {offset " "} {newline "\n"} {begin ""}} { |
|
upvar ::huddle::types types |
|
set nextoff "$begin$offset" |
|
set nlof "$newline$nextoff" |
|
set sp " " |
|
if {[string equal $offset ""]} {set sp ""} |
|
|
|
set type [huddle type $huddle_object] |
|
|
|
switch -- $type { |
|
boolean - |
|
number { |
|
return [huddle get_stripped $huddle_object] |
|
} |
|
null { |
|
return null |
|
} |
|
string { |
|
set data [huddle get_stripped $huddle_object] |
|
|
|
# JSON permits only oneline string |
|
#set data [string map { |
|
# \n \\n |
|
# \t \\t |
|
# \r \\r |
|
# \b \\b |
|
# \f \\f |
|
# \\ \\\\ |
|
# \" \\\" |
|
# / \\/ |
|
# } $data |
|
#] |
|
return "\"$data\"" |
|
} |
|
list { |
|
set inner {} |
|
set len [huddle llength $huddle_object] |
|
for {set i 0} {$i < $len} {incr i} { |
|
set subobject [huddle get $huddle_object $i] |
|
lappend inner [jsondumpraw $subobject $offset $newline $nextoff] |
|
} |
|
if {[llength $inner] == 1} { |
|
return "\[[lindex $inner 0]\]" |
|
} |
|
return "\[$nlof[join $inner ,$nlof]$newline$begin\]" |
|
} |
|
dict { |
|
set inner {} |
|
foreach {key} [huddle keys $huddle_object] { |
|
lappend inner [subst {"$key":$sp[jsondumpraw [huddle get $huddle_object $key] $offset $newline $nextoff]}] |
|
} |
|
#if {[llength $inner] == 1} { |
|
# return $inner ;#wrong - breaks with quoted list representation |
|
# #FAILS: toml-test valid/comment/tricky |
|
#} |
|
|
|
return "\{$nlof[join $inner ,$nlof]$newline$begin\}" |
|
} |
|
default { |
|
set node [unwrap $huddle_object] |
|
#foreach {tag src} $node break |
|
lassign $node tag src |
|
return [$types(callback:$tag) jsondumpraw $huddle_object $offset $newline $nextoff] |
|
} |
|
} |
|
} |
|
} |
|
|
|
#typed as per toml-test types |
|
namespace eval tomlish::typedhuddle { |
|
proc from_json {json} { |
|
set plainhuddle [tomlish::huddle::from_json $json] |
|
|
|
error "tomlish::typedhuddle::from_json unimplemented" |
|
} |
|
proc from_dict {d} { |
|
package require huddle |
|
set h [huddle create] |
|
if {[tomlish::dict::is_typeval $d]} { |
|
set dtype [dict get $d type] |
|
switch -- $dtype { |
|
ARRAY { |
|
#error "typedhuddle::from_dict ARRAY not yet handled" |
|
set h_list [huddle list] |
|
set elements [dict get $d value] |
|
foreach el $elements { |
|
set sub [from_dict $el] |
|
huddle append h_list $sub |
|
} |
|
return $h_list |
|
} |
|
default { |
|
set tinfo [tomlish::dict::convert_typeval_to_tomltest $d] |
|
#basic non-container types |
|
set h_tdict [huddle create] |
|
huddle set h_tdict type [huddle string [dict get $tinfo type]] |
|
huddle set h_tdict value [huddle string [dict get $tinfo value]] |
|
return $h_tdict |
|
} |
|
} |
|
} else { |
|
dict for {dictkey dictval} $d { |
|
set jsonkey [tomlish::utils::rawstring_to_jsonstring $dictkey] |
|
if {[tomlish::dict::is_typeval $dictval]} { |
|
set dtype [dict get $dictval type] |
|
switch -- $dtype { |
|
ARRAY { |
|
#error "typedhuddle::from_dict ARRAY not yet handled" |
|
set h_next [huddle list] |
|
set elements [dict get $dictval value] |
|
foreach el $elements { |
|
set sub [from_dict $el] |
|
huddle append h_next $sub |
|
} |
|
} |
|
default { |
|
set tinfo [tomlish::dict::convert_typeval_to_tomltest $dictval] |
|
set tp [dict get $tinfo type] |
|
#basic non-container types |
|
set h_next [huddle create] ;#dict |
|
huddle set h_next type [huddle string [dict get $tinfo type]] |
|
huddle set h_next value [huddle string [dict get $tinfo value]] |
|
} |
|
} |
|
huddle set h $jsonkey $h_next |
|
} else { |
|
#dict |
|
set sub [from_dict $dictval] |
|
huddle set h $jsonkey $sub |
|
} |
|
} |
|
} |
|
return $h |
|
} |
|
proc is_typeval {huddled} { |
|
set htype [huddle type $huddled] |
|
if {$htype ne "dict"} { |
|
return 0 |
|
} |
|
if {[huddle keys $huddled] ne {type value}} { |
|
return 0 |
|
} |
|
set tp [huddle type $huddled type] |
|
switch -- $tp { |
|
string - integer - float - bool - datetime - datetime-local - date-local - time-local { |
|
return 1 |
|
} |
|
} |
|
return 0 |
|
} |
|
|
|
#direction from typed json towards toml |
|
proc convert_typeval_to_tomlish {huddled} { |
|
set htype [huddle get_stripped $huddled type] |
|
set hval [huddle get_stripped $huddled value] |
|
switch -- $htype { |
|
string { |
|
#we need to decide here the type of string element to use in toml/tomlish |
|
#STRING,MULTISTRING,LITERAL,MULTILITERAL |
|
#set unesc [tomlish::utils::unescape_jsonstring $hval] ;#no need - json parser unescaped when creating the huddle |
|
set unesc $hval |
|
#(huddle::json::json2huddle parse $json) |
|
#since it was unescaped any backslashes remaining represent themselves - reapply escape - REVIEW |
|
#set hval [string map [list \\ \\\ ] $hval] |
|
#JSJS |
|
if {[string first \n $unesc] >= 0} { |
|
#always use a MULTI |
|
if {[string first ' $unesc] >=0} { |
|
if {[string first ''' $unesc] >=0} { |
|
set dtype MULTISTRING |
|
} else { |
|
set dtype MULTILITERAL |
|
} |
|
} else { |
|
if {[string first \"\"\" $unesc] >=0} { |
|
set dtype MULTILITERAL |
|
} else { |
|
set dtype MULTISTRING |
|
} |
|
} |
|
} else { |
|
#use multi if needed? |
|
if {[string first '' $hval] >=0} { |
|
if {[string first ''' $unesc] >=0} { |
|
set dtype STRING |
|
} else { |
|
set dtype MULTILITERAL |
|
} |
|
} elseif {[string first ' $unesc] >= 0} { |
|
set dtype STRING |
|
} elseif {[string first \"\"\" $unesc] >= 0} { |
|
set dtype LITERAL |
|
} else { |
|
#STRING or LITERAL? |
|
set dtype STRING |
|
} |
|
} |
|
|
|
} |
|
datetime - bool { |
|
set dtype [string toupper $htype] |
|
} |
|
float { |
|
set dtype FLOAT |
|
if {[string is integer -strict $hval]} { |
|
#json FLOAT specified as integer - must have dot for toml |
|
set hval [expr {double($hval)}] |
|
} |
|
} |
|
integer { |
|
set dtype INT |
|
} |
|
datetime - datetime-local - date-local - time-local { |
|
#DDDD |
|
#set dtype DATETIME |
|
set dtype [string toupper $htype] |
|
} |
|
default { |
|
error "tomlish::typedhuddle::convert_typeval_to_tomlish unrecognised type $htype" |
|
} |
|
} |
|
return [list type $dtype value $hval] |
|
} |
|
|
|
} |
|
namespace eval tomlish::toml { |
|
proc from_binary {bindata} { |
|
set bom "" |
|
set b12 [tcl::string::range $bindata 0 1] |
|
set b12test [string map [list \xEF\xBB utf8_12 \xFE\xFF bom16be \xFF\xFE utf32le_12 \x00\x00 utf32be_12] $b12] |
|
switch -- $b12test { |
|
bom16be { |
|
#FEFF |
|
set bom utf-16be |
|
} |
|
utf32le_12 { |
|
#FFFE |
|
set b34 [tcl::string::range $bindata 2 3] |
|
if {$b34 eq "\x00\x00"} { |
|
set bom utf-32le |
|
} else { |
|
set bom utf-16le |
|
} |
|
} |
|
utf32be_12 { |
|
#0000 |
|
set b34 [tcl::string::range $bindata 2 3] |
|
if {$b34 eq "\xFE\xFF"} { |
|
set bom utf-32be |
|
} |
|
} |
|
utf8_12 { |
|
set b3 [tcl::string::index $bindata 2] |
|
if {$b3 eq "\xBF"} { |
|
set bom utf-8 |
|
} |
|
} |
|
} |
|
if {$bom eq ""} { |
|
#no bom - assume utf8 - but we read in as binary |
|
#if data wasn't actually utf8 we may error here depending on content - or we may just get wrongly encoded chars |
|
set tomldata [encoding convertfrom utf-8 $bindata] |
|
} elseif {$bom eq "utf-8"} { |
|
#utf-8 bom read in as binary |
|
set tomldata [encoding convertfrom utf-8 $bindata] |
|
#bom now encoded as single unicode char \uFFEF |
|
} else { |
|
return -code error -errorcode {TOML ENCODING NOTUTF8} "Input not UTF8 encoded according to BOM. Indicated encoding is '$bom' - invalid for toml" |
|
} |
|
return $tomldata |
|
} |
|
proc from_tomlish {tomlish} { |
|
return [tomlish::encode::tomlish $tomlish] |
|
} |
|
|
|
#todo - rename to taggedjson |
|
proc from_tomlish_from_dict_from_typedjson {json} { |
|
set d [tomlish::dict::from_typedjson $json] |
|
from_tomlish [tomlish::from_dict $d] ;#return tomlish |
|
} |
|
|
|
proc tablename_split {tablename {normalize false}} { |
|
#we can't just split on . because we have to handle quoted segments which may contain a dot. |
|
#eg {dog."tater.man"} |
|
if {$tablename eq ""} { |
|
error "tablename_split. No table name segments found. empty tablename" |
|
} |
|
set sLen [tcl::string::length $tablename] |
|
set segments [list] |
|
set mode "preval" ;#5 modes: preval, quoted,litquoted, unquoted, postval |
|
#quoted is for double-quotes, litquoted is for single-quotes (string literal) |
|
set seg "" |
|
for {set i 0} {$i < $sLen} {incr i} { |
|
|
|
if {$i > 0} { |
|
set lastChar [tcl::string::index $tablename [expr {$i - 1}]] |
|
} else { |
|
set lastChar "" |
|
} |
|
|
|
#todo - track\count backslashes properly |
|
|
|
set c [tcl::string::index $tablename $i] |
|
if {$c eq "\""} { |
|
if {($lastChar eq "\\")} { |
|
#not strictly correct - we could have had an even number prior-backslash sequence |
|
#the toml spec would have us error out immediately on bsl in bad location - but we're |
|
#trying to parse to unvalidated tomlish |
|
set ctest escq |
|
} else { |
|
set ctest dq |
|
} |
|
} else { |
|
set ctest [string map [list " " sp \t tab] $c] |
|
} |
|
|
|
switch -- $ctest { |
|
. { |
|
switch -exact -- $mode { |
|
preval { |
|
error "tablename_split. dot not allowed - expecting a value" |
|
} |
|
unquoted { |
|
#dot marks end of segment. |
|
if {![tomlish::utils::is_barekey $seg]} { |
|
error "tablename_split. unquoted key segment $seg is not a valid toml key" |
|
} |
|
lappend segments $seg |
|
set seg "" |
|
set mode "preval" |
|
} |
|
quoted { |
|
append seg $c |
|
} |
|
litquoted { |
|
append seg $c |
|
} |
|
postval { |
|
#got dot in an expected location |
|
set mode "preval" |
|
} |
|
} |
|
} |
|
dq { |
|
#unescaped dquote |
|
switch -- $mode { |
|
preval { |
|
set mode "quoted" |
|
set seg "\"" |
|
} |
|
unquoted { |
|
#invalid in barekey - but we are after structure only |
|
append seg $c |
|
} |
|
quoted { |
|
append seg $c |
|
#JJJJ |
|
if {$normalize} { |
|
lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] |
|
} else { |
|
lappend segments $seg |
|
} |
|
set seg "" |
|
set mode "postval" ;#make sure we only accept a dot or end-of-data now. |
|
} |
|
litquoted { |
|
append seg $c |
|
} |
|
postval { |
|
error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" |
|
} |
|
} |
|
} |
|
' { |
|
switch -- $mode { |
|
preval { |
|
append seg $c |
|
set mode "litquoted" |
|
} |
|
unquoted { |
|
#single quote inside e.g o'neill - ultimately invalid - but we pass through here. |
|
append seg $c |
|
} |
|
quoted { |
|
append seg $c |
|
} |
|
litquoted { |
|
append seg $c |
|
#no normalization to do aside from stripping squotes |
|
if {$normalize} { |
|
lappend segments [tcl::string::range $seg 1 end-1] |
|
} else { |
|
lappend segments $seg |
|
} |
|
set seg "" |
|
set mode "postval" |
|
} |
|
postval { |
|
error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" |
|
} |
|
} |
|
} |
|
sp - tab { |
|
switch -- $mode { |
|
preval - postval { |
|
#ignore |
|
} |
|
unquoted { |
|
#terminates a barekey |
|
lappend segments $seg |
|
set seg "" |
|
set mode "postval" |
|
} |
|
default { |
|
#append to quoted or litquoted |
|
append seg $c |
|
} |
|
} |
|
} |
|
default { |
|
switch -- $mode { |
|
preval { |
|
set mode unquoted |
|
append seg $c |
|
} |
|
postval { |
|
error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" |
|
} |
|
default { |
|
append seg $c |
|
} |
|
} |
|
} |
|
} |
|
|
|
if {$i == $sLen-1} { |
|
#end of data |
|
::tomlish::log::debug "End of data: mode='$mode'" |
|
switch -exact -- $mode { |
|
preval { |
|
if {[llength $segments]} { |
|
error "tablename_split. Expected a value after last dot separator. tablename: '$tablename'" |
|
} else { |
|
error "tablename_split. Whitespace only? No table name segments found. tablename: '$tablename'" |
|
} |
|
} |
|
unquoted { |
|
if {![tomlish::utils::is_barekey $seg]} { |
|
#e.g toml-test invalid/table/with-pound required to fail for invalid barekey |
|
error "tablename_split. unquoted key segment $seg is not a valid toml key" |
|
} |
|
lappend segments $seg |
|
} |
|
quoted { |
|
error "tablename_split. Expected a trailing double quote. tablename: '$tablename'" |
|
} |
|
litquoted { |
|
error "tablename_split. Expected a trailing single quote. tablename: '$tablename'" |
|
} |
|
postval { |
|
#ok - segment already lappended |
|
} |
|
} |
|
} |
|
} |
|
|
|
#note - we must allow 'empty' quoted strings '' & "" |
|
# (these are 'discouraged' but valid toml keys) |
|
|
|
return $segments |
|
} |
|
|
|
#tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace |
|
# tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] |
|
#trimmed, the tablename becomes {a.b.c} |
|
# A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] |
|
#ie whitespace is only irrelevant if it's outside a quoted segment |
|
#trimmed, the tablename becomes {a.b."c etc "} |
|
proc tablename_trim {tablename} { |
|
set segments [tomlish::toml::tablename_split $tablename false] |
|
set trimmed_segments [list] |
|
foreach seg $segments { |
|
lappend trimmed_segments [::string trim $seg " \t"] |
|
} |
|
return [join $trimmed_segments .] |
|
} |
|
} |
|
|
|
namespace eval tomlish::dict { |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
namespace path [namespace parent] |
|
|
|
#from_taggedjson |
|
proc from_typedjson {json} { |
|
package require huddle |
|
package require huddle::json |
|
set h [huddle::json::json2huddle parse $json] |
|
#json2huddle parse unescapes the basic json escapes \n \\ etc |
|
#$h could now contain raw form of surrogate pair (json2huddle parse as at 2025-014 doesn't convert the surrogates - just unescapes?) |
|
if {[catch {encoding convertto utf-8 $h} errM]} { |
|
#This test suggests we have raw surrogate pairs - REVIEW |
|
package require punk::cesu |
|
set h [punk::cesu::from_surrogatestring $h] |
|
} |
|
tomlish::dict::from_typedhuddle $h |
|
} |
|
proc from_typedhuddle {h} { |
|
set resultd [dict create] |
|
switch -- [huddle type $h] { |
|
dict { |
|
foreach k [huddle keys $h] { |
|
switch -- [huddle type $h $k] { |
|
dict { |
|
set huddle_d [huddle get $h $k] |
|
#puts stderr "huddle_d: $huddle_d" |
|
#set v [huddle get_stripped $h $k] |
|
if {[tomlish::typedhuddle::is_typeval $huddle_d]} { |
|
dict set resultd $k [tomlish::typedhuddle::convert_typeval_to_tomlish $huddle_d] |
|
} else { |
|
dict set resultd $k [from_typedhuddle $huddle_d] |
|
} |
|
} |
|
list { |
|
set items [huddle get $h $k] |
|
|
|
set numitems [huddle llength $items] |
|
if {$numitems == 0} { |
|
dict set resultd $k [list type ARRAY value {}] |
|
} else { |
|
set arritems [list] |
|
for {set i 0} {$i < $numitems} {incr i} { |
|
set item [huddle get $items $i] |
|
#puts stderr "item: $item" |
|
#set v [huddle get $item] |
|
if {[tomlish::typedhuddle::is_typeval $item]} { |
|
lappend arritems [tomlish::typedhuddle::convert_typeval_to_tomlish $item] |
|
} else { |
|
lappend arritems [from_typedhuddle $item] |
|
} |
|
} |
|
dict set resultd $k [list type ARRAY value $arritems] |
|
} |
|
} |
|
default { |
|
error "dict_from_json unexpected subtype [huddle type $h $k] in dict" |
|
} |
|
} |
|
} |
|
} |
|
list { |
|
set items [huddle get $h] |
|
set numitems [huddle llength $items] |
|
if {$numitems == 0} { |
|
return [list type ARRAY value {}] |
|
} else { |
|
set arritems [list] |
|
for {set i 0} {$i < $numitems} {incr i} { |
|
set item [huddle get $items $i] |
|
#puts stderr "item: $item" |
|
#set v [huddle get $item] |
|
if {[tomlish::typedhuddle::is_typeval $item]} { |
|
lappend arritems [tomlish::typedhuddle::convert_typeval_to_tomlish $item] |
|
} else { |
|
lappend arritems [from_typedhuddle $item] |
|
} |
|
} |
|
return [list type ARRAY value $arritems] |
|
} |
|
|
|
} |
|
} |
|
return $resultd |
|
} |
|
|
|
proc is_typeval {d} { |
|
#designed to detect {type <tag> value <whatever>} e.g {type INT value 3}, {type STRING value "blah etc"} |
|
#as a sanity check we need to avoid mistaking user data that happens to match same form |
|
#consider x.y={type="spud",value="blah"} |
|
#The value of type will itself have already been converted to {type STRING value spud} ie never a single element. |
|
#check the length of the type as a quick way to see it's a tag - not something else masqerading. |
|
expr {[string is dict $d] && [dict size $d] == 2 && [dict exists $d type] && [dict exists $d value] && [llength [dict get $d type]] == 1} |
|
} |
|
|
|
#simple types only - not containers? |
|
proc convert_typeval_to_tomltest {d} { |
|
set dtype [dict get $d type] |
|
set dval [dict get $d value] |
|
switch -- $dtype { |
|
INT { |
|
set testtype integer |
|
set dval [expr {$dval}] ;#convert e.g 0xDEADBEEF to 3735928559 |
|
} |
|
DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - FLOAT - BOOL { |
|
#DDDD |
|
set testtype [string tolower $dtype] |
|
} |
|
STRING - MULTISTRING { |
|
set testtype string |
|
#JJJJ |
|
set dval [tomlish::utils::unescape_string $dval] |
|
set dval [tomlish::utils::rawstring_to_jsonstring $dval] |
|
} |
|
LITERAL - MULTILITERAL { |
|
set testtype string |
|
#don't validate on way out to json here? |
|
#decoder should validate by calling tomlish::from_dict |
|
#if {![tomlish::utils::rawstring_is_valid_literal $dval]} { |
|
# return -code error -errorcode {TOML SYNTAX INVALIDLITERAL} $msg |
|
#} |
|
set dval [tomlish::utils::rawstring_to_jsonstring $dval] |
|
} |
|
MULTILITERAL { |
|
#todo - escape newlines for json? |
|
set testtype string |
|
} |
|
default { |
|
error "convert_typeval_to_tomltest unhandled type $dtype" |
|
} |
|
} |
|
return [list type $testtype value $dval] |
|
} |
|
|
|
# Check that each leaf is a typeval or typeval dict |
|
#importantly: must accept empty dict leaves e.g {x {}} |
|
proc is_typeval_dict {d {checkarrays 0}} { |
|
if {![string is dict $d]} { |
|
return 0 |
|
} |
|
dict for {k v} $d { |
|
set is_d 0 |
|
if {!([is_typeval $v] || [set is_d [is_typeval_dict $v $checkarrays]])} { |
|
return 0 |
|
} |
|
if {!$is_d} { |
|
set vtype [dict get $v type] |
|
switch -- $vtype { |
|
INT - FLOAT - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - BOOL - LITERAL - STRING - MULTILITERAL - MULTISTRING {} |
|
ARRAY { |
|
if {$checkarrays} { |
|
set arrdata [dict get $v value] |
|
foreach el $arrdata { |
|
if {![is_typeval_dict $el $checkarrays]} { |
|
return 0 |
|
} |
|
} |
|
} |
|
} |
|
default { |
|
puts stderr "is_typeval_dict: Unexpected type '$vtype'" |
|
return 0 |
|
} |
|
} |
|
} |
|
} |
|
return 1 |
|
} |
|
|
|
proc last_tomltype_posn {d} { |
|
set last_simple -1 |
|
set dictposn [expr {[dict size $d] -1}] |
|
foreach k [lreverse [dict keys $d]] { |
|
set dval [dict get $d $k] |
|
if {[is_typeval $dval]} { |
|
set last_simple $dictposn |
|
break |
|
} |
|
incr dictposn -1 |
|
} |
|
return $last_simple |
|
} |
|
|
|
|
|
#review |
|
proc name_from_tablestack {tablestack} { |
|
set name "" |
|
foreach tinfo [lrange $tablestack 1 end] { |
|
lassign $tinfo type namepart |
|
switch -- $type { |
|
T { |
|
if {$name eq ""} { |
|
append name $namepart |
|
} else { |
|
append name .$namepart |
|
} |
|
} |
|
I { |
|
if {$name eq ""} { |
|
append name $namepart |
|
} else { |
|
append name .$namepart |
|
} |
|
} |
|
default { |
|
#end at first break in the leading sequence of T & I tablenames |
|
break |
|
} |
|
} |
|
} |
|
return $name |
|
} |
|
|
|
|
|
#tablenames_info is a flat dict with the key being an '@@' path |
|
proc _show_tablenames {tablenames_info} { |
|
#e.g {@l@a @@b} {ttype header_table tdefined closed} |
|
append msg \n "tablenames_info:" \n |
|
dict for {tkey tinfo} $tablenames_info { |
|
append msg " " "table: $tkey" \n |
|
dict for {field finfo} $tinfo { |
|
append msg " " "$field $finfo" \n |
|
} |
|
} |
|
return $msg |
|
} |
|
|
|
#take a raw string and classify: result is a 2 element list comprised of KEY|SQKEY|DQKEY and the value being the appropriate inner string |
|
proc classify_rawkey {rawval} { |
|
if {![::tomlish::utils::is_barekey $rawval]} { |
|
#requires quoting |
|
# |
|
#Any dot in the key would have been split by dict::from_tomlish - so if it's present here it's part of this key - not a level separator! |
|
# |
|
#we'll use a basic mechanisms for now to determine the type of quoting |
|
# - whether it has any single quotes or not. |
|
# (can't go in an SQKEY) |
|
# - whether it has any chars that require quoting when in a Bstring |
|
# (if so - then its visual representation might be unsuitable for a key in a toml text file, so escape and put in DQKEY instead of literal SQKEY) |
|
#todo - more? |
|
#REVIEW - the backslash might often be in things like a regex or windows path - which is often better expressed in a literal SQKEY |
|
# from literal examples: |
|
# 'c:\Users\nodejs\templates' |
|
# '<\i\c*\s*>' |
|
#If these are in *keys* our basic test will express these as: |
|
# "c:\\Users\\nodejs\\templates" |
|
# "<\\i\\c*\\s*>" |
|
# This still works - but a smarter test might determine when SQKEY is the better form? |
|
#when coming from external systems - can we even know if the value was already escaped? REVIEW |
|
#Probably when coming from json - we know it's already escaped - and so we build our dict converting keys to unescaped |
|
#TODO - clarify in documentation that keys resulting from dict::from_tomlish are in 'normalized' (unescaped) form |
|
# |
|
#For keys - we currently (2025) are only allowed barekeys,basic strings and literal strings. (no multiline forms) |
|
set k_escaped [tomlish::utils::rawstring_to_Bstring_with_escaped_controls $rawval] |
|
if {[string length $k_escaped] != [string length $rawval]} { |
|
#escaping made a difference |
|
set has_escape_requirement 1 |
|
} else { |
|
set has_escape_requirement 0 |
|
} |
|
if {[string first ' $rawval] >=0 || $has_escape_requirement} { |
|
#basic string |
|
# (any ANSI SGR sequence will end up here in escaped form ) |
|
return [list DQKEY $k_escaped] |
|
} else { |
|
#literal string |
|
return [list SQKEY $rawval] |
|
} |
|
} else { |
|
return [list KEY $rawval] |
|
} |
|
} |
|
#the quoting implies the necessary escaping for DQKEYs |
|
proc join_and_quote_rawkey_list {rawkeylist} { |
|
set result "" |
|
foreach rk $rawkeylist { |
|
lassign [tomlish::dict::classify_rawkey $rk] type val |
|
switch -- $type { |
|
SQKEY { |
|
append result "'$val'." |
|
} |
|
DQKEY { |
|
append result "\"$val\"." |
|
} |
|
KEY { |
|
append result "$val." |
|
} |
|
} |
|
} |
|
return [string range $result 0 end-1] |
|
} |
|
|
|
proc _process_tomlish_dottedkey {element {context_refpath {}}} { |
|
upvar tablenames_info tablenames_info |
|
upvar datastructure datastructure |
|
set dottedtables_defined [list] |
|
set dkey_info [tomlish::get_dottedkey_info $element] |
|
#e.g1 keys {x.y y} keys_raw {'x.y' "a\tb"} keys {x.y {a b}} (keys_raw has escape, keys has literal tab char) |
|
#e.g2 keys {x.y y} keys_raw {{"x.y"} "a\tb"} keys {x.y {a b}} (keys_raw has escape, keys has literal tab char) |
|
|
|
#[a.b] |
|
#t1.t2.dottedtable.leafkey = "val" |
|
#we have already checked supertables a & {a b} |
|
# - in basic case, passed in context_refpath as {@@a @@b} |
|
# - our context_refpath could also include some combination of keys and array indices e.g {@@a @@b 3 @@subtablekey} |
|
#We need to check {a b t1} & {a b t2} ('creation' only) |
|
#and then 'dottedtable' is 'defined' while leafkey is an ordinary key in dottedtable |
|
|
|
#note we also get here as a 'dottedkey' with the following even though there is no dot in k |
|
#[a.b] |
|
#leafkey = "val" |
|
|
|
set all_dotted_keys [dict get $dkey_info keys] |
|
set dottedkeyname [join $all_dotted_keys .] |
|
|
|
if {[llength $all_dotted_keys] > 1} { |
|
#dottedtable.k=1 |
|
#tX.dottedtable.k=1 |
|
#etc |
|
|
|
#Wrap in a list so we can detect 'null' equivalent. |
|
#We can't use empty string as that's a valid dotted key segment |
|
set dottedtable_bag [list [lindex $all_dotted_keys end-1]] |
|
set dotparents [lrange $all_dotted_keys 0 end-2] |
|
} else { |
|
#basic case - not really a 'dotted' key |
|
#k = 1 |
|
set dottedtable_bag [list] ;#empty bag |
|
set dotparents [list] |
|
} |
|
#assert dottedtable_bag only ever holds 0 or 1 elements |
|
set leaf_key [lindex $all_dotted_keys end] |
|
|
|
#see also: https://github.com/toml-lang/toml/issues/846 "Can dotted keys insert into already-defined [tables]?" |
|
#This code was originally written with a misinterpretation of: |
|
#"Dotted keys create and define a table for each key part before the last one, provided that such tables were not previously created." |
|
# 'each key part before the last one' refers to each key in a single dotted key entry |
|
# not each 2nd-to last key in a list of dotted keys. |
|
|
|
|
|
#we've already tested the table/tablearray keys that got us here.. but not the dottedkey segments (if any) prior to dottedtable & leaf_key |
|
set dottedsuper_refpath $context_refpath |
|
foreach normkey $dotparents { |
|
lappend dottedsuper_refpath @@$normkey |
|
if {![dict exists $tablenames_info $dottedsuper_refpath ttype]} { |
|
#supertable with this combined path (context_path plus parts of dottedkey) not yet 'created' |
|
if {[tomlish::dict::path::exists $datastructure $dottedsuper_refpath]} { |
|
#There is data so it must have been created as a keyval |
|
set msg "Path $dottedsuper_refpath for dotted key $dottedkeyname already has data but doesn't appear to be a table (keycollision) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} |
|
#here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here |
|
#dict set tablenames_info $dottedsuper_refpath ttype unknown_table ;#REVIEW |
|
dict set tablenames_info $dottedsuper_refpath ttype unknown_dotted ;#REVIEW |
|
|
|
#see note above re dotted keys insert into already defined table - we need to 'define' all the dotted supers in this block |
|
lappend dottedtables_defined $dottedsuper_refpath |
|
|
|
#ensure empty tables are still represented in the datastructure |
|
tomlish::dict::path::set_endpoint datastructure $dottedsuper_refpath {} ;#set to empty subdict |
|
} else { |
|
#added for fixed assumption |
|
set ttype [dict get $tablenames_info $dottedsuper_refpath ttype] |
|
set definedstate [dictn getdef $tablenames_info [list $dottedsuper_refpath tdefined] NULL] |
|
switch -- $ttype { |
|
dottedkey_table - unknown_dotted { |
|
#'created' as dotted - but make sure it's from this header section - i.e defined not set |
|
if {$definedstate ne "NULL"} { |
|
#collision with some other dottedkey |
|
set msg "Table at $dottedsuper_refpath represented by dottedkey $dottedkeyname has been 'defined' elsewhere (table redefinition) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} |
|
} |
|
itable { |
|
#itables are immediately defined |
|
set msg "Table at $dottedsuper_refpath represented by dottedkey $dottedkeyname has been 'defined' as itable (table redefinition) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} |
|
default { |
|
#header_table, header_tablearray or unknown_header |
|
#is header_tablearray any different from header_table in this context? |
|
#we don't set tdefined for tablearray anyway - so should be ok here. |
|
if {$definedstate ne "NULL"} { |
|
set msg "Table at $dottedsuper_refpath represented by dottedkey $dottedkeyname has been 'defined' in a header (table redefinition) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
#dottedtable being 2nd last segment was for original assumption - todo - tidy up? we are duplicating the logic above |
|
#review - any need/advantage to treat 2nd to last key any different from other supers? ie D in a.b.c.D.key=1 |
|
#no need for 'unknown_dotted' vs 'dottedkey_table' ?? |
|
if {[llength $dottedtable_bag] == 1} { |
|
set dottedtable [lindex $dottedtable_bag 0] |
|
set dottedkey_refpath [list {*}$dottedsuper_refpath "@@$dottedtable"] |
|
#our dotted key is attempting to define a table |
|
if {![dict exists $tablenames_info $dottedkey_refpath ttype]} { |
|
#first one - but check datastructure for collisions |
|
if {[tomlish::dict::path::exists $datastructure $dottedkey_refpath]} { |
|
set msg "Path $dottedkey_refpath for dotted key $dottedkeyname already has data but doesn't appear to be a table (keycollision) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} |
|
#'create' the table |
|
dict set tablenames_info $dottedkey_refpath ttype dottedkey_table |
|
#don't actually set 'defined' here.. use the end of TABLE record to close them off by looking at this list |
|
tomlish::dict::path::set_endpoint datastructure $dottedkey_refpath {} |
|
lappend dottedtables_defined $dottedkey_refpath |
|
|
|
# |
|
} else { |
|
#exists - but might be from another dottedkey within the current header section |
|
#the table is open for adding keys until the next 'header' section ([tablename] / [[tablearray]]) |
|
#check for 'defined' closed (or just existence) |
|
set ttype [dict get $tablenames_info $dottedkey_refpath ttype] |
|
set definedstate [dictn getdef $tablenames_info [list $dottedkey_refpath tdefined] NULL] |
|
switch -- $ttype { |
|
dottedkey_table - unknown_dotted { |
|
#'created' as dotted - but make sure it's from this header section - i.e defined not set |
|
if {$definedstate ne "NULL"} { |
|
#collision with some other dottedkey |
|
set msg "Table at $dottedkey_refpath represented by dottedkey $dottedkeyname has been 'defined' elsewhere (table redefinition) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} |
|
} |
|
itable { |
|
#itables are immediately defined |
|
set msg "Table at $dottedkey_refpath represented by dottedkey $dottedkeyname has been 'defined' as itable (table redefinition) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} |
|
default { |
|
#header_table, header_tablearray or unknown_header |
|
#is header_tablearray any different from header_table in this context? |
|
#we don't set tdefined for tablearray anyway - so should be ok here. |
|
if {$definedstate ne "NULL"} { |
|
set msg "Table at $dottedkey_refpath represented by dottedkey $dottedkeyname has been 'defined' in a header (table redefinition) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} |
|
} |
|
} |
|
} |
|
} else { |
|
set dottedkey_refpath $dottedsuper_refpath |
|
} |
|
#assert - dottedkey represents a key val pair that can be added |
|
|
|
|
|
set fullkey_refpath [list {*}$dottedkey_refpath @@$leaf_key] |
|
if {[tomlish::dict::path::exists $datastructure $fullkey_refpath]} { |
|
set msg "Duplicate key. The key (path $fullkey_refpath) already exists at this level in the toml data. The toml data is not valid." |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} |
|
|
|
#set keyval_dict [_get_keyval_value $element] |
|
lassign [_get_keyval_value $element] _ keyval_dict _ sub_tablenames_info |
|
|
|
|
|
#keyval_dict is either a {type <tomltag> value <whatever>} |
|
#or the result from parsing an arbitrary dict from an inline table - which could theoretically look the same at the topmost level |
|
#punk::dict::is_typeval can distinguish |
|
tomlish::log::debug "_process_tomlish_dottedkey>>> context:$context_refpath dottedkey $dottedkeyname kv: $keyval_dict" |
|
tomlish::dict::path::set_endpoint datastructure $fullkey_refpath $keyval_dict |
|
|
|
#remove ? |
|
#if {![tomlish::dict::is_typeval $keyval_dict]} { |
|
# #the value is either empty or or a dict structure with arbitrary (from-user-data) toplevel keys |
|
# # inner structure will contain {type <tag> value <etc>} if all leaves are not empty ITABLES |
|
# ##set tkey [list {*}$norm_segments {*}$all_dotted_keys] |
|
|
|
# #by not creating a tablenames_info record - we effectively make it closed anyway? |
|
# #it should be detected as a key |
|
# #is there any need to store tablenames_info for it?? |
|
# #REVIEW |
|
|
|
# ##TODO - update? |
|
# #dictn incr tablenames_info [list $tkey seencount] |
|
# ##if the keyval_dict is not a simple type x value y - then it's an inline table ? |
|
# ##if so - we should add the path to the leaf_key as a closed table too - as it's not allowed to have more entries added. |
|
# #dictn set tablenames_info [list $tkey closed] 1 |
|
#} |
|
return [dict create dottedtables_defined $dottedtables_defined] |
|
} |
|
#tomlish::dict::from_tomlish is a *basic* programmatic datastructure for accessing the data. |
|
# produce a dictionary of keys and values from a tomlish tagged list. |
|
# dict::from_tomlish is primarily for read access to toml data. |
|
#Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, |
|
# so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. |
|
# creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. |
|
#A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. |
|
# |
|
|
|
#within an ARRAY, we store a list of items such as plain dicts (possibly empty) and {type <tag> value <val>} for simple types |
|
#(ARRAYS can be mixed type) |
|
#This means our dict structure should have only ARRAY and simple types which need to be in {type <tag> value <val>} form |
|
#A dict within an array encodeded as a type ITABLE value <etc> should also parse - but is the unpreferred form - REVIEW test? |
|
|
|
#Namespacing? |
|
#ie note the difference: |
|
#[Data] |
|
#temp = { cpu = 79.5, case = 72.0} |
|
# versus |
|
#[Data] |
|
#temps = [{cpu = 79.5, case = 72.0}] |
|
proc from_tomlish {tomlish} { |
|
package require dictn |
|
|
|
#keep track of which tablenames have already been directly defined, |
|
# so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' |
|
#Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. |
|
#we don't error out just because a previous tablename segment has already appeared. |
|
|
|
#Declaring, Creating, and Defining Tables |
|
#https://github.com/toml-lang/toml/issues/795 |
|
#(update - only Creating and Defining are relevant terminology) |
|
|
|
#review |
|
#tablenames_info keys ttype created, tdefined, createdby, definedby, closedby ??? review keys |
|
# [tname] = header_table [[tname]] = header_tablearray |
|
|
|
#consider the following 2 which are legal: |
|
#[table] #'table' created, defined=open type header_table |
|
#x.y = 3 |
|
#[table.x.z] #'table' tdefined=closed closedby={header_table table.x.z}, 'table.x' created, 'table.x.z' created tdefined=open tdefinedby={header_table table.x.z} |
|
#k= 22 |
|
# #'table.x.z' tdefined=closed closedby={eof eof} |
|
|
|
#equivalent datastructure |
|
|
|
#[table] #'table' created, tdefined=open definedby={header_table table} |
|
#[table.x] #'table' tdefined=closed closedby={header_table table.x}, 'table.x' created tdefined=open definedby={header_table table.x} |
|
#y = 3 |
|
#[table.x.z] #'table.x' tdefined=closed closedby={header_table table.x.z}, 'table.x.z' created tdefined=open definedby={header_table table.x.z} |
|
#k=22 |
|
|
|
#illegal |
|
#[table] #'table' created and tdefined=open |
|
#x.y = 3 #'table.x' created first keyval pair tdefined=open definedby={keyval x.y = 3} |
|
#[table.x.y.z] #'table' tdefined=closed, 'table.x' closed because parent 'table' closed?, 'table.x.y' cannot be created |
|
#k = 22 |
|
# |
|
## - we would fail on encountering table.x.y because only table and table.x are effectively tables - but that table.x is closed should be detected (?) |
|
|
|
#illegal |
|
#[table] |
|
#x.y = {p=3} |
|
#[table.x.y.z] |
|
#k = 22 |
|
## we should fail because y is an inline table which is closed to further entries |
|
|
|
#note: it is not safe to compare normalized tablenames using join! |
|
# e.g a.'b.c'.d is not the same as a.b.c.d |
|
# instead compare {a b.c d} with {a b c d} |
|
# Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. |
|
#'a.b'.'c.d.e' vs 'a.b.c'.'d.e' |
|
#we need to normalize the tablenames seen so that {"x\ty"} matches {"x<rawtab>y"} |
|
|
|
|
|
|
|
if {[uplevel 1 [list info exists tablenames_info]]} { |
|
upvar tablenames_info tablenames_info |
|
} else { |
|
set tablenames_info [dict create] ;#keyed on tablepath each of which is an @@path such as {@@config @@subgroup @@etc} (corresponding to config.subgroup.etc) |
|
#also has non @@ indexes which are list indexes as taken by tcl list commands (int or end-1 etc) |
|
#value is a dict with keys: ttype, tdefined |
|
} |
|
|
|
|
|
log::info "---> dict::from_tomlish processing '$tomlish'<<<" |
|
set items $tomlish |
|
|
|
foreach lst $items { |
|
if {[lindex $lst 0] ni $::tomlish::tags} { |
|
error "supplied list does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" |
|
} |
|
} |
|
|
|
if {[lindex $tomlish 0] eq "TOMLISH"} { |
|
#ignore TOMLISH tag at beginning |
|
set items [lrange $tomlish 1 end] |
|
} |
|
|
|
set datastructure [dict create] |
|
set dottedtables_defined [list] |
|
foreach item $items { |
|
set tag [lindex $item 0] |
|
#puts "...> item:'$item' tag:'$tag'" |
|
switch -exact -- $tag { |
|
KEY - DQKEY - SQKEY - INT - FLOAT - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - STRING - LITERAL { |
|
#why would we get individual key item as opposed to DOTTEDKEY? |
|
error "tomlish::dict::from_tomlish error: invalid tag: $tag. At the toplevel, from_tomlish can only process WS NEWLINE COMMENT and compound elements DOTTEDKEY TABLE TABLEARRAY ITABLE MULTILITERAL MULTISTRING" |
|
} |
|
DOTTEDKEY { |
|
#toplevel dotted key |
|
set dkinfo [_process_tomlish_dottedkey $item] |
|
lappend dottedtables_defined {*}[dict get $dkinfo dottedtables_defined] |
|
#at any level - we don't expect any more DOTTEDKEY records in a tomlish structure after TABLE or TABLEARRAY are encountered |
|
#as those records should encapsulate their own dottedkeys |
|
|
|
} |
|
TABLEARRAY { |
|
#close off any dottedtables_defined created by dottedkeys at this level |
|
foreach dtablepath $dottedtables_defined { |
|
dict set tablenames_info $dtablepath tdefined closed |
|
} |
|
set dottedtables_defined [list] ;#for closing off at end by setting 'defined' |
|
|
|
set tablearrayname [lindex $item 1] |
|
tomlish::log::debug "---> tomlish::dict::from_tomlish processing item TABLENAME (name: $tablearrayname): $item" |
|
set norm_segments [::tomlish::toml::tablename_split $tablearrayname true] ;#true to normalize |
|
#we expect repeated tablearray entries - each adding a sub-object to the value, which is an array/list. |
|
#tablearrayname is likely to appear multiple times - so unlike a TABLE we don't check for 'defined' for the full name as an indicator of a problem |
|
set supertable [list] |
|
############## |
|
# [[a.b.c.d]] |
|
# norm_segments = {a b c d} |
|
#check a {a b} {a b c} <---- supertables of a.b.c.d |
|
############## |
|
set refpath [list] ;#e.g @@j1 @@j2 1 @@k1 end |
|
foreach normseg [lrange $norm_segments 0 end-1] { |
|
lappend supertable $normseg |
|
lappend refpath @@$normseg |
|
if {![dict exists $tablenames_info $refpath ttype]} { |
|
#supertable with this path doesn't yet exist |
|
if {[tomlish::dict::path::exists $datastructure $refpath]} { |
|
#There is data though - so it must have been created as a keyval |
|
set msg "Supertable [join $supertable .] of tablearray name $tablearrayname already has data but doesn't appear to be a table - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
#test: datastructure_tablearray_supertable_keycollision |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} else { |
|
#here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here |
|
#review - we can't later specify as tablearray so should just set ttype to header_table even though it's being created |
|
# because of a tablearray header? |
|
#By setting ttype to something other than table_header we can provide more precise errorCode/msg ?? |
|
dict set tablenames_info $refpath ttype unknown_header |
|
#ensure empty tables are still represented in the datastructure |
|
dict set datastructure {*}$supertable [list] |
|
} |
|
} else { |
|
#REVIEW!! |
|
# what happens with from_toml {[[a.b.c]]} {[[a.b]]} ??? |
|
#presumed that a and a.b were 'created' as tables (supertables of tablearray at a.b.c) and can't now be redefined as tablearrays |
|
|
|
#supertable has already been created - and may be defined - but even if defined we can add subtables unless it is of type itable |
|
#but if it's a tablearray - we need to point to the most 'recently defined table element of the array' |
|
#(last member of that array - need to check type? allowed to have non-table elements ie nonhomogenous??) |
|
set supertype [dict get $tablenames_info $refpath ttype] |
|
if {$supertype eq "header_tablearray"} { |
|
#exercised by toml-tests: |
|
# valid/table/array-table-array |
|
# valid/table/array-nest |
|
|
|
#puts stdout "todict!!! TABLEARRAY nesting required for supertable [join $supertable .]" |
|
|
|
#'refer' to the appropriate element in existing array |
|
set arrdata [tomlish::dict::path::get $datastructure [list {*}$refpath @@value]] |
|
set idx [expr {[llength $arrdata]-1}] |
|
if {$idx < 0} { |
|
#existing tablearray should have at least one entry even if empty (review) |
|
set msg "reference to empty tablearray?" |
|
return -code error -errorcode {TOMLISH STRUCTURE REFTOEMPTYTABLEARRAY} $msg |
|
} |
|
lappend refpath $idx |
|
} |
|
} |
|
} |
|
# |
|
#puts "TABLE supertable refpath $refpath" |
|
lappend refpath @@[lindex $norm_segments end] |
|
tomlish::log::debug "TABLEARRAY refpath $refpath" |
|
set tablearray_refpath $refpath |
|
|
|
|
|
if {![dict exists $tablenames_info $tablearray_refpath ttype]} { |
|
#first encounter of this tablearrayname |
|
if {[tomlish::dict::path::exists $datastructure $tablearray_refpath]} { |
|
#e.g from_toml {a=1} {[[a]]} |
|
set msg "Cannot create tablearray name $tablearrayname. Key already has data but key doesn't appear to be a table (keycollision) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
#test: datastructure_tablearray_direct_keycollision_error |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} |
|
#no collision - we can create the tablearray and the array in the datastructure |
|
dict set tablenames_info $tablearray_refpath ttype header_tablearray |
|
#dict set datastructure {*}$norm_segments [list type ARRAY value {}] |
|
#create array along with empty array-item at position zero |
|
tomlish::dict::path::set_endpoint datastructure $tablearray_refpath [list type ARRAY value {{}}] |
|
set arrayitem_refpath [list {*}$tablearray_refpath 0] |
|
#set ARRAY_ELEMENTS [list] |
|
} else { |
|
#we have an existing tablenames_info record for this path - but is it a tablearray? |
|
set ttype [dict get $tablenames_info $tablearray_refpath ttype] |
|
if {$ttype ne "header_tablearray"} { |
|
#header_table or itable |
|
switch -- $ttype { |
|
itable {set ttypename itable} |
|
header_table {set ttypename table} |
|
dottedkey_table {set ttypename dottedkey_table} |
|
unknown_header - unknown_dotted { |
|
#table was created e.g as supertable - but not specifically a tablearray |
|
#violates ordering - return specific test error |
|
set msg "Table $tablearrayname referenced as supertable before tablearray defined (ordering)" |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEARRAYORDERING} $msg |
|
} |
|
default {error "unrecognised type $ttype - expected header_table or itable"} |
|
} |
|
set msg "tablearray name $tablearrayname already appears to be already created as '$ttypename' not tablearray - invalid?" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} |
|
#EXISTING tablearray |
|
#add to array |
|
#error "add_to_array not implemented" |
|
#{type ARRAY value <list>} |
|
#set ARRAY_ELEMENTS [dict get $datastructure {*}$norm_segments value] |
|
tomlish::log::debug ">>>>pre-extend-array dict::from_tomlish datastructure: $datastructure" |
|
set existing_array [tomlish::dict::path::get $datastructure [list {*}$tablearray_refpath @@value]] |
|
set arrayitem_refpath [list {*}$tablearray_refpath [llength $existing_array]] |
|
tomlish::dict::path::lappend datastructure $tablearray_refpath {} |
|
tomlish::log::debug ">>>>post-extend-array dict::from_tomlish datastructure: $datastructure" |
|
} |
|
|
|
|
|
#set object [dict create] ;#array context equivalent of 'datastructure' |
|
|
|
#add to ARRAY_ELEMENTS and write back in to datastructure. |
|
foreach element [lrange $item 2 end] { |
|
set type [lindex $element 0] |
|
tomlish::log::debug "----> todict processing $tag subitem $type processing contained element $element" |
|
switch -exact -- $type { |
|
DOTTEDKEY { |
|
set dkinfo [_process_tomlish_dottedkey $element $arrayitem_refpath] |
|
lappend dottedtables_defined {*}[dict get $dkinfo dottedtables_defined] |
|
} |
|
NEWLINE - COMMENT - WS { |
|
#ignore |
|
} |
|
TABLE { |
|
#we *perhaps* should be able to process tablearray subtables either as part of the tablearray record, or independently. |
|
#(or even a mixture of both, although that is somewhat an edge case, and of limited utility) |
|
#[[fruit]] |
|
#x=1 |
|
# [fruit.metadata] |
|
# [fruit.otherdata] |
|
|
|
#when processing a dict destined for the above - the tomlish generator (e.g from_dict) |
|
#should create as 1 or 3 records (but could create 2 records if there was an unrelated table in between the subtables) |
|
#choices: all in tablearray record, tablearray + 1 or 2 table records. |
|
# |
|
#We are going the other way here - so we just need to realise that the list of tables 'belonging' to this tablearray might not be complete. |
|
# |
|
#the subtable names must be prefixed with the tablearray - we should validate that for any contained TABLE records |
|
|
|
#The default mechanism is for from_dict to produce tomlish with separate TABLE records - and use the ordering to determine membership |
|
#If we were to support wrapping the TABLE records within a TABLEARRAY - we should also support TABLEARRAY within TABLEARRAY |
|
# ----------------------------------------------------------------------- |
|
#Implementing this is not critical for standard encoding/decoding of toml! |
|
#It would be an alternative form for the tomlish intermediate form - and adds complexity. |
|
# |
|
#The upside would be to provide a function for sorting/rearranging in the tomlish form if all records were fully encapsulated. |
|
#A possible downside is that unrelated tables placed before a tablearray is fully defined (within the tablearray definition area in toml) |
|
# would have to be re-positioned before or after the encapsulated tablearray record. |
|
# While unrelated tables in such a position aren't a recommended way to write toml, they appear to be valid |
|
# and preserving the author's ordering is a goal of the basic encoding/decoding operations if no explicit sorting/reordering was requested. |
|
# |
|
#Consider an 'encapsulate' method to this (tomlish -> tomlish) |
|
# ----------------------------------------------------------------------- |
|
#todo |
|
error "tomlish::dict::from_tomlish TABLE element within TABLEARRAY not handled - TABLE should be a separate tomlish record" |
|
} |
|
default { |
|
error "tomlish::dict::from_tomlish Sub element of type '$type' not understood in tablearray context. Expected only DOTTEDKEY,NEWLINE,COMMENT,WS" |
|
} |
|
} |
|
} |
|
|
|
#end of TABLEARRAY record - equivalent of EOF or next header - close off the dottedtables |
|
foreach dtablepath $dottedtables_defined { |
|
dict set tablenames_info $dtablepath tdefined closed |
|
} |
|
} |
|
TABLE { |
|
#close off any dottedtables_defined created by dottedkeys at this level |
|
foreach dtablepath $dottedtables_defined { |
|
dict set tablenames_info $dtablepath tdefined closed |
|
} |
|
set tablename [lindex $item 1] |
|
set dottedtables_defined [list] ;#for closing off at end by setting 'defined' |
|
#As our TABLE record contains all it's child DOTTEDKEY records - this should be equivalent to setting them as defined at EOF or next header. |
|
|
|
#----------------------------------------------------------------------------------- |
|
#default assumption - our reference is to the main tablenames_info and datastructure |
|
#Will need to append keys appropriately if we have recursed |
|
#----------------------------------------------------------------------------------- |
|
|
|
log::debug "---> tomlish::dict::from_tomlish processing item TABLE (name: $tablename): $item" |
|
set norm_segments [::tomlish::toml::tablename_split $tablename true] ;#true to normalize |
|
|
|
|
|
|
|
set name_segments [::tomlish::toml::tablename_split $tablename 0] ;#unnormalized e.g ['a'."b".c.d] -> 'a' "b" c d |
|
#results of tablename_split <name> 0 are 'raw' - ie some segments may be enclosed in single or double quotes. |
|
|
|
|
|
set supertable [list] |
|
############## |
|
# [a.b.c.d] |
|
# norm_segments = {a b c d} |
|
#check a {a b} {a b c} <---- supertables of a.b.c.d |
|
############## |
|
|
|
############## |
|
#[[a]] |
|
#[a.b] #supertable a is tablearray |
|
############## |
|
|
|
#also consider |
|
############## |
|
# [[a.b]] |
|
# [a.b.c.d] #supertable a is a table, supertable a.b is tablearray, supertable a.b.c is elementtable |
|
############## |
|
set refpath [list] ;#e.g @@j1 @@j2 1 @@k1 end |
|
foreach normseg [lrange $norm_segments 0 end-1] { |
|
lappend supertable $normseg |
|
lappend refpath @@$normseg |
|
if {![dict exists $tablenames_info $refpath ttype]} { |
|
#supertable with this path doesn't yet exist |
|
if {[tomlish::dict::path::exists $datastructure $refpath]} { |
|
#There is data though - so it must have been created as a keyval |
|
set msg "Supertable [join $supertable .] of table name $tablename (path $refpath) already has data but doesn't appear to be a table (keycollision) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} |
|
#here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here |
|
#we also don't know whether it's a table or a dottedkey_table (not allowed to be tablearray - out of order?) |
|
dict set tablenames_info $refpath ttype unknown_header |
|
#ensure empty tables are still represented in the datastructure |
|
#dict set datastructure {*}$supertable [list] |
|
tomlish::dict::path::set_endpoint datastructure $refpath {} |
|
} else { |
|
#supertable has already been created - and may be defined - but even if defined we can add subtables unless it is of type itable |
|
if {[dict get $tablenames_info $refpath ttype] eq "header_tablearray"} { |
|
#'refer' to the appropriate element in existing array |
|
set arrdata [tomlish::dict::path::get $datastructure [list {*}$refpath @@value]] |
|
set idx [expr {[llength $arrdata]-1}] |
|
if {$idx < 0} { |
|
#existing tablearray should have at least one entry even if empty (review) |
|
set msg "reference to empty tablearray?" |
|
return -code error -errorcode {TOMLISH STRUCTURE REFTOEMPTYTABLEARRAY} $msg |
|
} |
|
lappend refpath $idx |
|
} else { |
|
#?? |
|
if {[dictn getdef $tablenames_info [list $refpath tdefined] NULL] eq "NULL"} { |
|
} else { |
|
} |
|
} |
|
} |
|
} |
|
#puts "TABLE supertable refpath $refpath" |
|
lappend refpath @@[lindex $norm_segments end] |
|
tomlish::log::info "TABLE refpath $refpath" |
|
set table_refpath $refpath |
|
|
|
|
|
|
|
|
|
#table [a.b.c.d] hasn't been defined - but may have been 'created' already by a longer tablename |
|
# - or may have existing data from a keyval |
|
if {![dict exists $tablenames_info $table_refpath ttype]} { |
|
if {[tomlish::dict::path::exists $datastructure $table_refpath]} { |
|
#e.g from_toml {a=1} {[a]} |
|
set msg "Cannot create table name $tablename. Key already has data but key doesn't appear to be a table (keycollision) - invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
#test: datastructure_tablename_keyval_collision_error |
|
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg |
|
} |
|
#no data or previously created table |
|
dict set tablenames_info $table_refpath ttype header_table |
|
|
|
#We are 'defining' this table's keys and values here (even if empty) |
|
#dict set datastructure {*}$norm_segments [list] ;#ensure table still represented in datastructure even if we add no keyvals here |
|
tomlish::dict::path::set_endpoint datastructure $table_refpath {} ;#ensure table still represented in datastructure even if we add no keyvals here |
|
} else { |
|
if {[dict get $tablenames_info $table_refpath ttype] eq "header_tablearray"} { |
|
#e.g tomltest invalid/table/duplicate-table-array2 |
|
#[[tbl]] |
|
#[tbl] |
|
set msg "Table name $tablename has already been created as a tablearray. Invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} else { |
|
#any other type tdefined is a problem |
|
set T_DEFINED [dictn getdef $tablenames_info [list $table_refpath tdefined] NULL] |
|
if {$T_DEFINED ne "NULL" } { |
|
#our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path |
|
set msg "Table name $tablename has already been defined in the toml data. Invalid" |
|
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] |
|
#raise a specific type of error for tests to check |
|
return -code error -errorcode {TOMLISH STRUCTURE TABLEREDEFINED} $msg |
|
} |
|
} |
|
} |
|
dict set tablenames_info $table_refpath tdefined open |
|
|
|
#now add the contained elements |
|
foreach element [lrange $item 2 end] { |
|
set type [lindex $element 0] |
|
log::debug "----> todict processing $tag subitem $type processing contained element $element" |
|
switch -exact -- $type { |
|
DOTTEDKEY { |
|
set dkinfo [_process_tomlish_dottedkey $element $table_refpath] |
|
lappend dottedtables_defined {*}[dict get $dkinfo dottedtables_defined] |
|
} |
|
NEWLINE - COMMENT - WS { |
|
#ignore |
|
} |
|
default { |
|
error "Sub element of type '$type' not understood in table context. Expected only DOTTEDKEY,NEWLINE,COMMENT,WS" |
|
} |
|
} |
|
} |
|
|
|
#end of TABLE record - equivalent of EOF or next header - close off the dottedtables |
|
foreach dtablepath $dottedtables_defined { |
|
dict set tablenames_info $dtablepath tdefined closed |
|
} |
|
} |
|
ITABLE { |
|
#As there is no other mechanism to create tables within an ITABLE than dottedkeys |
|
# and ITABLES are fully defined/enclosed - we can rely on key collision and don't need to track dottedtables_defined - REVIEW. |
|
set dottedtables_defined [list] |
|
#SEP??? |
|
#ITABLE only ever on RHS of = or inside ARRAY |
|
set datastructure [dict create] |
|
set tablenames_info [dict create] |
|
|
|
foreach element [lrange $item 1 end] { |
|
set type [lindex $element 0] |
|
log::debug "----> tododict processing $tag subitem $type processing contained element $element" |
|
switch -exact -- $type { |
|
DOTTEDKEY { |
|
set dkinfo [_process_tomlish_dottedkey $element] |
|
} |
|
NEWLINE - COMMENT - WS { |
|
#ignore |
|
} |
|
default { |
|
error "Sub element of type '$type' not understood in ITABLE context. Expected only KEY,DQKEY,SQKEY,NEWLINE,COMMENT,WS" |
|
} |
|
} |
|
} |
|
} |
|
ARRAY { |
|
#arrays in toml are allowed to contain mixtures of types |
|
set datastructure [list] |
|
log::debug "--> processing array: $item" |
|
|
|
foreach element [lrange $item 1 end] { |
|
set type [lindex $element 0] |
|
log::debug "----> tododict processing $tag subitem $type processing contained element $element" |
|
switch -exact -- $type { |
|
INT - FLOAT - BOOL - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL { |
|
set value [lindex $element 1] |
|
lappend datastructure [list type $type value $value] |
|
} |
|
STRING { |
|
#JJJJ |
|
#don't unescape string! |
|
set value [lindex $element 1] |
|
#lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] |
|
lappend datastructure [list type $type value $value] |
|
} |
|
LITERAL { |
|
set value [lindex $element 1] |
|
lappend datastructure [list type $type value $value] |
|
} |
|
ITABLE { |
|
#anonymous table |
|
#lappend datastructure [list type $type value [::tomlish::to_dict [list $element]]] |
|
lappend datastructure [::tomlish::dict::from_tomlish [list $element]] ;#store itables within arrays as raw dicts (possibly empty) |
|
} |
|
TABLE - TABLEARRAY { |
|
#invalid? shouldn't be output from from_dict - but could manually be constructed as such? review |
|
#doesn't make sense as table needs a name? |
|
#take as synonym for ITABLE? |
|
error "tomlish::dict::from_tomlish $type within array unexpected" |
|
} |
|
ARRAY - MULTISTRING - MULTILITERAL { |
|
#set value [lindex $element 1] |
|
lappend datastructure [list type $type value [::tomlish::dict::from_tomlish [list $element]]] |
|
} |
|
WS - SEP - NEWLINE - COMMENT { |
|
#ignore whitespace, commas, newlines and comments |
|
} |
|
default { |
|
error "tomlish::dict::from_tomlish Unexpected value type '$type' found in array" |
|
} |
|
} |
|
} |
|
} |
|
MULTILITERAL { |
|
#triple squoted string |
|
#first newline stripped only if it is the very first element |
|
#(ie *immediately* following the opening delims) |
|
#All whitespace other than newlines is within LITERALPARTS |
|
# ------------------------------------------------------------------------- |
|
#todo - consider extension to toml to allow indent-aware multiline literals |
|
# how - propose as issue in toml github? Use different delim? e.g ^^^ ? |
|
#e.g |
|
# xxx=?'''abc |
|
# def |
|
# etc |
|
# ''' |
|
# - we would like to trimleft each line to the column following the opening delim |
|
# ------------------------------------------------------------------------- |
|
|
|
log::debug "---> todict processing multiliteral: $item" |
|
set parts [lrange $item 1 end] |
|
if {[lindex $parts 0 0] eq "NEWLINE"} { |
|
set parts [lrange $parts 1 end] ;#skip it |
|
} |
|
for {set idx 0} {$idx < [llength $parts]} {incr idx} { |
|
set element [lindex $parts $idx] |
|
set type [lindex $element 0] |
|
switch -exact -- $type { |
|
LITERALPART { |
|
append stringvalue [lindex $element 1] |
|
} |
|
NEWLINE { |
|
set val [lindex $element 1] |
|
if {$val eq "lf"} { |
|
append stringvalue \n |
|
} else { |
|
append stringvalue \r\n |
|
} |
|
} |
|
default { |
|
error "Unexpected value type '$type' found in multistring" |
|
} |
|
} |
|
} |
|
set datastructure $stringvalue |
|
} |
|
MULTISTRING { |
|
#triple dquoted string |
|
log::debug "---> tomlish::dict::from_tomlish processing multistring: $item" |
|
set stringvalue "" |
|
set idx 0 |
|
set parts [lrange $item 1 end] |
|
for {set idx 0} {$idx < [llength $parts]} {incr idx} { |
|
set element [lindex $parts $idx] |
|
set type [lindex $element 0] |
|
#We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted |
|
switch -exact -- $type { |
|
STRING { |
|
#todo - do away with STRING ? |
|
#we don't build MULTISTRINGS containing STRING - but should we accept it? |
|
tomlish::log::warn "double quoting a STRING found in MULTISTRING - should be STRINGPART?" |
|
#append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" |
|
append stringvalue "\"[lindex $element 1]\"" |
|
} |
|
STRINGPART { |
|
#JJJ |
|
#don't unescape string |
|
#append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] |
|
append stringvalue [lindex $element 1] |
|
} |
|
CONT { |
|
#When the last non-whitespace character on a line is an unescaped backslash, |
|
#it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter |
|
# review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? |
|
set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] |
|
if {$next_nl == -1} { |
|
#last (or first and only) line |
|
return -code error -errorcode {TOMLISH SYNTAX INVALIDESCAPE} "Invalid whitespace escape - not a valid continuation position" |
|
#set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] |
|
#if {$non_ws >= 0} { |
|
# #append stringvalue "\\" |
|
# return -code error -errorcode {TOMLISH SYNTAX INVALIDESCAPE} "Invalid whitespace escape - not a valid continuation position" |
|
#} else { |
|
# #skip over ws without emitting |
|
# set idx [llength $parts] |
|
#} |
|
} else { |
|
set parts_til_nl [lrange $parts 0 $next_nl-1] |
|
set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] |
|
if {$non_ws >= 0} { |
|
#This CONT is invalid. If there had been a non-whitespace char directly following it, |
|
#it wouldn't have come through as a CONT token |
|
#Now that we see it isn't the last non-whitespace backslash on the line we can reject |
|
# as an invalid escape of space or tab |
|
#append stringvalue "\\" |
|
return -code error -errorcode {TOMLISH SYNTAX INVALIDESCAPE} "Invalid whitespace escape - not a valid continuation position" |
|
} else { |
|
#skip over ws on this line |
|
set idx $next_nl |
|
#then have to check each subsequent line until we get to first non-whitespace |
|
set trimming 1 |
|
while {$trimming && $idx < [llength $parts]} { |
|
set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] |
|
if {$next_nl == -1} { |
|
#last line |
|
set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] |
|
if {$non_ws >= 0} { |
|
set idx [expr {$non_ws -1}] |
|
} else { |
|
set idx [llength $parts] |
|
} |
|
set trimming 0 |
|
} else { |
|
set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] |
|
if {$non_ws >= 0} { |
|
set idx [expr {$non_ws -1}] |
|
set trimming 0 |
|
} else { |
|
set idx $next_nl |
|
#keep trimming |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
NEWLINE { |
|
#if newline is first element - it is not part of the data of a multistring |
|
if {$idx > 0} { |
|
set val [lindex $element 1] |
|
if {$val eq "lf"} { |
|
append stringvalue \n |
|
} else { |
|
append stringvalue \r\n |
|
} |
|
} |
|
} |
|
WS { |
|
append stringvalue [lindex $element 1] |
|
} |
|
default { |
|
error "Unexpected value type '$type' found in multistring" |
|
} |
|
} |
|
} |
|
set datastructure $stringvalue |
|
} |
|
WS - COMMENT - NEWLINE { |
|
#ignore |
|
} |
|
BOM { |
|
#this token is the unicode single char \uFFEF |
|
#It doesn't tell us what encoding was originally used (though toml should only accept UTF-8 files) |
|
#ignore at start - what about in other positions? |
|
} |
|
default { |
|
error "Unexpected tag '$tag' in Tomlish list '$tomlish'" |
|
} |
|
} |
|
} |
|
return $datastructure |
|
} |
|
} |
|
namespace eval tomlish::dict::path { |
|
#access tomlish dict structure |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
|
|
#access with path such as: @@k @@k 0 @@k end where dict keys marked with @@ and plain values are list indices into <list> in {type ARRAY value <list>} |
|
#leaf elements returned as structured {type <t> value <v>} |
|
proc get {dictval {path {}}} { |
|
if {$path eq ""} { |
|
return $dictval |
|
} |
|
::set data $dictval |
|
::set pathsofar [list] |
|
foreach p $path { |
|
::lappend pathsofar $p |
|
if {[string range $p 0 1] eq "@@"} { |
|
::set data [dict get $data [string range $p 2 end]] |
|
} else { |
|
if {![tomlish::dict::is_typeval $data]} { |
|
error "tomlish::dict::path::get error bad path $path. Attempt to access table as array at subpath $pathsofar." |
|
} |
|
if {[dict get $data type] ne "ARRAY"} { |
|
error "tomlish::dict::get error bad path $path. Subpath $pathsofar is not an array." |
|
} |
|
::set arrdata [dict get $data value] |
|
::set data [lindex $arrdata $p] |
|
} |
|
} |
|
return $data |
|
} |
|
proc exists {dictval path} { |
|
::set data $dictval |
|
::set pathsofar [list] |
|
::set exists 1 |
|
foreach p $path { |
|
::lappend pathsofar $p |
|
if {[string range $p 0 1] eq "@@"} { |
|
::set k [string range $p 2 end] |
|
if {![dict exists $data $k]} { |
|
return 0 |
|
} |
|
::set data [dict get $data $k] |
|
} else { |
|
if {![tomlish::dict::is_typeval $data]} { |
|
return 0 |
|
} |
|
if {[dict get $data type] ne "ARRAY"} { |
|
return 0 |
|
} |
|
::set arrdata [dict get $data value] |
|
::set intp [tomlish::system::lindex_resolve_basic $arrdata $p] ;#handle index math (end-1 etc) |
|
if {$intp == -1} { |
|
#out of bounds |
|
return 0 |
|
} |
|
::set data [lindex $arrdata $p] |
|
} |
|
} |
|
return $exists |
|
} |
|
|
|
#a restricted analogy of 'dictn set' |
|
#set 'endpoints' - don't create intermediate paths |
|
# can replace an existing dict with another dict |
|
# can create a key when key at tail end of path is a key (ie @@keyname, not index) |
|
# can replace an existing {type <type> value <val} entry, when new val is also a {type <type> value <val>} |
|
# with added restriction that if <type> is ARRAY the new <type> must also be ARRAY |
|
proc set_endpoint {dictvariable path value} { |
|
upvar $dictvariable dict_being_edited |
|
::set data $dict_being_edited |
|
::set pathsofar [list] |
|
if {!([tomlish::dict::is_typeval $value] || [tomlish::dict::is_typeval_dict $value 0])} { |
|
#failed check of supplied value as basic type, or a sub-dict structure (not checking arrays) |
|
error "tomlish::dict::path::set_endpoint error - value must already be in the tomlish form {type <tag> value <value>} or be a dict with such forms as leaves" |
|
} |
|
foreach p $path { |
|
::lappend pathsofar $p |
|
if {[string range $p 0 1] eq "@@"} { |
|
::set k [string range $p 2 end] |
|
|
|
#if {![dict exists $data $k]} { |
|
# error "tomlish::dict:path::set error bad path $path. Attempt to access nonexistent element at subpath $pathsofar." |
|
#} |
|
::set varname v[incr v] |
|
|
|
if {$pathsofar eq $path} { |
|
#see if endpoint of the path given already exists |
|
if {[dict exists $data $k]} { |
|
::set endpoint [dict get $data $k] |
|
if {[tomlish::dict::is_typeval $endpoint]} { |
|
set existing_tp [dict get $endpoint type] |
|
if {![tomlish::dict::is_typeval $value]} { |
|
error "tomlish::dict::path::set_endpoint error Unable to overwrite subpath '$pathsofar' which is of type $existing_tp with sub-dict. Supplied value not {type <type> value <value}" |
|
} |
|
#can switch types - except for arrays |
|
set val_tp [dict get $value type] |
|
if {$existing_tp eq "ARRAY" && $val_tp ne "ARRAY"} { |
|
error "tomlish::dict::path::set_endpoint error Unable to overwrite subpath '$pathsofar' which is of type $existing_tp with supplied type $val_tp." |
|
} |
|
} else { |
|
if {![tomlish::dict::is_typeval_dict $value]} { |
|
set val_tp [dict get $value type] |
|
error "tomlish::dict::path::set_endpoint error Cannot overwrite sub-dict (size: [dict size $endpoint]) at '$pathsofar' with type $val_tp" |
|
} |
|
} |
|
#temp debug |
|
tomlish::log::debug "overwriting [dict get $data $k]" |
|
} else { |
|
#new key at leaf - ok - value can be either simple key val or sub-dict |
|
} |
|
::set $varname $value |
|
dict set vdict $pathsofar $varname |
|
break |
|
} else { |
|
if {![dict exists $data $k]} { |
|
error "tomlish::dict::path::set_endpoint error bad path '$path'. Attempt to access nonexistent element at subpath '$pathsofar'." |
|
} |
|
} |
|
::set data [dict get $data $k] |
|
::set $varname $data |
|
dict set vdict $pathsofar $varname |
|
} else { |
|
# p is an index |
|
if {![tomlish::dict::is_typeval $data]} { |
|
error "tomlish::dict::path::set_endpoint error bad path '$path'. Attempt to access table as array at subpath '$pathsofar'." |
|
} |
|
if {[dict get $data type] ne "ARRAY"} { |
|
error "tomlish::dict::path::set_endpoint error bad path '$path'. Subpath '$pathsofar' is not an array." |
|
} |
|
::set varname v[incr v] |
|
if {$pathsofar eq $path} { |
|
if {[dict get $data type] ne "ARRAY"} { |
|
error "tomlish::dict::path::set_endpoint error bad path '$path'. Parent of subpath '$pathsofar' is not an array." |
|
} |
|
::set parentarray [dict get $data value] |
|
set idx [tomlish::system::lindex_resolve_basic $parentarray $p] |
|
if {$idx == -1} { |
|
error "tomlish::dict::path::set_endpoint error bad path '$path'. No existing element at '$p'" |
|
} |
|
::set endpoint [lindex $parentarray $p] |
|
if {[tomlish::dict::is_typeval $endpoint]} { |
|
if {![tomlish::dict::is_typeval $value]} { |
|
#attempt to replace plain typeval with a typeval dict |
|
error "tomlish::dict::path::set_endpoint error path '$path'. Cannot overwrite {type <t> val <v>} with sub-dict: $value" |
|
} |
|
switch -- [dict get $endpoint type] { |
|
ARRAY { |
|
#disallow overwriting array - unless given value is an ARRAY? REVIEW |
|
if {[dict get $value type] ne "ARRAY"} { |
|
error "tomlish::dict::path::set_endpoint error bad path '$path'. Cannot overwrite array with non-array: $value" |
|
} |
|
} |
|
default { |
|
# |
|
} |
|
} |
|
} else { |
|
#endpoint is a typeval dict not a plain typeval - only allow overwrite with a typeval dict |
|
if {![tomlish::dict::is_typeval_dict $value 0]} { |
|
error "tomlish::dict::path::set_endpoint error path '$path'. Cannot overwrite sub-dict (size: [dict size $endpoint]) with non sub-dict: $value" |
|
} |
|
} |
|
::set $varname $value |
|
dict set vdict $pathsofar $varname |
|
break |
|
} else { |
|
::set arrdata [dict get $data value] |
|
set idx [tomlish::system::lindex_resolve_basic $arrdata $p] |
|
if {$idx == -1} { |
|
error "tomlish::dict::path::set_endpoint error bad path '$path'. No existing element at $p" |
|
} |
|
::set data [lindex $arrdata $p] |
|
::set $varname $data |
|
dict set vdict $pathsofar $varname |
|
} |
|
} |
|
} |
|
#dict for {path varname} $vdict { |
|
# puts "$path $varname\n" |
|
# puts " '[::set $varname]'\n" |
|
# puts "" |
|
#} |
|
::set i 0 |
|
::set reverse [lreverse $vdict] |
|
foreach {varname path} $reverse { |
|
set newval [::set $varname] |
|
if {$i+2 == [llength $reverse]} { |
|
::set k [lindex $path end] |
|
::set k [string range $k 2 end] ;#first key is always @@something |
|
dict set dict_being_edited $k $newval |
|
#puts "--result $dict_being_edited" |
|
break |
|
} |
|
::set nextvarname [lindex $reverse $i+2] |
|
::set nextval [::set $nextvarname] |
|
::set k [lindex $path end] |
|
if {[string match @@* $k]} { |
|
#dict key |
|
#dict set $nextvarname $k $newval |
|
set_endpoint $nextvarname [list $k] $newval |
|
} else { |
|
#list index |
|
::set nextarr [dict get $nextval value] |
|
::lset nextarr $k $newval |
|
dict set $nextvarname value $nextarr |
|
} |
|
::incr i 2 |
|
} |
|
|
|
return $dict_being_edited |
|
|
|
} |
|
#path must be to a {type ARRAY value <list>} |
|
#REVIEW - how to lappend to deep mixed dict/array structure without rewriting whole datastructure? |
|
proc lappend {dictvariable path args} { |
|
upvar $dictvariable dict_being_edited |
|
::set data $dict_being_edited |
|
::set pathsofar [list] |
|
#::set newlist [list] |
|
::set v 0 |
|
::set vdict [dict create] |
|
foreach a $args { |
|
if {![string is dict $a]} { |
|
error "tomlish::dict::path::lappend error - lappended arguments must already be in the tomlish form {type <tag> value <value>} or be a dict with such forms as leaves" |
|
} |
|
} |
|
foreach p $path { |
|
::lappend pathsofar $p |
|
if {[string range $p 0 1] eq "@@"} { |
|
::set k [string range $p 2 end] |
|
if {![dict exists $data $k]} { |
|
error "tomlish::dict::path::lappend error bad path $path. Attempt to access nonexistent element at subpath $pathsofar." |
|
} |
|
::set varname v[incr v] |
|
|
|
if {$pathsofar eq $path} { |
|
#see if endpoint of the path given is an ARRAY |
|
::set endpoint [dict get $data $k] |
|
if {![tomlish::dict::is_typeval $endpoint]} { |
|
error "tomlish::dict::path::lappend error bad path $path. Attempt to access table as array at subpath $pathsofar." |
|
} |
|
if {[dict get $endpoint type] ne "ARRAY"} { |
|
error "tomlish::dict::path::lappend error bad path $path. Subpath $pathsofar is not an array." |
|
} |
|
::set arrdata [dict get $endpoint value] |
|
::lappend arrdata {*}$args |
|
dict set endpoint value $arrdata |
|
::set newlist $endpoint |
|
::set $varname $newlist |
|
dict set vdict $pathsofar $varname |
|
break |
|
} |
|
::set data [dict get $data $k] |
|
::set $varname $data |
|
dict set vdict $pathsofar $varname |
|
} else { |
|
if {![tomlish::dict::is_typeval $data]} { |
|
error "tomlish::dict::path::lappend error bad path $path. Attempt to access table as array at subpath $pathsofar." |
|
} |
|
if {[dict get $data type] ne "ARRAY"} { |
|
error "tomlish::dict::path::lappend error bad path $path. Subpath $pathsofar is not an array." |
|
} |
|
::set varname v[incr v] |
|
if {$pathsofar eq $path} { |
|
if {[dict get $data type] ne "ARRAY"} { |
|
error "tomlish::dict::path::lappend error bad path $path. Parent path is not an array." |
|
} |
|
::set parentarray [dict get $data value] |
|
::set idx [tomlish::system::lindex_resolve_basic $parentarray $p] |
|
if {$idx == -1} { |
|
error "tomlish::dict::path::lappend error bad path $path. Index $p does not exist." |
|
} |
|
::set endpoint [lindex $parentarray $p] |
|
if {[dict get $endpoint type] ne "ARRAY"} { |
|
error "tomlish::dict::path::lappend error bad path $path. Not an array." |
|
} |
|
|
|
::set arrdata [dict get $endpoint value] |
|
::lappend arrdata {*}$args |
|
dict set endpoint value $arrdata |
|
::set newlist $endpoint |
|
#::lset parentarray $p $newlist |
|
#set parentarray $newlist |
|
::set $varname $newlist |
|
dict set vdict $pathsofar $varname |
|
break |
|
} else { |
|
::set arrdata [dict get $data value] |
|
set idx [tomlish::system::lindex_resolve_basic $arrdata $p] |
|
if {$idx == -1} { |
|
error "tomlish::dict::path::lappend error bad path $path. Subpath $pathsofar, index $p does not exist." |
|
} |
|
::set data [lindex $arrdata $p] |
|
::set $varname $data |
|
dict set vdict $pathsofar $varname |
|
} |
|
} |
|
} |
|
#todo tomlish::log::debug ? |
|
#dict for {path varname} $vdict { |
|
# puts "$path $varname\n" |
|
# puts " [::set $varname]\n" |
|
# puts "" |
|
#} |
|
::set i 0 |
|
::set reverse [lreverse $vdict] |
|
foreach {varname path} $reverse { |
|
set newval [::set $varname] |
|
if {$i+2 == [llength $reverse]} { |
|
::set k [lindex $path end] |
|
::set k [string range $k 2 end] ;#first key is always @@something |
|
dict set dict_being_edited $k $newval |
|
#puts "--result $dict_being_edited" |
|
break |
|
} |
|
::set nextvarname [lindex $reverse $i+2] |
|
::set nextval [::set $nextvarname] |
|
::set k [lindex $path end] |
|
if {[string match @@* $k]} { |
|
#dict key |
|
set k [string range $k 2 end] |
|
dict set $nextvarname $k $newval |
|
} else { |
|
#list index |
|
::set nextarr [dict get $nextval value] |
|
::lset nextarr $k $newval |
|
dict set $nextvarname value $nextarr |
|
} |
|
::incr i 2 |
|
} |
|
return $dict_being_edited |
|
} |
|
} |
|
tcl::namespace::eval tomlish::to_dict { |
|
|
|
|
|
proc @@path {dictkeys} { |
|
lmap v $dictkeys {string cat @@ $v} |
|
} |
|
|
|
} |
|
|
|
|
|
tcl::namespace::eval tomlish::app { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::app}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
tcl::namespace::eval argdoc { |
|
proc test_suites {} { |
|
if {[package provide test::tomlish] eq ""} { |
|
return [list] |
|
} |
|
return [test::tomlish::SUITES] |
|
} |
|
} |
|
|
|
package require punk::args |
|
punk::args::define { |
|
@id -id ::tomlish::app::decoder |
|
@cmd -name tomlish::app::decoder -help\ |
|
"Read toml on stdin until EOF |
|
on error - returns non-zero exit code and writes error to |
|
the errorchannel. |
|
on success - returns zero exit code and writes JSON encoding |
|
of the data to the outputchannel. |
|
This decoder is intended to be compatble with toml-test." |
|
@leaders -min 0 -max 0 |
|
@opts |
|
-help -type none -help\ |
|
"Display this usage message" |
|
-inputchannel -default stdin |
|
-inputencoding -default "iso8859-1" -choicerestricted 0 -choices {utf-8 utf-16 iso8859-1} -help\ |
|
"configure encoding on input channel |
|
iso8859-1 is equivalent to binary encoding" |
|
-outputchannel -default stdout |
|
-errorchannel -default stderr |
|
@values -min 0 -max 0 |
|
} |
|
proc decoder {args} { |
|
set argd [punk::args::parse $args withid ::tomlish::app::decoder] |
|
set ch_input [dict get $argd opts -inputchannel] |
|
set ch_input_enc [dict get $argd opts -inputencoding] |
|
set ch_output [dict get $argd opts -outputchannel] |
|
set ch_error [dict get $argd opts -errorchannel] |
|
if {[dict exists $argd received -help]} { |
|
return [punk::args::usage -scheme info ::tomlish::app::decoder] |
|
} |
|
|
|
chan configure $ch_input -encoding $ch_input_enc |
|
#translation? |
|
chan configure $ch_input -translation lf ;# toml-test invalid/control tests we need to see raw CRs to reject them properly - auto translation won't do. |
|
|
|
#Just slurp it all - presumably we are not handling massive amounts of data on stdin. |
|
# - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. |
|
if {[catch { |
|
set inputdata [read $ch_input] |
|
if {$ch_input_enc eq "iso8859-1"} { |
|
set toml [tomlish::toml::from_binary $inputdata] |
|
} else { |
|
set toml $inputdata |
|
} |
|
} errM]} { |
|
puts stderr "read-input error: $errM" |
|
#toml-tests expect exit code 1 |
|
#e.g invalid/encoding/utf16-bom |
|
exit 1 ;#read error |
|
} |
|
try { |
|
set j [::tomlish::toml_to_typedjson $toml] |
|
} on error {em} { |
|
puts $ch_error "decoding failed: '$em'" |
|
exit 1 |
|
} |
|
puts -nonewline $ch_output $j |
|
exit 0 |
|
} |
|
|
|
package require punk::args |
|
punk::args::define { |
|
@id -id ::tomlish::app::encoder |
|
@cmd -name tomlish::app::encoder -help\ |
|
"Read JSON on input until EOF |
|
return non-zero exitcode if JSON data cannot be converted to |
|
a valid TOML representation. |
|
return zero exitcode and TOML data on output if JSON data can |
|
be converted. |
|
This encoder is intended to be compatible with toml-test." |
|
@leaders -min 0 -max 0 |
|
@opts |
|
-help -type none -help \ |
|
"Display this usage message" |
|
-restrict_barekeys -default 0 -help\ |
|
"If true, keys containing unicode will be quoted. |
|
If false, an extended range of barekeys will be used |
|
in unquoted form." |
|
-inputchannel -default stdin |
|
-inputencoding -default "" -choicerestricted 0 -choices {utf-8 utf-16 iso8859-1} -help\ |
|
"configure encoding on input channel |
|
If not supplied, leave at Tcl default" |
|
-outputchannel -default stdout |
|
-errorchannel -default stderr |
|
@values -min 0 -max 0 |
|
} |
|
proc encoder {args} { |
|
set argd [punk::args::parse $args withid ::tomlish::app::encoder] |
|
set restrict_barekeys [dict get $argd opts -restrict_barekeys] |
|
set ch_input [dict get $argd opts -inputchannel] |
|
set ch_input_enc [dict get $argd opts -inputencoding] |
|
set ch_output [dict get $argd opts -outputchannel] |
|
set ch_error [dict get $argd opts -errorchannel] |
|
if {[dict exists $argd received -help]} { |
|
return [punk::args::usage -scheme info ::tomlish::app::encoder] |
|
} |
|
#review |
|
if {$ch_input_enc ne ""} { |
|
chan configure $ch_input -encoding $ch_input_enc |
|
} |
|
#review |
|
chan configure $ch_input -translation lf |
|
|
|
if {[catch { |
|
set json [read $ch_input] |
|
}]} { |
|
exit 2 ;#read error |
|
} |
|
try { |
|
#tomlish::typedjson_to_toml |
|
set toml [::tomlish::toml::from_tomlish_from_dict_from_typedjson $json] |
|
} trap {} {e eopts} { |
|
puts $ch_error "encoding failed: '$e'" |
|
puts $ch_error "$::errorInfo" |
|
exit 1 |
|
} |
|
puts -nonewline $ch_output $toml |
|
exit 0 |
|
} |
|
|
|
punk::args::define { |
|
@dynamic |
|
@id -id ::tomlish::app::test |
|
@cmd -name tomlish::app::test |
|
@leaders |
|
@opts -any 1 |
|
-help -type none -help\ |
|
"Display this usage message |
|
or further info if more args." |
|
-suite -default tests -choices {${[::tomlish::app::argdoc::test_suites]}} |
|
@values -min 0 -max -1 |
|
} |
|
proc test {args} { |
|
package require test::tomlish |
|
set argd [punk::args::parse $args withid ::tomlish::app::test] |
|
set opts [dict get $argd opts] |
|
set values [dict get $argd values] |
|
set received [dict get $argd received] |
|
set solos [dict get $argd solos] |
|
set opt_suite [dict get $opts -suite] |
|
if {[dict exists $received -help] && ![dict exists $received -suite]} { |
|
return [punk::args::usage -scheme info ::tomlish::app::test] |
|
} |
|
|
|
test::tomlish::SUITE $opt_suite |
|
#if {[catch {test::tomlish::SUITE $opt_suite} errM]} { |
|
# puts stderr "Unknown test suite '$opt_suite'. Available suites: [test::tomlish::SUITES]" |
|
# exit 1 |
|
#} |
|
set run_opts [dict remove $opts -suite] |
|
set run_opts [dict remove $run_opts {*}$solos] |
|
set result [test::tomlish::RUN {*}$run_opts {*}$solos {*}$values] |
|
return $result |
|
} |
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::app ---}] |
|
} |
|
|
|
proc ::tomlish::appnames {} { |
|
set applist [list] |
|
foreach cmd [info commands ::tomlish::app::*] { |
|
lappend applist [namespace tail $cmd] |
|
} |
|
return $applist |
|
} |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Secondary API namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval tomlish::lib { |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
namespace path [namespace parent] |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::lib}] |
|
#[para] Secondary functions that are part of the API |
|
#[list_begin definitions] |
|
|
|
#proc utility1 {p1 args} { |
|
# #*** !doctools |
|
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] |
|
# #[para]Description of utility1 |
|
# return 1 |
|
#} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::lib ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval tomlish::system { |
|
|
|
#taken from punk::lib |
|
proc lindex_resolve_basic {list index} { |
|
#*** !doctools |
|
#[call [fun lindex_resolve_basic] [arg list] [arg index]] |
|
#[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) |
|
#[para] returns -1 for out of range at either end, or a valid integer index |
|
#[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound |
|
#[para] This is only likely to be faster than average over lindex_resolve for Tcl which has the builtin lseq command |
|
#[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 |
|
#[para] For pure integer indices the performance should be equivalent |
|
|
|
set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 |
|
if {[string is integer -strict $index]} { |
|
#can match +i -i |
|
#avoid even the lseq overhead when the index is simple |
|
if {$index < 0 || ($index >= [llength $list])} { |
|
#even though in this case we could return -2 or -3 like lindex_resolve; for consistency we don't, as it's not always determinable for compound indices using the lseq method. |
|
return -1 |
|
} else { |
|
#integer may still have + sign - normalize with expr |
|
return [expr {$index}] |
|
} |
|
} |
|
if {[llength $list]} { |
|
set indices [tomlish::system::range 0 [expr {[llength $list]-1}]] ;# uses lseq if available, has fallback. |
|
#if lseq was available - $indices is an 'arithseries' - theoretically not taking up ram(?) |
|
} else { |
|
set indices [list] |
|
} |
|
set idx [lindex $indices $index] |
|
if {$idx eq ""} { |
|
#we have no way to determine if out of bounds is at lower vs upper end |
|
return -1 |
|
} else { |
|
return $idx |
|
} |
|
} |
|
|
|
if {[info commands ::lseq] ne ""} { |
|
#tcl 8.7+ lseq significantly faster, especially for larger ranges |
|
#The internal rep can be an 'arithseries' with no string representation |
|
#support minimal set from to |
|
proc range {from to} { |
|
lseq $from $to |
|
} |
|
} else { |
|
#lseq accepts basic expressions e.g 4-2 for both arguments |
|
#e.g we can do lseq 0 [llength $list]-1 |
|
#if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. |
|
proc range {from to} { |
|
set to [offset_expr $to] |
|
set from [offset_expr $from] |
|
if {$to > $from} { |
|
set count [expr {($to -$from) + 1}] |
|
if {$from == 0} { |
|
return [lsearch -all [lrepeat $count 0] *] |
|
} else { |
|
incr from -1 |
|
return [lmap v [lrepeat $count 0] {incr from}] |
|
} |
|
#slower methods. |
|
#2) |
|
#set i -1 |
|
#set L [lrepeat $count 0] |
|
#lmap v $L {lset L [incr i] [incr from];lindex {}} |
|
#return $L |
|
#3) |
|
#set L {} |
|
#for {set i 0} {$i < $count} {incr i} { |
|
# lappend L [incr from] |
|
#} |
|
#return $L |
|
} elseif {$from > $to} { |
|
set count [expr {$from - $to} + 1] |
|
#1) |
|
if {$to == 0} { |
|
return [lreverse [lsearch -all [lrepeat $count 0] *]] |
|
} else { |
|
incr from |
|
return [lmap v [lrepeat $count 0] {incr from -1}] |
|
} |
|
|
|
#2) |
|
#set i -1 |
|
#set L [lrepeat $count 0] |
|
#lmap v $L {lset L [incr i] [incr from -1];lindex {}} |
|
#return $L |
|
#3) |
|
#set L {} |
|
#for {set i 0} {$i < $count} {incr i} { |
|
# lappend L [incr from -1] |
|
#} |
|
#return $L |
|
} else { |
|
return [list $from] |
|
} |
|
} |
|
} |
|
|
|
} |
|
|
|
if {[info exists ::argc] && $::argc > 0} { |
|
#puts stderr "argc: $::argc args: $::argv" |
|
set arglist $::argv |
|
# -------------- |
|
#make sure any dependant packages that are sourced don't get any commandline args |
|
set ::argv {} |
|
set ::argc 0 |
|
# -------------- |
|
package require punk::args |
|
punk::args::define { |
|
@dynamic |
|
@id -id tomlish::cmdline |
|
@cmd -name tomlish -help\ |
|
"toml encoder/decoder written in Tcl" |
|
@opts -any 1 |
|
-help -type none -help\ |
|
"Display this usage message or more specific |
|
help if further arguments provided." |
|
-app -choices {${[tomlish::appnames]}} |
|
} |
|
try { |
|
set argd [punk::args::parse $arglist withid tomlish::cmdline] |
|
} trap {PUNKARGS VALIDATION} {msg erroropts} { |
|
puts stderr $msg |
|
exit 1 |
|
} |
|
|
|
|
|
lassign [dict values $argd] leaders opts values received solos |
|
if {[dict exists $received -help] && ![dict exists $received -app]} { |
|
#only emit cmdline help if -app not supplied as well - otherwise app function can act on -help for more specific help |
|
#puts stdout "Usage: -app <appname> where appname one of:[tomlish::appnames]" |
|
puts stdout [punk::args::usage -scheme info tomlish::cmdline] |
|
exit 0 |
|
} |
|
if {![dict exists $received -app]} { |
|
puts stderr [punk::args::usage -scheme error tomlish::cmdline] |
|
exit 1 |
|
} |
|
|
|
set app [dict get $opts -app] |
|
set appnames [tomlish::appnames] |
|
set app_opts [dict remove $opts -app {*}$solos] |
|
try { |
|
set result [tomlish::app::$app {*}$app_opts {*}$solos {*}$values] |
|
} trap {PUNKARGS VALIDATION} {msg erroropts} { |
|
#The validation error should fully describe the issue |
|
#no need for errortrace - keep the output cleaner |
|
puts stderr $msg |
|
exit 1 |
|
} trap {} {msg erroropts} { |
|
#unexpected error - uncaught throw will produce error trace |
|
#todo - a support msg? Otherwise we may as well just leave off this trap. |
|
throw [dict get $erroropts -errorcode] [dict get $erroropts -errorinfo] |
|
} |
|
if {"-help" in $solos} { |
|
puts stderr $result |
|
exit 1 |
|
} else { |
|
if {$result ne ""} { |
|
puts stdout $result |
|
exit 0 |
|
} |
|
} |
|
} |
|
|
|
## Ready |
|
package provide tomlish [namespace eval tomlish { |
|
variable pkg tomlish |
|
variable version |
|
set version 1.1.6 |
|
}] |
|
return |
|
|
|
#*** !doctools |
|
#[manpage_end] |
|
|
|
|