You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3357 lines
150 KiB
3357 lines
150 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2024 |
|
# |
|
# @@ Meta Begin |
|
# Application tomlish 1.1.1 |
|
# Meta platform tcl |
|
# Meta license <unspecified> |
|
# @@ Meta End |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# doctools header |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[manpage_begin tomlish_module_tomlish 0 1.1.1] |
|
#[copyright "2024"] |
|
#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] |
|
#[moddesc {-}] [comment {-- Description at end of page heading --}] |
|
#[require tomlish] |
|
#[keywords module] |
|
#[description] |
|
#[para] - |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section Overview] |
|
#[para] overview of tomlish |
|
#[subsection Concepts] |
|
#[para] - |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[subsection dependencies] |
|
#[para] packages used by tomlish |
|
#[list_begin itemized] |
|
|
|
package require Tcl 8.6- |
|
package require struct::stack |
|
package require logger |
|
|
|
#*** !doctools |
|
#[item] [package {Tcl 8.6-}] |
|
#[item] [package {struct::stack}] |
|
|
|
#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') |
|
|
|
|
|
#*** !doctools |
|
#[list_end] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section API] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Base namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval tomlish { |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
variable types |
|
#ARRAY is analogous to a Tcl list |
|
#TABLE is analogous to a Tcl dict |
|
#WS = inline whitespace |
|
#KEYVAL = bare key and value |
|
#QKEYVAL = quoted key and value |
|
#ITABLE = inline table (*can* be anonymous table) |
|
# inline table values immediately create a table with the opening brace |
|
# inline tables are fully defined between their braces, as are dotted-key subtables defined within |
|
# No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained |
|
|
|
set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL STRING MULTISTRING LITSTRING MULTILITSTRING INT FLOAT BOOL DATETIME] |
|
#tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) |
|
#we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) |
|
set min_int -9223372036854775808 ;#-2^63 |
|
set max_int +9223372036854775807 ;#2^63-1 |
|
|
|
proc Dolog {lvl txt} { |
|
#return "$lvl -- $txt" |
|
set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" |
|
puts stderr $msg |
|
} |
|
logger::initNamespace ::tomlish |
|
foreach lvl [logger::levels] { |
|
interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl |
|
log::logproc $lvl tomlish_log_$lvl |
|
} |
|
|
|
#*** !doctools |
|
#[subsection {Namespace tomlish}] |
|
#[para] Core API functions for tomlish |
|
#[list_begin definitions] |
|
|
|
proc tags {} { |
|
return $::tomlish::tags |
|
} |
|
|
|
#helper function for get_dict |
|
proc _get_keyval_value {keyval_element} { |
|
log::notice ">>> _get_keyval_value from '$keyval_element'<<<" |
|
set found_value 0 |
|
#find the value |
|
# 3 is the earliest index at which the value could occur (depending on whitespace) |
|
set found_sub [list] |
|
foreach sub [lrange $keyval_element 2 end] { |
|
#note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey |
|
switch -exact -- [lindex $sub 0] { |
|
STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { |
|
set type [lindex $sub 0] |
|
set value [lindex $sub 1] |
|
set found_sub $sub |
|
incr found_value 1 |
|
} |
|
default {} |
|
} |
|
} |
|
if {!$found_value} { |
|
error "Failed to find value element in KEYVAL. '$keyval_element'" |
|
} |
|
if {$found_value > 1} { |
|
error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" |
|
} |
|
|
|
switch -exact -- $type { |
|
INT - FLOAT - BOOL - DATETIME { |
|
#simple (non-container, no-substitution) datatype |
|
set result [list type $type value $value] |
|
} |
|
STRING - STRINGPART { |
|
set result [list type $type value [::tomlish::utils::unescape_string $value]] |
|
} |
|
LITSTRING { |
|
#REVIEW |
|
set result [list type $type value $value] |
|
} |
|
TABLE - ITABLE - ARRAY - MULTISTRING { |
|
#jmn2024 - added ITABLE - review |
|
#we need to recurse to get the corresponding dict for the contained item(s) |
|
#pass in the whole $found_sub - not just the $value! |
|
set result [list type $type value [::tomlish::get_dict [list $found_sub]]] |
|
} |
|
default { |
|
error "Unexpected value type '$type' found in keyval '$keyval_element'" |
|
} |
|
} |
|
return $result |
|
} |
|
|
|
#get_dict is a *basic* programmatic datastructure for accessing the data. |
|
# produce a dictionary of keys and values from a tomlish tagged list. |
|
# get_dict is primarily for reading toml data. |
|
#Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, |
|
# so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. |
|
# creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. |
|
#A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. |
|
proc get_dict {tomlish} { |
|
|
|
#keep track of which tablenames have already been directly defined, |
|
# so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' |
|
#Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. |
|
#we don't error out just because a previous tablename segment has already appeared. |
|
variable tablenames_seen [list] |
|
|
|
|
|
log::info ">>> processing '$tomlish'<<<" |
|
set items $tomlish |
|
|
|
foreach lst $items { |
|
if {[lindex $lst 0] ni $::tomlish::tags} { |
|
error "supplied string does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" |
|
} |
|
} |
|
|
|
if {[lindex $tomlish 0] eq "TOMLISH"} { |
|
#ignore TOMLISH tag at beginning |
|
set items [lrange $tomlish 1 end] |
|
} |
|
|
|
set datastructure [dict create] |
|
foreach item $items { |
|
set tag [lindex $item 0] |
|
#puts "...> item:'$item' tag:'$tag'" |
|
switch -exact -- $tag { |
|
KEYVAL - QKEYVAL { |
|
log::debug "--> processing $tag: $item" |
|
set key [lindex $item 1] |
|
#!todo - normalize key. (may be quoted/doublequoted) |
|
|
|
if {[dict exists $datastructure $key]} { |
|
error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." |
|
} |
|
|
|
#lassign [_get_keyval_value $item] type val |
|
set keyval_dict [_get_keyval_value $item] |
|
dict set datastructure $key $keyval_dict |
|
} |
|
TABLE { |
|
set tablename [lindex $item 1] |
|
set tablename [::tomlish::utils::tablename_trim $tablename] |
|
|
|
if {$tablename in $tablenames_seen} { |
|
error "Table name '$tablename' has already been directly defined in the toml data. Invalid." |
|
} |
|
|
|
log::debug "--> processing $tag (name: $tablename): $item" |
|
set name_segments [::tomlish::utils::tablename_split $tablename] |
|
set last_seg "" |
|
#toml spec rule - all segments mst be non-empty |
|
#note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. |
|
|
|
set key_hierarchy [list] |
|
set key_hierarchy_raw [list] |
|
|
|
foreach rawseg $name_segments { |
|
|
|
set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes |
|
set c1 [::string index $rawseg 0] |
|
set c2 [::string index $rawseg end] |
|
if {($c1 eq "'") && ($c2 eq "'")} { |
|
#single quoted segment. No escapes are processed within it. |
|
set seg [::string range $rawseg 1 end-1] |
|
} elseif {($c1 eq "\"") && ($c2 eq "\"")} { |
|
#double quoted segment. Apply escapes. |
|
set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] |
|
#set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] |
|
} else { |
|
set seg $rawseg |
|
} |
|
|
|
#no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. |
|
#if {$rawseg eq ""} { |
|
# error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" |
|
#} |
|
lappend key_hierarchy $seg |
|
lappend key_hierarchy_raw $rawseg |
|
|
|
if {[dict exists $datastructure {*}$key_hierarchy]} { |
|
#It's ok for this key to already exist *if* it was defined by a previous tablename, |
|
# but not if it was defined as a keyval/qkeyval |
|
|
|
set testkey [join $key_hierarchy_raw .] |
|
set testkey_length [llength $key_hierarchy_raw] |
|
set found_testkey 0 |
|
if {$testkey in $tablenames_seen} { |
|
set found_testkey 1 |
|
} else { |
|
#see if it was defined by a longer entry |
|
foreach seen $tablenames_seen { |
|
set seen_segments [::tomlish::utils::tablename_split $seen] |
|
#these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, |
|
# and strip the quotes from both single-quoted and double-quoted entries. |
|
|
|
#note: it is not safe to compare normalized tablenames using join! |
|
# e.g a.'b.c'.d is not the same as a.b.c.d |
|
# instead compare {a b.c d} with {a b c d} |
|
# Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. |
|
#'a.b'.'c.d.e' vs 'a.b.c'.'d.e' |
|
#dots within table segments might seem like an 'edge case' |
|
# - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. |
|
|
|
#VVV the test below is wrong VVV! |
|
set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] |
|
if {$testkey eq $seen_match} { |
|
set found_testkey 1 |
|
} |
|
} |
|
} |
|
|
|
if {$found_testkey == 0} { |
|
#the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset |
|
error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." |
|
} |
|
} |
|
|
|
} |
|
|
|
|
|
#We must do this after the key-collision test above! |
|
lappend tablenames_seen $tablename |
|
|
|
|
|
log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" |
|
log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" |
|
|
|
#now add the contained elements |
|
foreach element [lrange $item 2 end] { |
|
set type [lindex $element 0] |
|
switch -exact -- $type { |
|
KEYVAL - QKEYVAL { |
|
set keyval_key [lindex $element 1] |
|
set keyval_dict [_get_keyval_value $element] |
|
dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict |
|
} |
|
NEWLINE - COMMENT - WS { |
|
#ignore |
|
} |
|
default { |
|
error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" |
|
} |
|
} |
|
} |
|
#now make sure we add an empty value if there were no contained elements! |
|
#!todo. |
|
} |
|
ITABLE { |
|
#SEP??? |
|
set datastructure [list] |
|
foreach element [lrange $item 1 end] { |
|
set type [lindex $element 0] |
|
switch -exact -- $type { |
|
KEYVAL - QKEYVAL { |
|
set keyval_key [lindex $element 1] |
|
set keyval_dict [_get_keyval_value $element] |
|
dict set datastructure $keyval_key $keyval_dict |
|
} |
|
NEWLINE - COMMENT - WS { |
|
#ignore |
|
} |
|
default { |
|
error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" |
|
} |
|
} |
|
} |
|
} |
|
ARRAY { |
|
#arrays in toml are allowed to contain mixtures of types |
|
set datastructure [list] |
|
log::debug "--> processing array: $item" |
|
|
|
foreach element [lrange $item 1 end] { |
|
set type [lindex $element 0] |
|
switch -exact -- $type { |
|
INT - FLOAT - BOOL - DATETIME { |
|
set value [lindex $element 1] |
|
lappend datastructure [list type $type value $value] |
|
} |
|
STRING { |
|
set value [lindex $element 1] |
|
lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] |
|
} |
|
TABLE - ARRAY - MULTISTRING { |
|
set value [lindex $element 1] |
|
lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] |
|
} |
|
WS - SEP { |
|
#ignore whitespace and commas |
|
} |
|
default { |
|
error "Unexpected value type '$type' found in array" |
|
} |
|
} |
|
} |
|
} |
|
MULTISTRING { |
|
#triple dquoted string |
|
log::debug "--> processing multistring: $item" |
|
set stringvalue "" |
|
set idx 0 |
|
set parts [lrange $item 1 end] |
|
for {set idx 0} {$idx < [llength $parts]} {incr idx} { |
|
set element [lindex $parts $idx] |
|
set type [lindex $element 0] |
|
switch -exact -- $type { |
|
STRINGPART { |
|
append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] |
|
} |
|
CONT { |
|
#When the last non-whitespace character on a line is an unescaped backslash, |
|
#it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter |
|
# review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? |
|
set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] |
|
if {$next_nl == -1} { |
|
#last line |
|
set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] |
|
if {$non_ws >= 0} { |
|
append stringvalue "\\" ;#add the sep |
|
} else { |
|
#skip over ws without emitting |
|
set idx [llength $parts] |
|
} |
|
} else { |
|
set parts_til_nl [lrange $parts 0 $next_nl-1] |
|
set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] |
|
if {$non_ws >= 0} { |
|
append stringvalue "\\" |
|
} else { |
|
#skip over ws on this line |
|
set idx $next_nl |
|
#then have to check each subsequent line until we get to first non-whitespace |
|
set trimming 1 |
|
while {$trimming && $idx < [llength $parts]} { |
|
set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] |
|
if {$next_nl == -1} { |
|
#last line |
|
set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] |
|
if {$non_ws >= 0} { |
|
set idx [expr {$non_ws -1}] |
|
} else { |
|
set idx [llength $parts] |
|
} |
|
set trimming 0 |
|
} else { |
|
set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] |
|
if {$non_ws >= 0} { |
|
set idx [expr {$non_ws -1}] |
|
set trimming 0 |
|
} else { |
|
set idx $next_nl |
|
#keep trimming |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
NEWLINE { |
|
#if newline is first element - it is not part of the data of a multistring |
|
if {$idx > 0} { |
|
set val [lindex $element 1] |
|
if {$val eq "nl"} { |
|
append stringvalue \n |
|
} else { |
|
append stringvalue \r\n |
|
} |
|
} |
|
} |
|
WS { |
|
append stringvalue [lindex $element 1] |
|
} |
|
default { |
|
error "Unexpected value type '$type' found in multistring" |
|
} |
|
} |
|
} |
|
set datastructure $stringvalue |
|
} |
|
WS - COMMENT - NEWLINE { |
|
#ignore |
|
} |
|
default { |
|
error "Unexpected tag '$tag' in Tomlish list '$tomlish'" |
|
} |
|
} |
|
} |
|
return $datastructure |
|
} |
|
|
|
proc json_to_toml {json} { |
|
#*** !doctools |
|
#[call [fun json_to_toml] [arg json]] |
|
#[para] |
|
|
|
set tomlish [::tomlish::from_json $json] |
|
set toml [::tomlish::to_toml $tomlish] |
|
} |
|
|
|
proc from_json {json} { |
|
set jstruct [::tomlish::json_struct $json] |
|
return [::tomlish::from_json_struct $jstruct] |
|
} |
|
|
|
proc from_json_struct {jstruct} { |
|
package require fish::json_toml |
|
return [fish::json_toml::jsonstruct2tomlish $jstruct] |
|
} |
|
|
|
proc toml_to_json {toml} { |
|
set tomlish [::tomlish::from_toml $toml] |
|
return [::tomlish::get_json $tomlish] |
|
} |
|
|
|
proc get_json {tomlish} { |
|
package require fish::json |
|
set d [::tomlish::get_dict $tomlish] |
|
#return [::tomlish::dict_to_json $d] |
|
return [fish::json::from "struct" $d] |
|
} |
|
|
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
namespace eval tomlish::encode { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::encode}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
#STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness |
|
# take a value of the appropriate type and wrap as a tomlish tagged item |
|
proc string {s} { |
|
return [list STRING $s] |
|
} |
|
|
|
proc int {i} { |
|
#whole numbers, may be prefixed with a + or - |
|
#Leading zeros are not allowed |
|
#Hex,octal binary forms are allowed (toml 1.0) |
|
#We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) |
|
#!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. |
|
# - We should probably raise an error for number larger than this and suggest the user supply it as a string? |
|
if {[tcl::string::last , $i] > -1} { |
|
error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" |
|
} |
|
if {![::tomlish::utils::int_validchars $i]} { |
|
error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" |
|
} |
|
|
|
if {[::tomlish::utils::is_int $i]} { |
|
return [list INT $i] |
|
} else { |
|
error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" |
|
} |
|
|
|
} |
|
|
|
proc float {f} { |
|
#convert any non-lower case variants of special values to lowercase for Toml |
|
if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { |
|
return [list FLOAT [string tolower $f]] |
|
} |
|
if {[::tomlish::utils::is_float $f]} { |
|
return [list FLOAT $f] |
|
} else { |
|
error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" |
|
} |
|
} |
|
|
|
proc datetime {str} { |
|
if {[::tomlish::utils::is_datetime $str]} { |
|
return [list DATETIME $str] |
|
} else { |
|
error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" |
|
} |
|
} |
|
|
|
proc boolean {b} { |
|
#convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false |
|
if {![string is boolean -strict $b]} { |
|
error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" |
|
} else { |
|
if {[expr {$b && 1}]} { |
|
return [list BOOL true] |
|
} else { |
|
return [list BOOL false] |
|
} |
|
} |
|
} |
|
|
|
#Take tablename followed by |
|
# a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} |
|
# b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types |
|
proc table {name args} { |
|
set pairs [list] |
|
foreach t $args { |
|
if {[llength $t] == 3} { |
|
if {[lindex $t 0] ne "KEYVAL"} { |
|
error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" |
|
} |
|
lappend pairs $t |
|
} elseif {[llength $t] == 2} { |
|
#!todo - type heuristics |
|
lassign $t n v |
|
lappend pairs [list KEYVAL $n [list STRING $v]] |
|
} else { |
|
error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" |
|
} |
|
} |
|
return [list TABLE $name $pairs] |
|
} |
|
|
|
|
|
#the tomlish root is basically a nameless table representing the root of the document |
|
proc root {args} { |
|
set table [::tomlish::encode::table TOMLISH {*}$args] |
|
set result [lindex $table 2] ;#Take only the key-value pair list |
|
} |
|
|
|
#WS = whitepace, US = underscore |
|
proc tomlish {list {context ""}} { |
|
if {![tcl::string::is list $list]} { |
|
error "Supplied 'tomlish' is not a valid Tcl list. Expected a tagged list (parsed Toml)" |
|
} |
|
set toml "" ;#result string |
|
|
|
foreach item $list { |
|
set tag [lindex $item 0] |
|
#puts "tomlish::encode::tomlish processing item '$item', tag '$tag'" |
|
#during recursion, some tags require different error checking in different contexts. |
|
set nextcontext $tag ; |
|
|
|
|
|
#Handle invalid tag nestings |
|
switch -- $context { |
|
QKEYVAL - |
|
KEYVAL { |
|
if {$tag in {KEYVAL QKEYVAL}} { |
|
error "Invalid tag '$tag' encountered within '$context'" |
|
} |
|
} |
|
MULTISTRING { |
|
#explicitly list the valid child tags |
|
if {$tag ni {STRING STRINGPART WS NEWLINE CONT}} { |
|
error "Invalid tag '$tag' encountered within a MULTISTRING" |
|
} |
|
} |
|
default { |
|
#no context, or no defined nesting error for this context |
|
} |
|
} |
|
|
|
switch -- $tag { |
|
TOMLISH { |
|
#optional root tag. Ignore. |
|
} |
|
QKEYVAL - |
|
KEYVAL { |
|
if {$tag eq "KEYVAL"} { |
|
append toml [lindex $item 1] ;#Key |
|
} else { |
|
append toml \"[lindex $item 1]\" ;#Quoted Key |
|
} |
|
foreach part [lrange $item 2 end] { |
|
if {$part eq "="} { |
|
append toml "=" |
|
} else { |
|
append toml [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
} |
|
} |
|
TABLE { |
|
append toml "\[[lindex $item 1]\]" ;#table name |
|
foreach part [lrange $item 2 end] { |
|
append toml [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
|
|
} |
|
ITABLE { |
|
#inline table - e.g within array or on RHS of keyval/qkeyval |
|
set data "" |
|
foreach part [lrange $item 1 end] { |
|
append data [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml "\{$data\}" |
|
} |
|
ARRAY { |
|
|
|
set arraystr "" |
|
foreach part [lrange $item 1 end] { |
|
append arraystr [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml "\[$arraystr\]" |
|
} |
|
WS { |
|
append toml [lindex $item 1] |
|
} |
|
SEP { |
|
append toml "," |
|
} |
|
NEWLINE { |
|
set chartype [lindex $item 1] |
|
if {$chartype eq "lf"} { |
|
append toml \n |
|
} elseif {$chartype eq "crlf"} { |
|
append toml \r\n |
|
} else { |
|
error "Unrecognized newline type '$chartype'" |
|
} |
|
} |
|
CONT { |
|
#line continuation character "\" |
|
append toml "\\" |
|
} |
|
STRING { |
|
#simple double quoted strings only |
|
# |
|
return \"[lindex $item 1]\" |
|
} |
|
STRINGPART { |
|
return [lindex $item 1] |
|
} |
|
MULTISTRING { |
|
#Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts |
|
set multistring "" ;#variable to build up the string |
|
foreach part [lrange $item 1 end] { |
|
append multistring [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml "\"\"\"$multistring\"\"\"" |
|
} |
|
LITSTRING { |
|
#Single Quoted string(literal string) |
|
append toml '[lindex $item 1]' |
|
} |
|
MULTILITSTRING { |
|
#review - multilitstring can be handled as a single string? |
|
set litstring "" |
|
foreach part [lrange $item 1 end] { |
|
append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] |
|
} |
|
append toml '''$litstring''' |
|
} |
|
INT - |
|
BOOL - |
|
FLOAT - |
|
DATETIME { |
|
append toml [lindex $item 1] |
|
} |
|
INCOMPLETE { |
|
error "cannot process tomlish term tagged as INCOMPLETE" |
|
} |
|
COMMENT { |
|
append toml "#[lindex $item 1]" |
|
} |
|
BOM { |
|
#Byte Order Mark may appear at beginning of a file. Needs to be preserved. |
|
append toml "\uFEFF" |
|
} |
|
default { |
|
error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." |
|
} |
|
} |
|
|
|
} |
|
return $toml |
|
} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::encode ---}] |
|
} |
|
#fish toml from tomlish |
|
|
|
#(encode tomlish as toml) |
|
interp alias {} tomlish::to_toml {} tomlish::encode::tomlish |
|
|
|
# |
|
|
|
|
|
namespace eval tomlish::decode { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::decode}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
#return a Tcl list of tomlish tokens |
|
#i.e get a standard list of all the toml terms in string $s |
|
#where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. |
|
#(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) |
|
#Note that we deliberately don't check certain things such as duplicate table declarations here. |
|
#Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. |
|
# (e.g perhaps a toml editor to highlight violations for fixing) |
|
# A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. |
|
# e.g dicts or an object oriented structure |
|
#Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage |
|
#e.g to_dict will substitute \r \n \uHHHH \UHHHHHHH etc |
|
#This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. |
|
# (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) |
|
#If we were to unescape a tab character for example |
|
# - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. |
|
# For this reason, we also do absolutely no line-ending transformations based on platform. |
|
# All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' |
|
proc toml {s} { |
|
#*** !doctools |
|
#[call [fun toml] [arg s]] |
|
#[para] return a Tcl list of tomlish tokens |
|
|
|
namespace upvar ::tomlish::parse is_parsing is_parsing |
|
set is_parsing 1 |
|
|
|
|
|
if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { |
|
tomlish::parse::spacestack destroy |
|
} |
|
struct::stack ::tomlish::parse::spacestack |
|
|
|
namespace upvar ::tomlish::parse last_space_action last_space_action |
|
namespace upvar ::tomlish::parse last_space_type last_space_type |
|
|
|
|
|
namespace upvar ::tomlish::parse tok tok |
|
set tok "" |
|
|
|
namespace upvar ::tomlish::parse type type |
|
namespace upvar ::tomlish::parse tokenType tokenType |
|
::tomlish::parse::set_tokenType "" |
|
namespace upvar ::tomlish::parse tokenType_list tokenType_list |
|
set tokenType [list] ;#Flat (un-nested) list of tokentypes found |
|
|
|
namespace upvar ::tomlish::parse lastChar lastChar |
|
set lastChar "" |
|
|
|
|
|
set result "" |
|
namespace upvar ::tomlish::parse nest nest |
|
set nest 0 |
|
|
|
namespace upvar ::tomlish::parse v v ;#array keyed on nest level |
|
|
|
|
|
set v(0) {TOMLISH} |
|
array set s0 [list] ;#whitespace data to go in {SPACE {}} element. |
|
set parentlevel 0 |
|
|
|
namespace upvar ::tomlish::parse i i |
|
set i 0 |
|
|
|
namespace upvar ::tomlish::parse state state |
|
|
|
namespace upvar ::tomlish::parse braceCount braceCount |
|
set barceCount 0 |
|
namespace upvar ::tomlish::parse bracketCount bracketCount |
|
set bracketCount 0 |
|
|
|
set sep 0 |
|
set r 1 |
|
namespace upvar ::tomlish::parse token_waiting token_waiting |
|
set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. |
|
|
|
|
|
set state "key-space" |
|
::tomlish::parse::spacestack push {space key-space} |
|
namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) |
|
set linenum 1 |
|
|
|
try { |
|
while {$r} { |
|
set r [::tomlish::parse::tok $s] |
|
#puts stdout "got tok: '$tok' while parsing string '$s' " |
|
set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' |
|
|
|
|
|
|
|
#puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" |
|
#puts "-->tok: $tok tokenType='$tokenType'" |
|
set prevstate $state |
|
##### |
|
set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] |
|
::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" |
|
|
|
set state $nextstate |
|
if {$state eq "err"} { |
|
error "State error - aborting parse. [tomlish::parse::report_line]" |
|
} |
|
|
|
if {$last_space_action eq "pop"} { |
|
#pop_trigger_tokens: newline tablename endarray endinlinetable |
|
#note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. |
|
switch -exact -- $tokenType { |
|
newline { |
|
incr linenum |
|
lappend v($nest) [list NEWLINE $tok] |
|
} |
|
tablename { |
|
#note: a tablename only 'pops' if we are greater than zero |
|
error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" |
|
} |
|
tablearrayname { |
|
#!review - tablearrayname different to tablename regarding push/pop? |
|
#note: a tablename only 'pops' if we are greater than zero |
|
error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" |
|
} |
|
endarray { |
|
#nothing to do here. |
|
} |
|
comma { |
|
#comma for inline table will pop the keyvalue space |
|
lappend v($nest) "SEP" |
|
} |
|
endinlinetable { |
|
puts stderr "endinlinetable" |
|
} |
|
endmultiquote { |
|
puts stderr "endmultiquote for last_space_action 'pop'" |
|
} |
|
default { |
|
error "unexpected tokenType '$tokenType' for last_space_action 'pop'" |
|
} |
|
} |
|
set parentlevel [expr {$nest -1}] |
|
lappend v($parentlevel) [set v($nest)] |
|
incr nest -1 |
|
|
|
} elseif {$last_space_action eq "push"} { |
|
incr nest 1 |
|
set v($nest) [list] |
|
# push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname |
|
switch -exact -- $tokenType { |
|
barekey { |
|
set v($nest) [list KEYVAL $tok] ;#$tok is the keyname |
|
} |
|
quotedkey - itablequotedkey { |
|
set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname |
|
} |
|
tablename { |
|
#note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! |
|
#The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish |
|
# back to toml file will be identical. |
|
#It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. |
|
# we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, |
|
# so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. |
|
|
|
#todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, |
|
# so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the |
|
# tomlish list? |
|
|
|
set test_only [::tomlish::utils::tablename_trim $tok] |
|
::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" |
|
set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name |
|
#note also that equivalent tablenames may have different toml representations even after being trimmed! |
|
#e.g ["x\t\t"] & ["x "] (tab escapes vs literals) |
|
#These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. |
|
} |
|
tablearrayname { |
|
set test_only [::tomlish::utils::tablename_trim $tok] |
|
puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" |
|
set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name |
|
} |
|
startarray { |
|
set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. |
|
} |
|
startinlinetable { |
|
set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. |
|
} |
|
startmultiquote { |
|
puts stderr "push trigger tokenType startmultiquote (todo)" |
|
set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE |
|
#JMN ??? |
|
#set next_tokenType_known 1 |
|
#::tomlish::parse::set_tokenType "multistring" |
|
#set tok "" |
|
} |
|
default { |
|
error "push trigger tokenType '$tokenType' not yet implemented" |
|
} |
|
} |
|
|
|
} else { |
|
#no space level change |
|
switch -exact -- $tokenType { |
|
starttablename { |
|
#$tok is triggered by the opening bracket and sends nothing to output |
|
} |
|
starttablearrayname { |
|
#$tok is triggered by the double opening brackets and sends nothing to output |
|
} |
|
tablename - tablenamearray { |
|
error "did not expect 'tablename/tablearrayname' without space level change" |
|
#set v($nest) [list TABLE $tok] |
|
} |
|
endtablename - endtablearrayname { |
|
#no output into the tomlish list for this token |
|
} |
|
startinlinetable { |
|
puts stderr "decode::toml error. did not expect startlinetable without space level change" |
|
} |
|
startquote { |
|
switch -exact -- $nextstate { |
|
string { |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "string" |
|
set tok "" |
|
} |
|
quotedkey { |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "quotedkey" |
|
set tok "" |
|
} |
|
itablequotedkey { |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "itablequotedkey" |
|
set tok "" |
|
} |
|
default { |
|
error "startquote switch case not implemented for nextstate: $nextstate" |
|
} |
|
} |
|
} |
|
startmultiquote { |
|
#review |
|
puts stderr "no space level change - got startmultiquote" |
|
set next_tokenType_known 1 |
|
::tomlish::parse::set_tokenType "stringpart" |
|
set tok "" |
|
} |
|
endquote { |
|
#nothing to do? |
|
set tok "" |
|
} |
|
endmultiquote { |
|
#JMN!! |
|
set tok "" |
|
} |
|
string { |
|
lappend v($nest) [list STRING $tok] |
|
} |
|
stringpart { |
|
lappend v($nest) [list STRINGPART $tok] |
|
} |
|
multistring { |
|
#review |
|
lappend v($nest) [list MULTISTRING $tok] |
|
} |
|
quotedkey { |
|
#lappend v($nest) [list QKEY $tok] ;#TEST |
|
} |
|
itablequotedkey { |
|
|
|
} |
|
untyped-value { |
|
#we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. |
|
if {$tok in {true false}} { |
|
set tag BOOL |
|
} elseif {[::tomlish::utils::is_int $tok]} { |
|
set tag INT |
|
} elseif {[::tomlish::utils::is_float $tok]} { |
|
set tag FLOAT |
|
} elseif {[::tomlish::utils::is_datetime $tok]} { |
|
set tag DATETIME |
|
} else { |
|
error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" |
|
} |
|
lappend v($nest) [list $tag $tok] |
|
} |
|
comment { |
|
#puts stdout "----- comment token returned '$tok'------" |
|
lappend v($nest) [list COMMENT "$tok"] |
|
} |
|
equal { |
|
#we append '=' to the nest so that any surrounding whitespace is retained. |
|
lappend v($nest) = |
|
} |
|
comma { |
|
lappend v($nest) SEP |
|
} |
|
newline { |
|
incr linenum |
|
lappend v($nest) [list NEWLINE $tok] |
|
} |
|
whitespace { |
|
lappend v($nest) [list WS $tok] |
|
} |
|
continuation { |
|
lappend v($nest) CONT |
|
} |
|
bom { |
|
lappend v($nest) BOM |
|
} |
|
eof { |
|
#ok - nothing more to add to the tomlish list. |
|
#!todo - check previous tokens are complete/valid? |
|
} |
|
default { |
|
error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
|
|
if {!$next_tokenType_known} { |
|
::tomlish::parse::set_tokenType "" |
|
set tok "" |
|
} |
|
|
|
if {$state eq "end"} { |
|
break |
|
} |
|
|
|
|
|
} |
|
|
|
#while {$nest > 0} { |
|
# lappend v([expr {$nest -1}]) [set v($nest)] |
|
# incr nest -1 |
|
#} |
|
while {[::tomlish::parse::spacestack size] > 1} { |
|
::tomlish::parse::spacestack pop |
|
lappend v([expr {$nest -1}]) [set v($nest)] |
|
incr nest -1 |
|
|
|
#set parent [spacestack peek] ;#the level being appended to |
|
#lassign $parent type state |
|
#if {$type eq "space"} { |
|
# |
|
#} elseif {$type eq "buffer"} { |
|
# lappend v([expr {$nest -1}]) {*}[set v($nest)] |
|
#} else { |
|
# error "invalid spacestack item: $parent" |
|
#} |
|
} |
|
|
|
} finally { |
|
set is_parsing 0 |
|
} |
|
|
|
|
|
return $v(0) |
|
} |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::decode ---}] |
|
} |
|
#decode toml to tomlish |
|
interp alias {} tomlish::from_toml {} tomlish::decode::toml |
|
|
|
namespace eval tomlish::utils { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::utils}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
|
|
#tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace |
|
# tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] |
|
#trimmed, the tablename becomes {a.b.c} |
|
# A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] |
|
#ie whitespace is only irrelevant if it's outside a quoted segment |
|
#trimmed, the tablename becomes {a.b."c etc "} |
|
proc tablename_trim {tablename} { |
|
set segments [tablename_split $tablename false] |
|
set trimmed_segments [list] |
|
foreach seg $segments { |
|
lappend trimmed_segments [::string trim $seg [list " " \t]] |
|
} |
|
return [join $trimmed_segments .] |
|
} |
|
|
|
#utils::tablename_split |
|
proc tablename_split {tablename {normalize false}} { |
|
#we can't just split on . because we have to handle quoted segments which may contain a dot. |
|
#eg {dog."tater.man"} |
|
set i 0 |
|
set sLen [::string length $tablename] |
|
set segments [list] |
|
set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax |
|
#quoted is for double-quotes, litquoted is for single-quotes (string literal) |
|
set seg "" |
|
for {} {$i < $sLen} {} { |
|
|
|
if {$i > 0} { |
|
set lastChar [::string index $tablename [expr {$i - 1}]] |
|
} else { |
|
set lastChar "" |
|
} |
|
|
|
set c [::string index $tablename $i] |
|
incr i |
|
|
|
if {$c eq "."} { |
|
switch -exact -- $mode { |
|
unquoted { |
|
#dot marks end of segment. |
|
lappend segments $seg |
|
set seg "" |
|
set mode "unknown" |
|
} |
|
quoted { |
|
append seg $c |
|
} |
|
unknown { |
|
lappend segments $seg |
|
set seg "" |
|
} |
|
litquoted { |
|
append seg $c |
|
} |
|
default { |
|
#mode: syntax |
|
#we got our dot. - the syntax mode is now satisfied. |
|
set mode "unknown" |
|
} |
|
} |
|
} elseif {($c eq "\"") && ($lastChar ne "\\")} { |
|
if {$mode eq "unknown"} { |
|
if {[::string trim $seg] ne ""} { |
|
#we don't allow a quote in the middle of a bare key |
|
error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" |
|
} |
|
set mode "quoted" |
|
set seg "\"" |
|
} elseif {$mode eq "unquoted"} { |
|
append seg $c |
|
} elseif {$mode eq "quoted"} { |
|
append seg $c |
|
lappend segments $seg |
|
set seg "" |
|
set mode "syntax" ;#make sure we only accept a dot or end-of-data now. |
|
} elseif {$mode eq "litquoted"} { |
|
append seg $c |
|
} elseif {$mode eq "syntax"} { |
|
error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" |
|
} |
|
} elseif {($c eq "\'")} { |
|
if {$mode eq "unknown"} { |
|
append seg $c |
|
set mode "litquoted" |
|
} elseif {$mode eq "unquoted"} { |
|
#single quote inside e.g o'neill |
|
append seg $c |
|
} elseif {$mode eq "quoted"} { |
|
append seg $c |
|
|
|
} elseif {$mode eq "litquoted"} { |
|
append seg $c |
|
lappend segments $seg |
|
set seg "" |
|
set mode "syntax" |
|
} elseif {$mode eq "syntax"} { |
|
error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" |
|
} |
|
|
|
} elseif {$c in [list " " \t]} { |
|
if {$mode eq "syntax"} { |
|
#ignore |
|
} else { |
|
append seg $c |
|
} |
|
} else { |
|
if {$mode eq "syntax"} { |
|
error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" |
|
} |
|
if {$mode eq "unknown"} { |
|
set mode "unquoted" |
|
} |
|
append seg $c |
|
} |
|
if {$i == $sLen} { |
|
#end of data |
|
::tomlish::log::debug "End of data: mode='$mode'" |
|
switch -exact -- $mode { |
|
quoted { |
|
if {$c ne "\""} { |
|
error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" |
|
} |
|
if {$normalize} { |
|
lappend segments $seg |
|
} else { |
|
lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] |
|
#lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong |
|
} |
|
} |
|
litquoted { |
|
set trimmed_seg [::string trim $seg] |
|
if {[::string index $trimmed_seg end] ne "\'"} { |
|
error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" |
|
} |
|
lappend segments $seg |
|
} |
|
unquoted - unknown { |
|
lappend segments $seg |
|
} |
|
syntax { |
|
#ok - segment already lappended |
|
} |
|
default { |
|
lappend segments $seg |
|
} |
|
} |
|
} |
|
} |
|
foreach seg $segments { |
|
set trimmed [::string trim $seg [list " " \t]] |
|
#note - we explicitly allow 'empty' quoted strings '' & "" |
|
# (these are 'discouraged' but valid toml keys) |
|
#if {$trimmed in [list "''" "\"\""]} { |
|
# puts stderr "tablename_split. warning - Empty quoted string as tablename segment" |
|
#} |
|
if {$trimmed eq "" } { |
|
error "tablename_split. Empty segment found. tablename: '$tablename'" |
|
} |
|
} |
|
return $segments |
|
} |
|
|
|
proc unicode_escape_info {slashu} { |
|
#!todo |
|
# validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and |
|
# is a valid 'unicode scalar value' |
|
# ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive |
|
#expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} |
|
if {[::string match {\\u*} $slashu]} { |
|
set exp {^\\u([0-9a-fA-F]{4}$)} |
|
if {[regexp $exp $slashu match hex]} { |
|
if {[scan $hex %4x dec] != 1} { |
|
#why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? |
|
return [list err [list reason "Failed to convert '$hex' to decimal"]] |
|
} else { |
|
return [list ok [list char [subst -nocommand -novariable $slashu]]] |
|
} |
|
} else { |
|
return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] |
|
} |
|
} elseif {[::string match {\\U*} $slashu]} { |
|
set exp {^\\U([0-9a-fA-F]{8}$)} |
|
if {[regexp $exp $slashu match hex]} { |
|
if {[scan $hex %8x dec] != 1} { |
|
#why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? |
|
return [list err [list reason "Failed to convert '$hex' to decimal"]] |
|
} else { |
|
if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { |
|
return [list ok [list char [subst -nocommand -novariable $slashu]]] |
|
} else { |
|
return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] |
|
} |
|
} |
|
} else { |
|
return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] |
|
} |
|
} else { |
|
return [list err [list reason "Supplied string did not start with \\u or \\U" ]] |
|
} |
|
|
|
} |
|
|
|
proc unescape_string {str} { |
|
#note we can't just use Tcl subst because: |
|
# it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. |
|
# it would strip out backslashes inappropriately: e.g "\j" becomes just j |
|
# it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn |
|
# it replaces\<newline><whitespace> with a single whitespace |
|
#This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh |
|
|
|
set buffer "" |
|
set buffer4 "" ;#buffer for 4 hex characters following a \u |
|
set buffer8 "" ;#buffer for 8 hex characters following a \u |
|
|
|
set sLen [::string length $str] |
|
|
|
#we need to handle arbitrarily long sequences of backslashes. \\\\\ etc |
|
set slash_active 0 |
|
set unicode4_active 0 |
|
set unicode8_active 0 |
|
|
|
|
|
#!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? |
|
set i 0 |
|
for {} {$i < $sLen} {} { |
|
if {$i > 0} { |
|
set lastChar [::string index $str [expr {$i - 1}]] |
|
} else { |
|
set lastChar "" |
|
} |
|
|
|
set c [::string index $str $i] |
|
::tomlish::log::debug "unescape_string. got char $c" |
|
scan $c %c n |
|
if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { |
|
#we don't expect unescaped unicode characters from 0000 to 001F - |
|
#*except* for raw tab (which is whitespace) and newlines |
|
error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" |
|
} |
|
incr i ;#must incr here because we do'returns'inside the loop |
|
if {$c eq "\\"} { |
|
if {$slash_active} { |
|
append buffer "\\" |
|
set slash_active 0 |
|
} elseif {$unicode4_active} { |
|
error "unescape_string. unexpected case slash during unicode4 not yet handled" |
|
} elseif {$unicode8_active} { |
|
error "unescape_string. unexpected case slash during unicode8 not yet handled" |
|
} else { |
|
# don't output anything (yet) |
|
set slash_active 1 |
|
} |
|
} else { |
|
if {$unicode4_active} { |
|
if {[::string length $buffer4] < 4} { |
|
append buffer4 $c |
|
} |
|
if {[::string length $buffer4] == 4} { |
|
#we have a \uHHHH to test |
|
set unicode4_active 0 |
|
set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] |
|
if {[lindex $result 0] eq "ok"} { |
|
append buffer [dict get $result ok char] |
|
} else { |
|
error "unescape_string error: [lindex $result 1]" |
|
} |
|
} |
|
} elseif {$unicode8_active} { |
|
if {[::string length $buffer8] < 8} { |
|
append buffer8 $c |
|
} |
|
if {[::string length $buffer8] == 8} { |
|
#we have a \UHHHHHHHH to test |
|
set unicode8_active 0 |
|
set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] |
|
if {[lindex $result 0] eq "ok"} { |
|
append buffer [dict get $result ok char] |
|
} else { |
|
error "unescape_string error: [lindex $result 1]" |
|
} |
|
} |
|
} elseif {$slash_active} { |
|
set slash_active 0 |
|
set ctest [string map {{"} dq} $c] |
|
switch -exact -- $ctest { |
|
dq { |
|
set e "\\\"" |
|
append buffer [subst -nocommand -novariable $e] |
|
} |
|
b - t - n - f - r { |
|
set e "\\$c" |
|
append buffer [subst -nocommand -novariable $e] |
|
} |
|
u { |
|
set unicode4_active 1 |
|
set buffer4 "" |
|
} |
|
U { |
|
set unicode8_active 1 |
|
set buffer8 "" |
|
} |
|
default { |
|
set slash_active 0 |
|
|
|
append buffer "\\" |
|
append buffer $c |
|
} |
|
} |
|
} else { |
|
append buffer $c |
|
} |
|
} |
|
} |
|
#puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" |
|
if {$unicode4_active} { |
|
error "End of string reached before complete unicode escape sequence \uHHHH" |
|
} |
|
if {$unicode8_active} { |
|
error "End of string reached before complete unicode escape sequence \UHHHHHHHH" |
|
} |
|
if {$slash_active} { |
|
append buffer "\\" |
|
} |
|
return $buffer |
|
} |
|
|
|
proc normalize_key {rawkey} { |
|
set c1 [::string index $rawkey 0] |
|
set c2 [::string index $rawkey end] |
|
if {($c1 eq "'") && ($c2 eq "'")} { |
|
#single quoted segment. No escapes allowed within it. |
|
set key [::string range $rawkey 1 end-1] |
|
} elseif {($c1 eq "\"") && ($c2 eq "\"")} { |
|
#double quoted segment. Apply escapes. |
|
# |
|
set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only |
|
set key [::tomlish::utils::unescape_string $keydata] |
|
#set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. |
|
} else { |
|
set key $rawkey |
|
} |
|
return $key |
|
} |
|
|
|
proc string_to_slashu {string} { |
|
set rv {} |
|
foreach c [split $string {}] { |
|
scan $c %c c |
|
append rv {\u} |
|
append rv [format %.4X $c] |
|
} |
|
return $rv |
|
} |
|
|
|
#'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. |
|
proc nonprintable_to_slashu {s} { |
|
set res "" |
|
foreach i [split $s ""] { |
|
scan $i %c c |
|
|
|
set printable 0 |
|
if {($c>31) && ($c<127)} { |
|
set printable 1 |
|
} |
|
if {$printable} {append res $i} else {append res \\u[format %.4X $c]} |
|
} |
|
set res |
|
} ;#RS |
|
|
|
#check if str is valid for use as a toml bare key |
|
proc is_barekey {str} { |
|
if {[::string length $str] == 0} { |
|
return 0 |
|
} else { |
|
set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] |
|
if {[::string length $str] == $matches} { |
|
#all characters match the regexp |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
} |
|
|
|
#test only that the characters in str are valid for the toml specified type 'integer'. |
|
proc int_validchars1 {str} { |
|
set numchars [::string length $str] |
|
if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
#add support for hex,octal,binary 0x.. 0o.. 0b... |
|
proc int_validchars {str} { |
|
set numchars [::string length $str] |
|
if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
|
|
proc is_int {str} { |
|
set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] |
|
|
|
if {[tcl::string::length $str] == $matches} { |
|
#all characters in legal range |
|
|
|
# --------------------------------------- |
|
#check for leading zeroes in non 0x 0b 0o |
|
#first strip any +, - or _ (just for this test) |
|
set check [::string map {+ "" - "" _ ""} $str] |
|
if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { |
|
return 0 |
|
} |
|
# --------------------------------------- |
|
|
|
#check +,- only occur in the first position. |
|
if {[::string last - $str] > 0} { |
|
return 0 |
|
} |
|
if {[::string last + $str] > 0} { |
|
return 0 |
|
} |
|
set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores |
|
#use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) |
|
if {![string is integer -strict $numeric_value]} { |
|
return 0 |
|
} |
|
#!todo - check bounds only based on some config value |
|
#even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. |
|
#presumably very large numbers would have to be supplied in a toml file as strings. |
|
#Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max |
|
if {$numeric_value > $::tomlish::max_int} { |
|
return 0 |
|
} |
|
if {$numeric_value < $::tomlish::min_int} { |
|
return 0 |
|
} |
|
} else { |
|
return 0 |
|
} |
|
#Got this far - didn't find anything wrong with it. |
|
return 1 |
|
} |
|
|
|
#test only that the characters in str are valid for the toml specified type 'float'. |
|
proc float_validchars {str} { |
|
set numchars [::string length $str] |
|
if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
#only allow lower case for these special values - as per Toml 1.0 spec |
|
if {$str ni {inf +inf -inf nan +nan -nan}} { |
|
return 0 |
|
} else { |
|
return 1 |
|
} |
|
} |
|
} |
|
|
|
proc is_float {str} { |
|
set matches [regexp -all {[eE0-9\_\-\+\.]} $str] |
|
#don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) |
|
if {$str in {inf +inf -inf nan +nan -nan}} { |
|
return 1 |
|
} |
|
|
|
if {[::string length $str] == $matches} { |
|
#all characters in legal range |
|
#A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) |
|
#Toml spec also disallows leading zeros in the exponent part |
|
#... but this seems less interoperable (some libraries generate leading zeroes in exponents) |
|
#for now we will allow leading zeros in exponents |
|
#!todo - configure 'strict' option to disallow? |
|
#first strip any +, - or _ (just for this test) |
|
set check [::string map {+ "" - "" _ ""} $str] |
|
set r {([0-9])*} |
|
regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E |
|
set z {([0])*} |
|
regexp $z $intpart leadingzeros |
|
if {[::string length $leadingzeros] > 1} { |
|
return 0 |
|
} |
|
#for floats, +,- may occur in multiple places |
|
#e.g -2E-22 +3e34 |
|
#!todo - check bounds ? |
|
|
|
#strip underscores for tcl double check |
|
set check [::string map {_ ""} $str] |
|
#string is double accepts inf nan +NaN etc. |
|
if {![::string is double $check]} { |
|
return 0 |
|
} |
|
|
|
} else { |
|
return 0 |
|
} |
|
#Got this far - didn't find anything wrong with it. |
|
return 1 |
|
} |
|
|
|
#test only that the characters in str are valid for the toml specified type 'datetime'. |
|
proc datetime_validchars {str} { |
|
set numchars [::string length $str] |
|
if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
|
|
proc is_datetime {str} { |
|
#e.g 1979-05-27T00:32:00-07:00 |
|
set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] |
|
if {[::string length $str] == $matches} { |
|
#all characters in legal range |
|
#!todo - use full RFC 3339 parser? |
|
lassign [split $str T] datepart timepart |
|
#!todo - what if the value is 'time only'? |
|
|
|
if {[catch {clock scan $datepart} err]} { |
|
puts stderr "tcl clock scan failed err:'$err'" |
|
return 0 |
|
} |
|
#!todo - verify time part is reasonable |
|
} else { |
|
return 0 |
|
} |
|
return 1 |
|
} |
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::utils ---}] |
|
} |
|
|
|
namespace eval tomlish::parse { |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::parse}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text |
|
|
|
variable state |
|
# states: |
|
# key-space, curly-space, array-space |
|
# value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring |
|
# |
|
# notes: |
|
# key-space i |
|
# only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack |
|
# value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax |
|
# |
|
#stateMatrix defines for each state, actions to take for each possible token. |
|
#single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. |
|
#dual-element actions are a push command and the name of the space to push on the stack. |
|
# - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) |
|
|
|
#test |
|
variable stateMatrix |
|
set stateMatrix [dict create] |
|
|
|
dict set stateMatrix\ |
|
key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} |
|
|
|
|
|
dict set stateMatrix\ |
|
curly-space {\ |
|
whitespace "curly-space"\ |
|
newline "curly-space"\ |
|
barekey {pushspace "itablekeyval-space"}\ |
|
itablequotedkey "itablekeyval-space"\ |
|
endinlinetable "popspace"\ |
|
startquote "itablequotedkey"\ |
|
comma "curly-space"\ |
|
eof "err"\ |
|
comment "err"\ |
|
} |
|
|
|
#REVIEW |
|
#toml spec looks like heading towards allowing newlines within inline tables |
|
#https://github.com/toml-lang/toml/issues/781 |
|
dict set stateMatrix\ |
|
curly-syntax {\ |
|
whitespace "curly-syntax"\ |
|
newline "curly-syntax"\ |
|
barekey {pushspace "itablekeyval-space"}\ |
|
itablequotedkey "itablekeyval-space"\ |
|
endinlinetable "popspace"\ |
|
startquote "itablequotedkey"\ |
|
comma "curly-space"\ |
|
eof "err"\ |
|
comment "err"\ |
|
} |
|
|
|
|
|
dict set stateMatrix\ |
|
value-expected {\ |
|
whitespace "value-expected"\ |
|
newline "err"\ |
|
eof "err"\ |
|
untyped-value "samespace"\ |
|
startquote "string"\ |
|
startmultiquote {pushspace "multistring-space"}\ |
|
startinlinetable {pushspace curly-space}\ |
|
comment "err"\ |
|
comma "err"\ |
|
startarray {pushspace array-space}\ |
|
} |
|
dict set stateMatrix\ |
|
array-space {\ |
|
whitespace "array-space"\ |
|
newline "array-space"\ |
|
eof "err"\ |
|
untyped-value "samespace"\ |
|
startarray {pushspace "array-space"}\ |
|
endarray "popspace"\ |
|
startquote "string"\ |
|
startmultiquote "multistring"\ |
|
comma "array-space"\ |
|
comment "array-space"\ |
|
} |
|
dict set stateMatrix\ |
|
array-syntax {\ |
|
whitespace "array-syntax"\ |
|
newline "array-syntax"\ |
|
untyped-value "samespace"\ |
|
startarray {pushspace array-space}\ |
|
endarray "popspace"\ |
|
startquote "string"\ |
|
startmultiquote "multistring"\ |
|
comma "array-space"\ |
|
comment "err"\ |
|
} |
|
|
|
|
|
dict set stateMatrix\ |
|
itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} |
|
#dict set stateMatrix\ |
|
# itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} |
|
dict set stateMatrix\ |
|
itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} |
|
dict set stateMatrix\ |
|
itablekeyval-space {} |
|
dict set stateMatrix\ |
|
itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} |
|
|
|
|
|
dict set stateMatrix\ |
|
keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} |
|
dict set stateMatrix\ |
|
keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} |
|
dict set stateMatrix\ |
|
keyval-space {} |
|
|
|
|
|
|
|
dict set stateMatrix\ |
|
quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} |
|
dict set stateMatrix\ |
|
string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} |
|
dict set stateMatrix\ |
|
stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} |
|
dict set stateMatrix\ |
|
multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} |
|
dict set stateMatrix\ |
|
multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} |
|
dict set stateMatrix\ |
|
tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} |
|
dict set stateMatrix\ |
|
baretablename {whitespace "NA" newline "err" equal "value-expected"} |
|
dict set stateMatrix\ |
|
tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} |
|
dict set stateMatrix\ |
|
tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} |
|
dict set stateMatrix\ |
|
tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} |
|
dict set stateMatrix\ |
|
end {} |
|
|
|
#see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push |
|
variable stateMatrix_orig { |
|
key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} |
|
curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} |
|
value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} |
|
array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} |
|
array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} |
|
keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} |
|
keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} |
|
keyval-space {} |
|
quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} |
|
string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} |
|
stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} |
|
multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} |
|
multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} |
|
tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} |
|
tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} |
|
tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} |
|
tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} |
|
end {} |
|
} |
|
#build a list of 'push triggers' from the stateMatrix |
|
# ie tokens which can push a new space onto spacestack |
|
set push_trigger_tokens [list] |
|
tcl::dict::for {s transitions} $stateMatrix { |
|
tcl::dict::for {token transition_to} $transitions { |
|
set action [lindex $transition_to 0] |
|
switch -exact -- $action { |
|
pushspace - zeropoppushspace { |
|
if {$token ni $push_trigger_tokens} { |
|
lappend push_trigger_tokens $token |
|
} |
|
} |
|
} |
|
} |
|
} |
|
puts stdout "push_trigger_tokens: $push_trigger_tokens" |
|
#!todo - hard code once stateMatrix finalised? |
|
|
|
|
|
#mainly for the -space states: |
|
#redirect to another state $c based on a state transition from $whatever to $b |
|
# e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. |
|
#this is useful as we often don't know state $b. e.g when it is decided by 'popspace' |
|
variable spacePopTransitions { |
|
array-space array-syntax |
|
curly-space curly-syntax |
|
keyval-space keytail |
|
itablekeyval-space itablevaltail |
|
} |
|
variable spacePushTransitions { |
|
keyval-space keyval-syntax |
|
itablekeyval-space itablekeyval-syntax |
|
array-space array-space |
|
curly-space curly-space |
|
key-space tablename |
|
} |
|
|
|
|
|
variable state_list |
|
|
|
namespace export tomlish toml |
|
namespace ensemble create |
|
|
|
proc getNextState {tokentype currentstate} { |
|
variable nest |
|
variable v |
|
|
|
variable spacePopTransitions |
|
variable spacePushTransitions |
|
variable last_space_action "none" |
|
variable last_space_type "none" |
|
variable state_list |
|
|
|
set result "" |
|
|
|
if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { |
|
set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] |
|
::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" |
|
switch -exact -- [lindex $transition_to 0] { |
|
popspace { |
|
spacestack pop |
|
set parent [spacestack peek] |
|
lassign $parent type target |
|
set last_space_action "pop" |
|
set last_space_type $type |
|
|
|
if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { |
|
set next [dict get $::tomlish::parse::spacePopTransitions $target] |
|
::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" |
|
} else { |
|
set next $target |
|
} |
|
set result $next |
|
} |
|
samespace { |
|
#note the same data as popspace (spacePopTransitions) is used here. |
|
set parent [spacestack peek] |
|
::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" |
|
lassign $parent type target |
|
if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { |
|
set next [dict get $::tomlish::parse::spacePopTransitions $target] |
|
::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" |
|
} else { |
|
set next $target |
|
} |
|
set result $next |
|
} |
|
zeropoppushspace { |
|
if {$nest > 0} { |
|
#pop back down to the root level (key-space) |
|
spacestack pop |
|
set parent [spacestack peek] |
|
lassign $parent type target |
|
set last_space_action "pop" |
|
set last_space_type $type |
|
|
|
#----- |
|
#standard pop |
|
set parentlevel [expr {$nest -1}] |
|
lappend v($parentlevel) [set v($nest)] |
|
incr nest -1 |
|
#----- |
|
} |
|
#re-entrancy |
|
|
|
#set next [list pushspace [lindex $transition_to 1]] |
|
set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 |
|
::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" |
|
set result [::tomlish::parse::getNextState $nexttokentype $tokentype] |
|
} |
|
pushspace { |
|
set target [lindex $transition_to 1] |
|
spacestack push [list space $target] |
|
set last_space_action "push" |
|
set last_space_type "space" |
|
|
|
#puts $::tomlish::parse::spacePushTransitions |
|
if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { |
|
set next [dict get $::tomlish::parse::spacePushTransitions $target] |
|
::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" |
|
} else { |
|
set next $target |
|
} |
|
set result $next |
|
} |
|
default { |
|
set result $transition_to |
|
} |
|
} |
|
} else { |
|
set result "nostate-err" |
|
|
|
} |
|
lappend state_list $result |
|
return $result |
|
} |
|
|
|
proc report_line {{line ""}} { |
|
variable linenum |
|
variable is_parsing |
|
if {$is_parsing} { |
|
if {$line eq ""} { |
|
set line $linenum |
|
} |
|
return "Line Number: $line" |
|
} else { |
|
#not in the middle of parsing tomlish text - return nothing. |
|
return "" |
|
} |
|
} |
|
|
|
#produce a *slightly* more readable string rep of the nest for puts etc. |
|
proc nest_pretty1 {list} { |
|
set prettier "{" |
|
|
|
foreach el $list { |
|
if { [lindex $el 0] eq "NEWLINE"} { |
|
append prettier "[list $el]\n" |
|
} elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { |
|
append prettier [nest_pretty1 $el] |
|
} else { |
|
append prettier "[list $el] " |
|
} |
|
} |
|
append prettier "}" |
|
return $prettier |
|
} |
|
|
|
proc set_tokenType {t} { |
|
variable tokenType |
|
variable tokenType_list |
|
if {![info exists tokenType]} { |
|
set tokenType "" |
|
} |
|
lappend tokenType_list $t |
|
set tokenType $t |
|
} |
|
|
|
proc switch_tokenType {t} { |
|
variable tokenType |
|
variable tokenType_list |
|
lset tokenType_list end $t |
|
set tokenType $t |
|
} |
|
|
|
proc get_tokenType {} { |
|
variable tokenType |
|
return $tokenType |
|
} |
|
|
|
proc _shortcircuit_startquotesequence {} { |
|
variable tok |
|
variable i |
|
set toklen [::string length $tok] |
|
if {$toklen == 1} { |
|
set_tokenType "startquote" |
|
incr i -1 |
|
return -level 2 1 |
|
} elseif {$toklen == 2} { |
|
set_tokenType "startquote" |
|
set tok "\"" |
|
incr i -2 |
|
return -level 2 1 |
|
} |
|
} |
|
|
|
#return a list of 0 1 or 2 tokens |
|
#tomlish::parse::tok |
|
proc tok {s} { |
|
variable nest |
|
variable v |
|
variable i |
|
variable tok |
|
variable type ;#character type |
|
variable state ;#FSM |
|
|
|
set resultlist [list] |
|
|
|
variable tokenType |
|
variable tokenType_list |
|
|
|
|
|
variable endToken |
|
set sLen [::string length $s] |
|
|
|
variable lastChar |
|
|
|
variable braceCount |
|
variable bracketCount |
|
|
|
|
|
#------------------------------ |
|
#Previous run found another (presumably single-char) token |
|
variable token_waiting |
|
if {[dict size $token_waiting]} { |
|
set tokenType [dict get $token_waiting type] |
|
set tok [dict get $token_waiting tok] |
|
dict unset token_waiting type |
|
dict unset token_waiting tok |
|
return 1 |
|
} |
|
#------------------------------ |
|
|
|
set slash_active 0 |
|
set quote 0 |
|
set c "" |
|
set multi_dquote "" |
|
for {} {$i < $sLen} {} { |
|
if {$i > 0} { |
|
set lastChar [string index $s [expr {$i - 1}]] |
|
} else { |
|
set lastChar "" |
|
} |
|
|
|
set c [string index $s $i] |
|
#puts "got char $c during tokenType '$tokenType'" |
|
incr i ;#must incr here because we do'returns'inside the loop |
|
|
|
set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] |
|
switch -exact -- $ctest { |
|
# { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
|
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
barekey { |
|
error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" |
|
} |
|
whitespace { |
|
# hash marks end of whitespace token |
|
#do a return for the whitespace, set token_waiting |
|
#dict set token_waiting type comment |
|
#dict set token_waiting tok "" |
|
incr i -1 ;#leave comment for next run |
|
return 1 |
|
} |
|
untyped-value { |
|
#REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? |
|
#we will accept a comment marker as an immediate terminator of the untyped-value. |
|
incr i -1 |
|
return 1 |
|
} |
|
default { |
|
#quotedkey, string, multistring |
|
append tok $c |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
#start of token if we're not in a token |
|
set_tokenType comment |
|
set tok "" ;#The hash is not part of the comment data |
|
} |
|
} |
|
lc { |
|
set multi_dquote "" ;#!! |
|
#test jmn2024 |
|
#left curly brace |
|
try { |
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - stringpart { |
|
if {$slash_active} {append tok "\\"} |
|
append tok $c |
|
} |
|
starttablename { |
|
error "unexpected tablename problem" |
|
#$slash_active not relevant to this tokentype |
|
#change the tokenType |
|
switch_tokenType "starttablearrayname" |
|
set tok "" ;#no output into the tomlish list for this token |
|
#any following whitespace is part of the tablearrayname, so return now |
|
return 1 |
|
} |
|
comment { |
|
if {$slash_active} {append tok "\\"} |
|
append tok "\[" |
|
} |
|
default { |
|
#end any other token. |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
value-expected { |
|
#switch last key to tablename?? |
|
set_tokenType "startinlinetable" |
|
set tok "\{" |
|
return 1 |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
if {$slash_active} { |
|
set tok "\\\{" |
|
} else { |
|
set tok "\{" |
|
} |
|
} |
|
key-space { |
|
#invalid - but allow parser statemachine to report it. ? |
|
set_tokenType "startinlinetable" |
|
set tok "\{" |
|
return 1 |
|
} |
|
array-space - array-syntax { |
|
#nested anonymous inline table |
|
set_tokenType "startinlinetable" |
|
set tok "\{" |
|
return 1 |
|
} |
|
default { |
|
error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} on error {em} { |
|
error $em |
|
} finally { |
|
set slash_active 0 |
|
} |
|
|
|
} |
|
rc { |
|
set multi_dquote "" ;#!! |
|
#right curly brace |
|
try { |
|
if {[string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - stringpart - comment { |
|
if {$slash_active} {append tok "\\"} |
|
append tok $c |
|
} |
|
tablename { |
|
if {$slash_active} {append tok "\\"} |
|
#invalid! - but leave for datastructure loading stage to catch |
|
dict set token_waiting type endinlinetable |
|
dict set token_waiting tok "" |
|
return 1 |
|
} |
|
tablearrayname { |
|
if {$slash_active} {append tok "\\"} |
|
#invalid! - but leave for datastructure loading stage to catch |
|
dict set token_waiting type endtablearrayname |
|
dict set token_waiting tok "" |
|
return 1 |
|
} |
|
itablevaltail { |
|
|
|
} |
|
default { |
|
#end any other token |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
switch -exact -- $state { |
|
value-expected { |
|
#invalid - but allow parser statemachine to report it. |
|
set_tokenType "endinlinetable" |
|
set tok "\}" |
|
return 1 |
|
} |
|
key-space { |
|
#invalid - but allow parser statemachine to report it. ? |
|
set_tokenType "endinlinetable" |
|
set tok "\}" |
|
return 1 |
|
} |
|
tablename { |
|
#e.g [] - empty tablename - allowed or not? |
|
#empty tablename/tablearrayname ? |
|
error "unexpected tablename problem" |
|
|
|
set_tokenType "endinlinetable" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
tablearrayname { |
|
error "unexpected tablearrayname problem" |
|
set_tokenType "endinlinetable" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
curly-syntax - curly-space { |
|
set_tokenType "endinlinetable" |
|
set tok "\}" |
|
return 1 |
|
} |
|
array-syntax - array-space { |
|
#invalid |
|
set_tokenType "endinlinetable" |
|
set tok "\}" |
|
return 1 |
|
} |
|
itablevaltail { |
|
set_tokenType "endinlinetable" |
|
set tok "" |
|
#we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 |
|
incr i -1 |
|
return 1 |
|
} |
|
itablekeyval-syntax { |
|
error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" |
|
} |
|
default { |
|
#JMN2024b keytail? |
|
error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} on error {em} { |
|
error $em |
|
} finally { |
|
set slash_active 0 |
|
} |
|
|
|
} |
|
lb { |
|
set multi_dquote "" ;#!! |
|
#left square bracket |
|
try { |
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - stringpart { |
|
if {$slash_active} {append tok "\\"} |
|
append tok $c |
|
} |
|
starttablename { |
|
#$slash_active not relevant to this tokentype |
|
#change the tokenType |
|
switch_tokenType "starttablearrayname" |
|
set tok "" ;#no output into the tomlish list for this token |
|
#any following whitespace is part of the tablearrayname, so return now |
|
return 1 |
|
} |
|
comment { |
|
if {$slash_active} {append tok "\\"} |
|
append tok "\[" |
|
} |
|
default { |
|
#end any other token. |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
switch -exact -- $state { |
|
value-expected { |
|
set_tokenType "startarray" |
|
set tok "\[" |
|
return 1 |
|
} |
|
key-space { |
|
#table name |
|
#assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray |
|
#note that a starttablearrayname token may contain whitespace between the brackets |
|
# e.g \[ \[ |
|
set_tokenType "starttablename" |
|
set tok "" ;#there is no output into the tomlish list for this token |
|
} |
|
array-space - array-syntax { |
|
#nested array? |
|
set_tokenType "startarray" |
|
set tok "\[" |
|
return 1 |
|
#error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" |
|
} |
|
default { |
|
error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} on error {em} { |
|
error $em |
|
} finally { |
|
set slash_active 0 |
|
} |
|
} |
|
rb { |
|
set multi_dquote "" ;#!! |
|
#right square bracket |
|
try { |
|
|
|
if {[string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - stringpart - comment { |
|
if {$slash_active} {append tok "\\"} |
|
append tok $c |
|
} |
|
tablename { |
|
if {$slash_active} {append tok "\\"} |
|
#invalid! - but leave for datastructure loading stage to catch |
|
dict set token_waiting type endtablename |
|
dict set token_waiting tok "" |
|
return 1 |
|
} |
|
tablearraynames { |
|
if {$slash_active} {append tok "\\"} |
|
#invalid! - but leave for datastructure loading stage to catch |
|
dict set token_waiting type endtablearrayname |
|
dict set token_waiting tok "" |
|
return 1 |
|
} |
|
default { |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
switch -exact -- $state { |
|
value-expected { |
|
#invalid - but allow parser statemachine to report it. |
|
set_tokenType "endarray" |
|
set tok "\]" |
|
return 1 |
|
} |
|
key-space { |
|
#invalid - but allow parser statemachine to report it. ? |
|
set_tokenType "endarray" |
|
set tok "\]" |
|
return 1 |
|
} |
|
tablename { |
|
#e.g [] - empty tablename - allowed or not? |
|
#empty tablename/tablearrayname ? |
|
error "unexpected tablename problem" |
|
|
|
set_tokenType "endtablename" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
tablearrayname { |
|
error "unexpected tablearrayname problem" |
|
set_tokenType "endtablearray" |
|
set tok "" ;#no output into the tomlish list for this token |
|
return 1 |
|
} |
|
array-syntax - array-space { |
|
set_tokenType "endarray" |
|
set tok "\]" |
|
return 1 |
|
} |
|
default { |
|
error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} on error {em} { |
|
error $em |
|
} finally { |
|
set slash_active 0 |
|
} |
|
} |
|
bsl { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
#backslash |
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - litstring - multilitstring - comment - tablename - tablearrayname { |
|
if {$slash_active} { |
|
set slash_active 0 |
|
append tok "\\\\" |
|
} else { |
|
set slash_active 1 |
|
} |
|
} |
|
stringpart { |
|
if {$slash_active} { |
|
#assert - quotes empty - or we wouldn't have slash_active |
|
set slash_active 0 |
|
append tok "\\\\" |
|
} else { |
|
append tok $dquotes |
|
set slash_active 1 |
|
} |
|
} |
|
whitespace { |
|
if {$state eq "multistring-space"} { |
|
#end whitespace token |
|
incr i -1 |
|
return 1 |
|
} else { |
|
error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" |
|
} |
|
} |
|
barekey { |
|
error "Unexpected backslash during barekey. [tomlish::parse::report_line]" |
|
} |
|
default { |
|
error "Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
if {$state eq "multistring-space"} { |
|
set slash_active 1 |
|
} else { |
|
error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
dq { |
|
#double quote |
|
try { |
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
set toklen [::string length $tok] |
|
if {$toklen == 1} { |
|
append tok $c |
|
} elseif {$toklen == 2} { |
|
append tok $c |
|
set_tokenType "startmultiquote" |
|
return 1 |
|
} else { |
|
error "unexpected token length in 'startquotesequence'" |
|
} |
|
} |
|
endquotesequence { |
|
set toklen [::string length $tok] |
|
if {$toklen == 1} { |
|
append tok $c |
|
} elseif {$toklen == 2} { |
|
append tok $c |
|
set_tokenType "endmultiquote" |
|
return 1 |
|
} else { |
|
error "unexpected token length in 'endquotesequence'" |
|
} |
|
} |
|
string { |
|
if {$slash_active} { |
|
append tok "\\" |
|
append tok $c |
|
} else { |
|
#unescaped quote always terminates a string? |
|
dict set token_waiting type endquote |
|
dict set token_waiting tok "\"" |
|
return 1 |
|
} |
|
} |
|
stringpart { |
|
#sub element of multistring |
|
if {$slash_active} { |
|
append tok "\\" |
|
append tok $c |
|
} else { |
|
#incr i -1 |
|
|
|
if {$multi_dquote eq "\"\""} { |
|
dict set token_waiting type endmultiquote |
|
dict set token_waiting tok "\"\"\"" |
|
set multi_dquote "" |
|
return 1 |
|
} else { |
|
append multi_dquote "\"" |
|
} |
|
} |
|
} |
|
whitespace { |
|
switch -exact -- $state { |
|
multistring-space { |
|
#REVIEW |
|
if {$multi_dquote eq "\"\""} { |
|
dict set token_waiting type endmultiquote |
|
dict set token_waiting tok "\"\"\"" |
|
set multi_dquote "" |
|
return 1 |
|
} else { |
|
append multi_dquote "\"" |
|
} |
|
} |
|
value-expected { |
|
if {$multi_dquote eq "\"\""} { |
|
dict set token_waiting type startmultiquote |
|
dict set token_waiting tok "\"\"\"" |
|
set multi_dquote "" |
|
return 1 |
|
} else { |
|
#end whitespace token and reprocess |
|
incr i -1 |
|
return 1 |
|
#append multi_dquote "\"" |
|
} |
|
} |
|
default { |
|
dict set token_waiting type startquote |
|
dict set token_waiting tok "\"" |
|
return 1 |
|
} |
|
} |
|
} |
|
comment { |
|
if {$slash_active} {append tok "\\"} |
|
append tok $c |
|
} |
|
quotedkey - itablequotedkey { |
|
if {$slash_active} { |
|
append tok "\\" |
|
append tok $c |
|
} else { |
|
dict set token_waiting type endquote |
|
dict set token_waiting tok "\"" |
|
return 1 |
|
} |
|
} |
|
tablename - tablearrayname { |
|
if {$slash_active} {append tok "\\"} |
|
append tok $c |
|
} |
|
starttablename - starttablearrayname { |
|
incr i -1 ;## |
|
return 1 |
|
} |
|
default { |
|
error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
#$slash_active not relevant when no tokenType |
|
#token is string only if we're expecting a value at this point |
|
switch -exact -- $state { |
|
value-expected - array-space { |
|
#!? start looking for possible multistartquote |
|
#set_tokenType startquote |
|
#set tok $c |
|
#return 1 |
|
set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote |
|
set tok $c |
|
} |
|
multistring-space { |
|
#REVIEW |
|
if {$multi_dquote eq "\"\""} { |
|
dict set token_waiting type endmultiquote |
|
dict set token_waiting tok "\"\"\"" |
|
set multi_dquote "" |
|
return 1 |
|
} else { |
|
append multi_dquote "\"" |
|
} |
|
} |
|
key-space { |
|
set tokenType startquote |
|
set tok $c |
|
return 1 |
|
} |
|
curly-space { |
|
set tokenType startquote |
|
set tok $c |
|
return 1 |
|
} |
|
tablename - tablearrayname { |
|
set_tokenType $state |
|
set tok $c |
|
} |
|
default { |
|
error "Unexpected quote during state '$state' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} |
|
} on error {em} { |
|
error $em |
|
} finally { |
|
set slash_active 0 |
|
} |
|
} |
|
= { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
|
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - comment - quotedkey { |
|
#for these tokenTypes an = is just data. |
|
append tok $c |
|
} |
|
stringpart { |
|
append tok $dquotes$c |
|
} |
|
whitespace { |
|
dict set token_waiting type equal |
|
dict set token_waiting tok = |
|
return 1 |
|
} |
|
barekey { |
|
dict set token_waiting type equal |
|
dict set token_waiting tok = |
|
return 1 |
|
} |
|
default { |
|
error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
set_tokenType stringpart |
|
set tok ${dquotes}= |
|
} |
|
default { |
|
set_tokenType equal |
|
set tok = |
|
return 1 |
|
} |
|
} |
|
} |
|
} |
|
cr { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
# \r carriage return |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
stringpart { |
|
append tok $dquotes$c |
|
} |
|
default { |
|
#!todo - error out if cr inappropriate for tokenType |
|
append tok $c |
|
} |
|
} |
|
} else { |
|
#lf may be appended if next |
|
#review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) |
|
set_tokenType newline |
|
set tok cr |
|
} |
|
} |
|
lf { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
# \n newline |
|
if {[::string length $tokenType]} { |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
newline { |
|
#this lf is the trailing part of a crlf |
|
append tok lf |
|
return 1 |
|
} |
|
stringpart { |
|
if {$dquotes ne ""} { |
|
append tok $dquotes |
|
incr i -1 |
|
return 1 |
|
} else { |
|
dict set token_waiting type newline |
|
dict set token_waiting tok lf |
|
return 1 |
|
} |
|
} |
|
default { |
|
#newline ends all other tokens. |
|
#note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) |
|
#note for whitespace: |
|
# we will use the convention that \n terminates the current whitespace even if whitespace follows |
|
# ie whitespace is split into separate whitespace tokens at each newline |
|
|
|
#puts "-------------- newline lf during tokenType $tokenType" |
|
dict set token_waiting type newline |
|
dict set token_waiting tok lf |
|
return 1 |
|
} |
|
} |
|
} else { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
if {$had_slash} { |
|
set_tokenType "continuation" |
|
set tok "\\" |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_tokenType newline |
|
set tok lf |
|
return 1 |
|
} |
|
} |
|
} |
|
, { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - comment - quotedkey - tablename - tablearrayname { |
|
append tok $c |
|
} |
|
stringpart { |
|
append tok $dquotes$c |
|
} |
|
default { |
|
dict set token_waiting type comma |
|
dict set token_waiting tok "," |
|
return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
set_tokenType stringpart |
|
set tok "," |
|
} |
|
multiliteral-space { |
|
set_tokenType literalpart |
|
set tok "," |
|
} |
|
default { |
|
set_tokenType comma |
|
set tok "," |
|
return 1 |
|
} |
|
} |
|
} |
|
} |
|
. { |
|
set multi_dquote "" ;#!! |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
if {[::string length $tokenType]} { |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
string - stringpart - comment - quotedkey - untyped-value { |
|
append tok $c |
|
} |
|
baretablename - tablename - tablearrayname { |
|
#subtable - split later - review |
|
append tok $c |
|
} |
|
barekey { |
|
#we need to transition the barekey to become a structured table name ??? review |
|
switch_tokenType tablename |
|
incr i -1 |
|
|
|
#error "barekey period unimplemented" |
|
} |
|
default { |
|
error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
#dict set token_waiting type period |
|
#dict set token_waiting tok "." |
|
#return 1 |
|
} |
|
} |
|
} else { |
|
switch -exact -- $state { |
|
multistring-space { |
|
set_tokenType stringpart |
|
set tok "." |
|
} |
|
multiliteral-space { |
|
set_tokenType literalpart |
|
set tok "." |
|
} |
|
default { |
|
set_tokenType untyped-value |
|
set tok "." |
|
} |
|
} |
|
} |
|
|
|
} |
|
" " { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
if {[::string length $tokenType]} { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
barekey { |
|
#whitespace is a terminator for bare keys |
|
#dict set token_waiting type whitespace |
|
#dict set token_waiting tok $c |
|
incr i -1 |
|
return 1 |
|
} |
|
untyped-value { |
|
#unquoted values (int,date,float etc) are terminated by whitespace |
|
#dict set token_waiting type whitespace |
|
#dict set token_waiting tok $c |
|
incr i -1 |
|
return 1 |
|
} |
|
comment { |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
append tok $c |
|
} |
|
quotedkey - string { |
|
if {$had_slash} { |
|
append tok "\\" |
|
} |
|
#if {$dquotes eq "\""} { |
|
#} |
|
append tok $c |
|
} |
|
whitespace { |
|
append tok $c |
|
} |
|
stringpart { |
|
if {$had_slash} { |
|
#REVIEW |
|
incr i -2 |
|
return 1 |
|
} else { |
|
#split into STRINGPART aaa WS " " |
|
#keeping WS separate allows easier processing of CONT stripping |
|
append tok $dquotes |
|
incr i -1 |
|
return 1 |
|
} |
|
} |
|
starttablename { |
|
incr i -1 |
|
return 1 |
|
} |
|
starttablearrayname { |
|
incr i -1 |
|
return 1 |
|
} |
|
tablename - tablearrayname { |
|
#include whitespace in the tablename/tablearrayname |
|
#Will need to be normalized upon interpreting the tomlish as a datastructure |
|
append tok $c |
|
} |
|
default { |
|
error "Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
set had_slash $slash_active |
|
if {$slash_active} { |
|
set slash_active 0 |
|
} |
|
switch -exact -- $state { |
|
tablename - tablearrayname { |
|
#tablename can have leading,trailing and interspersed whitespace! |
|
#These will not be treated as whitespace tokens, instead forming part of the name. |
|
set_tokenType $state |
|
if {$had_slash} { |
|
set tok "\\$c" |
|
} else { |
|
set tok $c |
|
} |
|
} |
|
multistring-space { |
|
if {$had_slash} { |
|
set_tokenType "continuation" |
|
set tok "\\" |
|
incr i -1 |
|
return 1 |
|
} else { |
|
if {$dquotes ne ""} { |
|
set_tokenType "stringpart" |
|
set tok $dquotes |
|
incr i -1 |
|
return |
|
} |
|
set_tokenType "whitespace" |
|
append tok $c |
|
} |
|
} |
|
default { |
|
if {$had_slash} { |
|
error "unexpected backslash [tomlish::parse::report_line]" |
|
} |
|
set_tokenType "whitespace" |
|
append tok $c |
|
} |
|
} |
|
} |
|
} |
|
tab { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
|
|
if {[::string length $tokenType]} { |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
barekey { |
|
#whitespace is a terminator for bare keys |
|
incr i -1 |
|
#set token_waiting type whitespace |
|
#set token_waiting tok $c |
|
return 1 |
|
} |
|
untyped_value { |
|
#unquoted values (int,date,float etc) are terminated by whitespace |
|
#dict set token_waiting type whitespace |
|
#dict set token_waiting tok $c |
|
incr i -1 |
|
return 1 |
|
} |
|
quotedkey { |
|
append tok $c |
|
} |
|
string - comment - whitespace { |
|
append tok $c |
|
} |
|
stringpart { |
|
append tok $dquotes$c |
|
} |
|
starttablename - starttablearrayname { |
|
incr i -1 |
|
return 1 |
|
} |
|
tablename - tablearraynames { |
|
#include whitespace in the tablename/tablearrayname |
|
#Will need to be normalized upon interpreting the tomlish as a datastructure |
|
append tok $c |
|
} |
|
default { |
|
error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" |
|
} |
|
} |
|
} else { |
|
set had_slash $slash_active |
|
if {$slash_active} { |
|
set slash_active 0 |
|
} |
|
switch -exact -- $state { |
|
tablename - tablearrayname { |
|
#tablename can have leading,trailing and interspersed whitespace! |
|
#These will not be treated as whitespace tokens, instead forming part of the name. |
|
set_tokenType $state |
|
set tok $c |
|
} |
|
multistring-space { |
|
if {$had_slash} { |
|
set_tokenType "continuation" |
|
set tok "\\" |
|
incr i -1 |
|
return 1 |
|
} else { |
|
if {$dquotes ne ""} { |
|
set_tokenType stringpart |
|
set tok $dquotes |
|
incr i -1 |
|
return 1 |
|
} else { |
|
set_tokenType whitespace |
|
append tok $c |
|
} |
|
} |
|
} |
|
default { |
|
set_tokenType "whitespace" |
|
append tok $c |
|
} |
|
} |
|
} |
|
} |
|
bom { |
|
#BOM (Byte Order Mark) - ignored by token consumer |
|
set_tokenType "bom" |
|
set tok "\uFEFF" |
|
return 1 |
|
} |
|
default { |
|
set dquotes $multi_dquote |
|
set multi_dquote "" ;#!! |
|
|
|
if {[::string length $tokenType]} { |
|
if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. |
|
set slash_active 0 |
|
switch -exact -- $tokenType { |
|
startquotesequence { |
|
_shortcircuit_startquotesequence |
|
} |
|
endquotesequence { |
|
puts stderr "endquotesequence: $tok" |
|
} |
|
whitespace { |
|
incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. |
|
return 1 |
|
} |
|
barekey { |
|
if {[tomlish::utils::is_barekey $c]} { |
|
append tok $c |
|
} else { |
|
error "Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" |
|
} |
|
} |
|
starttablename - starttablearrayname { |
|
incr i -1 |
|
#allow statemachine to set context for subsequent chars |
|
return 1 |
|
} |
|
stringpart { |
|
append tok $dquotes$c |
|
} |
|
default { |
|
#e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname |
|
append tok $c |
|
} |
|
} |
|
} else { |
|
set had_slash $slash_active |
|
set slash_active 0 |
|
switch -exact -- $state { |
|
key-space - curly-space - curly-syntax { |
|
#if no currently active token - assume another key value pair |
|
if {[tomlish::utils::is_barekey $c]} { |
|
set_tokenType "barekey" |
|
append tok $c |
|
} else { |
|
error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" |
|
} |
|
} |
|
multistring-space { |
|
set_tokenType "stringpart" |
|
if {$had_slash} { |
|
#assert - we don't get had_slash and dquotes at same time |
|
set tok \\$c |
|
} else { |
|
set tok $dquotes$c |
|
} |
|
} |
|
tablename { |
|
set_tokenType "tablename" |
|
set tok $c |
|
} |
|
tablearrayname { |
|
set_tokenType "tablearrayname" |
|
set tok $c |
|
} |
|
default { |
|
set_tokenType "untyped-value" |
|
set tok $c |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
} |
|
|
|
#run out of characters (eof) |
|
if {[::string length $tokenType]} { |
|
#check for invalid ending tokens |
|
#if {$state eq "err"} { |
|
# error "Reached end of data whilst tokenType = '$tokenType'. INVALID" |
|
#} |
|
if {$tokenType eq "startquotesequence"} { |
|
set toklen [::string length $tok] |
|
if {$toklen == 1} { |
|
#invalid |
|
#eof with open string |
|
eror "eof reached without closing quote for string. [tomlish::parse::report_line]" |
|
} elseif {$toklen == 2} { |
|
#valid |
|
#we ended in a double quote, not actually a startquoteseqence - effectively an empty string |
|
switch_tokenType "startquote" |
|
incr i -1 |
|
#dict set token_waiting type "string" |
|
#dict set token_waiting tok "" |
|
return 1 |
|
} |
|
} |
|
dict set token_waiting type "eof" |
|
dict set token_waiting tok "eof" |
|
return 1 |
|
} else { |
|
::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" |
|
set tokenType "eof" |
|
set tok "eof" |
|
} |
|
return 0 |
|
} |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::parse ---}] |
|
} |
|
|
|
tcl::namespace::eval tomlish::app { |
|
variable applist [list encoder decoder test] |
|
|
|
#*** !doctools |
|
#[subsection {Namespace tomlish::app}] |
|
#[para] |
|
#[list_begin definitions] |
|
|
|
proc decoder {args} { |
|
#*** !doctools |
|
#[call app::[fun decoder] [arg args]] |
|
#[para] read toml on stdin until EOF |
|
#[para] on error - returns non-zero exit code and writes error on stderr |
|
#[para] on success - returns zero exit code and writes JSON encoding of the data on stdout |
|
#[para] This decoder is intended to be compatible with toml-test |
|
|
|
set opts [dict merge [dict create] $args] |
|
#fconfigure stdin -encoding utf-8 |
|
fconfigure stdin -translation binary |
|
#Just slurp it all - presumably we are not handling massive amounts of data on stdin. |
|
# - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. |
|
if {[catch { |
|
set toml [read stdin] |
|
}]} { |
|
exit 2 ;#read error |
|
} |
|
try { |
|
set j [::tomlish::toml_to_json $toml] |
|
} on error {em} { |
|
puts stderr "decoding failed: '$em'" |
|
exit 1 |
|
} |
|
puts -nonewline stdout $j |
|
exit 0 |
|
} |
|
|
|
proc encoder {args} { |
|
#*** !doctools |
|
#[call app::[fun encoder] [arg args]] |
|
#[para] read JSON on stdin until EOF |
|
#[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation |
|
#[para] return zero exitcode and TOML data on stdout if JSON data can be converted. |
|
#[para] This encoder is intended to be compatible with toml-test |
|
|
|
set opts [dict merge [dict create] $args] |
|
fconfigure stdin -translation binary |
|
if {[catch { |
|
set json [read stdin] |
|
}]} { |
|
exit 2 ;#read error |
|
} |
|
try { |
|
set toml [::tomlish::json_to_toml $json] |
|
} on error {em} { |
|
puts stderr "encoding failed: '$em'" |
|
exit 1 |
|
} |
|
puts -nonewline stdout $toml |
|
exit 0 |
|
} |
|
|
|
proc test {args} { |
|
set opts [dict merge [dict create] $args] |
|
|
|
package require test::tomlish |
|
if {[dict exists $opts -suite]} { |
|
test::tomlish::suite [dict get $opts -suite] |
|
} |
|
test::tomlish::run |
|
} |
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::app ---}] |
|
} |
|
|
|
proc ::tomlish::appnames {} { |
|
set applist [list] |
|
foreach cmd [info commands ::tomlish::app::*] { |
|
lappend applist [namespace tail $cmd] |
|
} |
|
return $applist |
|
} |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Secondary API namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval tomlish::lib { |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
namespace path [namespace parent] |
|
#*** !doctools |
|
#[subsection {Namespace tomlish::lib}] |
|
#[para] Secondary functions that are part of the API |
|
#[list_begin definitions] |
|
|
|
#proc utility1 {p1 args} { |
|
# #*** !doctools |
|
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] |
|
# #[para]Description of utility1 |
|
# return 1 |
|
#} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace tomlish::lib ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
if {$argc > 0} { |
|
puts stderr "argc: $argc args: $argv" |
|
|
|
if {($argc == 1)} { |
|
if {[::string tolower $argv] in {help -help h -h}} { |
|
puts stdout "Usage: -app <appname> where appname one of:[tomlish::appnames]" |
|
exit 0 |
|
} else { |
|
puts stderr "Argument '$argv' not understood. Try -help" |
|
exit 1 |
|
} |
|
} |
|
set opts [dict create] |
|
set opts [dict merge $opts $argv] |
|
|
|
set opts_understood [list -app ] |
|
if {"-app" in [dict keys $opts]} { |
|
#Don't vet the remaining opts - as they are interpreted by each app |
|
} else { |
|
foreach key [dict keys $opts] { |
|
if {$key ni $opts_understood} { |
|
puts stderr "Option '$key' not understood" |
|
exit 1 |
|
} |
|
} |
|
} |
|
if {[dict exists $opts -app]} { |
|
set app [dict get $opts -app] |
|
if {$app ni [tomlish::appnames]} { |
|
puts stderr "app '[dict get $opts -app]' not found" |
|
exit 1 |
|
} |
|
tomlish::app::$app {*}$opts |
|
} |
|
} |
|
|
|
## Ready |
|
package provide tomlish [namespace eval tomlish { |
|
variable pkg tomlish |
|
variable version |
|
set version 1.1.1 |
|
}] |
|
return |
|
|
|
#*** !doctools |
|
#[manpage_end] |
|
|
|
|