From dbb7360568a04c23e0b4fce712338a5e8535df3a Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Tue, 10 Sep 2024 03:20:11 +1000 Subject: [PATCH] ansi fixes + tomlish --- .../modules/include_modules.config | 2 + src/bootsupport/modules/overtype-1.6.5.tm | 458 ++- src/bootsupport/modules/punk/ansi-0.1.1.tm | 298 +- src/bootsupport/modules/punk/console-0.1.1.tm | 22 +- src/bootsupport/modules/punk/ns-0.1.0.tm | 6 +- src/bootsupport/modules/test/tomlish-1.1.1.tm | Bin 0 -> 24693 bytes src/bootsupport/modules/textblock-0.1.1.tm | 37 +- src/bootsupport/modules/tomlish-1.1.1.tm | 3357 +++++++++++++++++ src/modules/punk/aliascore-999999.0a1.0.tm | 18 +- src/modules/punk/ansi-999999.0a1.0.tm | 298 +- src/modules/punk/blockletter-999999.0a1.0.tm | 6 +- src/modules/punk/config-0.1.tm | 19 +- src/modules/punk/console-999999.0a1.0.tm | 22 +- src/modules/punk/nav/fs-999999.0a1.0.tm | 5 +- src/modules/punk/ns-999999.0a1.0.tm | 6 +- src/modules/punk/repl-0.1.tm | 66 +- .../punk/repl/codethread-999999.0a1.0.tm | 26 +- src/modules/punk/rest-999999.0a1.0.tm | 296 ++ src/modules/punk/rest-buildversion.txt | 3 + src/modules/shellfilter-0.1.9.tm | 58 +- src/modules/textblock-999999.0a1.0.tm | 37 +- .../modules/include_modules.config | 4 +- .../src/bootsupport/modules/overtype-1.6.5.tm | 458 ++- .../bootsupport/modules/punk/ansi-0.1.1.tm | 298 +- .../bootsupport/modules/punk/console-0.1.1.tm | 22 +- .../src/bootsupport/modules/punk/ns-0.1.0.tm | 6 +- .../bootsupport/modules/test/tomlish-1.1.1.tm | Bin 0 -> 24693 bytes .../bootsupport/modules/textblock-0.1.1.tm | 37 +- .../src/bootsupport/modules/tomlish-1.1.1.tm | 3357 +++++++++++++++++ .../src/bootsupport/modules/uuid-1.0.8.tm | 246 ++ .../modules/include_modules.config | 4 +- .../src/bootsupport/modules/overtype-1.6.5.tm | 458 ++- .../bootsupport/modules/punk/ansi-0.1.1.tm | 298 +- .../bootsupport/modules/punk/console-0.1.1.tm | 22 +- .../src/bootsupport/modules/punk/ns-0.1.0.tm | 6 +- .../bootsupport/modules/test/tomlish-1.1.1.tm | Bin 0 -> 24693 bytes .../bootsupport/modules/textblock-0.1.1.tm | 37 +- .../src/bootsupport/modules/tomlish-1.1.1.tm | 3357 +++++++++++++++++ .../src/bootsupport/modules/uuid-1.0.8.tm | 246 ++ src/testansi/palettes/AppleII.ans | 9 + src/testansi/palettes/Solarized.ans | 12 + src/testansi/palettes/Solarized_light.ans | 13 + src/testansi/palettes/VSCode.ans | 6 + src/testansi/palettes/Windows.ans | 6 + src/testansi/palettes/windows_legacy.ans | 7 + src/vendormodules/overtype-1.6.5.tm | 458 ++- src/vendormodules/test/tomlish-1.1.1.tm | Bin 24365 -> 24693 bytes src/vendormodules/tomlish-1.1.1.tm | 1092 ++++-- src/vfs/_vfscommon/modules/overtype-1.6.5.tm | 458 ++- .../modules/punk/aliascore-0.1.0.tm | 18 +- src/vfs/_vfscommon/modules/punk/ansi-0.1.1.tm | 298 +- .../modules/punk/blockletter-0.1.0.tm | 6 +- src/vfs/_vfscommon/modules/punk/config-0.1.tm | 19 +- .../_vfscommon/modules/punk/console-0.1.1.tm | 22 +- .../_vfscommon/modules/punk/nav/fs-0.1.0.tm | 5 +- src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm | 6 +- src/vfs/_vfscommon/modules/punk/repl-0.1.tm | 66 +- .../modules/punk/repl/codethread-0.1.0.tm | 26 +- src/vfs/_vfscommon/modules/punk/rest-0.1.0.tm | 296 ++ .../_vfscommon/modules/shellfilter-0.1.9.tm | 58 +- .../_vfscommon/modules/test/tomlish-1.1.1.tm | Bin 24365 -> 24693 bytes src/vfs/_vfscommon/modules/textblock-0.1.1.tm | 37 +- src/vfs/_vfscommon/modules/tomlish-1.1.1.tm | 1092 ++++-- 63 files changed, 15921 insertions(+), 1985 deletions(-) create mode 100644 src/bootsupport/modules/test/tomlish-1.1.1.tm create mode 100644 src/bootsupport/modules/tomlish-1.1.1.tm create mode 100644 src/modules/punk/rest-999999.0a1.0.tm create mode 100644 src/modules/punk/rest-buildversion.txt create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/uuid-1.0.8.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/uuid-1.0.8.tm create mode 100644 src/testansi/palettes/AppleII.ans create mode 100644 src/testansi/palettes/Solarized.ans create mode 100644 src/testansi/palettes/Solarized_light.ans create mode 100644 src/testansi/palettes/VSCode.ans create mode 100644 src/testansi/palettes/Windows.ans create mode 100644 src/testansi/palettes/windows_legacy.ans create mode 100644 src/vfs/_vfscommon/modules/punk/rest-0.1.0.tm diff --git a/src/bootsupport/modules/include_modules.config b/src/bootsupport/modules/include_modules.config index 03952e18..fc436d8c 100644 --- a/src/bootsupport/modules/include_modules.config +++ b/src/bootsupport/modules/include_modules.config @@ -21,6 +21,8 @@ set bootsupport_modules [list\ src/vendormodules uuid\ src/vendormodules md5\ src/vendormodules sha1\ + src/vendormodules tomlish\ + src/vendormodules test::tomlish\ modules punkcheck\ modules natsort\ modules punk::ansi\ diff --git a/src/bootsupport/modules/overtype-1.6.5.tm b/src/bootsupport/modules/overtype-1.6.5.tm index 38ce71c2..492341d6 100644 --- a/src/bootsupport/modules/overtype-1.6.5.tm +++ b/src/bootsupport/modules/overtype-1.6.5.tm @@ -233,7 +233,6 @@ tcl::namespace::eval overtype { -width \uFFEF\ -height \uFFEF\ -startcolumn 1\ - -wrap 0\ -ellipsis 0\ -ellipsistext $default_ellipsis_horizontal\ -ellipsiswhitespace 0\ @@ -243,11 +242,13 @@ tcl::namespace::eval overtype { -exposed1 \uFFFD\ -exposed2 \uFFFD\ -experimental 0\ - -cp437 1\ + -cp437 0\ -looplimit \uFFEF\ -crm_mode 0\ -reverse_mode 0\ -insert_mode 0\ + -wrap 0\ + -info 0\ -console {stdin stdout stderr}\ ] #expand_right is perhaps consistent with the idea of the page_size being allowed to grow horizontally.. @@ -263,14 +264,19 @@ tcl::namespace::eval overtype { #-ellipsis args not used if -wrap is true foreach {k v} $argsflags { switch -- $k { - -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace + -looplimit - -width - -height - -startcolumn - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -transparent - -exposed1 - -exposed2 - -experimental - -expand_right - -appendlines - -reverse_mode - -crm_mode - -insert_mode - -cp437 - - -console { + - -info - -console { tcl::dict::set opts $k $v } + -wrap - -autowrap_mode { + #temp alias -autowrap_mode for consistency with renderline + #todo - + tcl::dict::set opts -wrap $v + } default { error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" } @@ -280,10 +286,6 @@ tcl::namespace::eval overtype { # -- --- --- --- --- --- #review - expand_left for RTL text? set opt_expand_right [tcl::dict::get $opts -expand_right] - ##### - # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. - set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) - ##### #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. set opt_width [tcl::dict::get $opts -width] set opt_height [tcl::dict::get $opts -height] @@ -298,50 +300,34 @@ tcl::namespace::eval overtype { set opt_crm_mode [tcl::dict::get $opts -crm_mode] set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] set opt_insert_mode [tcl::dict::get $opts -insert_mode] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_autowrap_mode [tcl::dict::get $opts -wrap] + #??? wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### # -- --- --- --- --- --- set opt_cp437 [tcl::dict::get $opts -cp437] + set opt_info [tcl::dict::get $opts -info] - #initial state for renderspace 'terminal' reset - set initial_state [dict create\ - -width $opt_width\ - -height $opt_height\ - -crm_mode $opt_crm_mode\ - -reverse_mode $opt_reverse_mode\ - -insert_mode $opt_insert_mode\ - -cp437 $opt_cp437\ - ] # ---------------------------- # -experimental dev flag to set flags etc # ---------------------------- set data_mode 0 - set info_mode 0 set edit_mode 0 set opt_experimental [tcl::dict::get $opts -experimental] foreach o $opt_experimental { switch -- $o { - old_mode { - set info_mode 1 - } data_mode { set data_mode 1 } - info_mode { - set info_mode 1 - } edit_mode { set edit_mode 1 } } } # ---------------------------- - - #modes - set insert_mode $opt_insert_mode ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l - set autowrap_mode $opt_wrap - set reverse_mode $opt_reverse_mode - set crm_mode $opt_crm_mode set underblock [tcl::string::map {\r\n \n} $underblock] @@ -367,6 +353,20 @@ tcl::namespace::eval overtype { set renderwidth $opt_width set renderheight $opt_height } + #initial state for renderspace 'terminal' reset + set initial_state [dict create\ + renderwidth $renderwidth\ + renderheight $renderheight\ + crm_mode $opt_crm_mode\ + reverse_mode $opt_reverse_mode\ + insert_mode $opt_insert_mode\ + autowrap_mode $opt_autowrap_mode\ + cp437 $opt_cp437\ + ] + #modes + #e.g insert_mode can be toggled by insert key or ansi IRM sequence CSI 4 h|l + #opt_startcolumn ?? - DECSLRM ? + set vtstate $initial_state # -- --- --- --- #REVIEW - do we need ansi resets in the underblock? @@ -494,50 +494,55 @@ tcl::namespace::eval overtype { } #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set renderargs [list -experimental $opt_experimental\ - -cp437 $opt_cp437\ - -info 1\ - -crm_mode $crm_mode\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ - -reverse_mode $reverse_mode\ - -cursor_restore_attributes $cursor_saved_attributes\ - -transparent $opt_transparent\ - -width $renderwidth\ - -exposed1 $opt_exposed1\ - -exposed2 $opt_exposed2\ - -expand_right $opt_expand_right\ - -cursor_column $col\ - -cursor_row $row\ + set renderargs [list -experimental $opt_experimental\ + -cp437 $opt_cp437\ + -info 1\ + -crm_mode [tcl::dict::get $vtstate crm_mode]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ + -reverse_mode [tcl::dict::get $vtstate reverse_mode]\ + -cursor_restore_attributes $cursor_saved_attributes\ + -transparent $opt_transparent\ + -width [tcl::dict::get $vtstate renderwidth]\ + -exposed1 $opt_exposed1\ + -exposed2 $opt_exposed2\ + -expand_right $opt_expand_right\ + -cursor_column $col\ + -cursor_row $row\ $undertext\ $overtext\ ] set LASTCALL $renderargs set rinfo [renderline {*}$renderargs] - set instruction [tcl::dict::get $rinfo instruction] - set insert_mode [tcl::dict::get $rinfo insert_mode] - set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# - set reverse_mode [tcl::dict::get $rinfo reverse_mode] + set instruction [tcl::dict::get $rinfo instruction] + tcl::dict::set vtstate crm_mode [tcl::dict::get $rinfo crm_mode] + tcl::dict::set vtstate insert_mode [tcl::dict::get $rinfo insert_mode] + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# + tcl::dict::set vtstate reverse_mode [tcl::dict::get $rinfo reverse_mode] #how to support reverse_mode in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? # - review - the answer is probably that we don't need to - it is set/reset only during application of overtext - set crm_mode [tcl::dict::get $rinfo crm_mode] - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] - set unapplied [tcl::dict::get $rinfo unapplied] - set unapplied_list [tcl::dict::get $rinfo unapplied_list] - set post_render_col [tcl::dict::get $rinfo cursor_column] - set post_render_row [tcl::dict::get $rinfo cursor_row] - set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] - set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] - set visualwidth [tcl::dict::get $rinfo visualwidth] - set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] - set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] - tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + + #Note carefully the difference betw overflow_right and unapplied. + #overflow_right may need to be included in next run before the unapplied data + #overflow_right most commonly has data when in insert_mode + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] + set unapplied [tcl::dict::get $rinfo unapplied] + set unapplied_list [tcl::dict::get $rinfo unapplied_list] + set post_render_col [tcl::dict::get $rinfo cursor_column] + set post_render_row [tcl::dict::get $rinfo cursor_row] + set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] + set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] + set visualwidth [tcl::dict::get $rinfo visualwidth] ;#column width of what is 'rendered' for the line + set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] + set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - if {0 && $reverse_mode} { + if {0 && [tcl::dict::get $vtstate reverse_mode]} { #test branch - todo - prune puts stderr "---->[ansistring VIEW $replay_codes_overlay] rendered: $rendered" #review @@ -593,19 +598,29 @@ tcl::namespace::eval overtype { #todo - handle potential insertion mode as above for cursor restore? #keeping separate branches for debugging - review and merge as appropriate when stable - tcl::dict::incr instruction_stats $instruction - switch -- $instruction { + set instruction_type [lindex $instruction 0] ;#some instructions have params + tcl::dict::incr instruction_stats $instruction_type + switch -- $instruction_type { reset { #reset the 'renderspace terminal' (not underlying terminal) set row 1 set col 1 - - + set vtstate [tcl::dict::merge $vtstate $initial_state] + #todo - clear screen } {} { + #end of supplied line input #lf included in data set row $post_render_row set col $post_render_col + if {![llength $unapplied_list]} { + if {$overflow_right ne ""} { + incr row + } + } else { + puts stderr "renderspace end of input line - has unapplied: [ansistring VIEW $unapplied] (review)" + } + set col $opt_startcolumn } up { @@ -708,17 +723,18 @@ tcl::namespace::eval overtype { puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" - set sub_info [overtype::renderline -info 1\ - -width $renderwidth\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ + set sub_info [overtype::renderline\ + -info 1\ + -width [tcl::dict::get $vtstate renderwidth]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ -expand_right [tcl::dict::get $opts -opt_expand_right]\ ""\ $overflow_right\ ] - set foldline [tcl::dict::get $sub_info result] - set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? - set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. + set foldline [tcl::dict::get $sub_info result] + tcl::dict::set vtstate insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. linsert outputlines $renderedrow $foldline #review - row & col set by restore - but not if there was no save.. } @@ -745,6 +761,53 @@ tcl::namespace::eval overtype { set col $post_render_col #overflow + unapplied? } + clear_and_move { + #e.g 2J + if {$post_render_row > [llength $outputlines]} { + set row [llength $outputlines] + } else { + set row $post_render_row + } + set col $post_render_col + set overflow_right "" ;#if we're clearing - any overflow due to insert_mode is irrelevant + set clearedlines [list] + foreach ln $outputlines { + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $renderwidth]\x1b\[0m + if 0 { + + set lineparts [punk::ansi::ta::split_codes $ln] + set numcells 0 + foreach {pt _code} $lineparts { + if {$pt ne ""} { + foreach grapheme [punk::char::grapheme_split $pt] { + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + incr numcells 1 + } + default { + if {$grapheme eq "\u0000"} { + incr numcells 1 + } else { + incr numcells [grapheme_width_cached $grapheme] + } + } + } + + } + } + } + #replays/resets each line + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $numcells]\x1b\[0m + } + } + set outputlines $clearedlines + #todo - determine background/default to be in effect - DECECM ? + puts stderr "replay_codes_overlay: [ansistring VIEW $replay_codes_overlay]" + #lset outputlines 0 $replay_codes_overlay[lindex $outputlines 0] + + } lf_start { #raw newlines # ---------------------- @@ -780,27 +843,48 @@ tcl::namespace::eval overtype { append rendered $overflow_right set overflow_right "" } else { + #review - we should really make renderline do the work...? set overflow_width [punk::ansi::printing_length $overflow_right] if {$visualwidth + $overflow_width <= $renderwidth} { append rendered $overflow_right set overflow_right "" } else { - if {$visualwidth < $renderwidth} { - set graphemes [punk::char::grapheme_split $overflow_width] - set add "" - set addlen $visualwidth - set remaining_overflow $graphemes - foreach g $graphemes { - set w [overtype::grapheme_width_cached] - if {$addlen + $w <= $renderwidth} { - append add $g - incr addlen $w - lpop remaining_overflow - } else { - break - } + if {[tcl::dict::get $vtstate autowrap_mode]} { + set outputlines [linsert $outputlines $renderedrow $overflow_right] + set overflow_right "" + set row [expr {$renderedrow + 2}] + } else { + set overflow_right "" ;#abandon + } + + if {0 && $visualwidth < $renderwidth} { + puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" + error "incomplete - abandon?" + set overflowparts [punk::ansi::ta::split_codes $overflow_right] + set remaining_overflow $overflowparts + set filled 0 + foreach {pt code} $overflowparts { + lpop remaining_overflow 0 + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set add "" + set addlen $visualwidth + foreach g $graphemes { + set w [overtype::grapheme_width_cached $g] + if {$addlen + $w <= $renderwidth} { + append add $g + incr addlen $w + } else { + set filled 1 + break + } + } + append rendered $add + } + if {!$filled} { + lpop remaining_overflow 0 ;#pop code + } } - append rendered $add set overflow_right [join $remaining_overflow ""] } } @@ -829,14 +913,16 @@ tcl::namespace::eval overtype { #we may also have other control sequences that came after col 80 e.g cursor save if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" - #assertion - there should be no overflow.. - puts $lhs + #assertion - there should be no overflow.. + puts $lhs + } + if {![tcl::dict::get $vtstate insert_mode]} { + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode } - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right set row $post_render_row #set row $renderedrow @@ -981,7 +1067,7 @@ tcl::namespace::eval overtype { #normal single-width grapheme overflow #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" set row $post_render_row ;#renderline will not advance row when reporting overflow char - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { incr row set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? } else { @@ -1014,7 +1100,7 @@ tcl::namespace::eval overtype { #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { if {$renderwidth < 2} { #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character set idx 0 @@ -1055,6 +1141,14 @@ tcl::namespace::eval overtype { set row $post_render_row set col $post_render_col } + set_window_title { + set newtitle [lindex $instruction 1] + puts stderr "overtype::renderspace set_window_title [ansistring VIEW $newtitle] instruction '$instruction'" + # + } + reset_colour_palette { + puts stderr "overtype::renderspace instruction '$instruction' unimplemented" + } default { puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" } @@ -1062,7 +1156,7 @@ tcl::namespace::eval overtype { } - if {!$opt_expand_right && !$autowrap_mode} { + if {!$opt_expand_right && ![tcl::dict::get $vtstate autowrap_mode]} { #not allowed to overflow column or wrap therefore we get overflow data to truncate if {[tcl::dict::get $opts -ellipsis]} { set show_ellipsis 1 @@ -1160,11 +1254,22 @@ tcl::namespace::eval overtype { } set result [join $outputlines \n] - if {$info_mode} { + if {!$opt_info} { + return $result + } else { #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? #append result \n$instruction_stats\n + set inforesult [dict create\ + result $result\ + last_instruction $instruction\ + instruction_stats $instruction_stats\ + ] + if {$opt_info == 2} { + return [pdict -channel none inforesult] + } else { + return $inforesult + } } - return $result } #todo - left-right ellipsis ? @@ -2368,14 +2473,22 @@ tcl::namespace::eval overtype { } else { #linefeed occurred in middle or at end of text #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" - incr cursor_row - if {$idx == -1 || $overflow_idx > $idx} { - #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow - set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + if {$insert_mode == 0} { + incr cursor_row + if {$idx == -1 || $overflow_idx > $idx} { + #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + } + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + incr cursor_row + #don't adjust the overflow_idx + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction lf_mid + break ;# could have overdata following the \n - don't keep processing } - set instruction lf_mid - priv::render_unapplied $overlay_grapheme_control_list $gci - break } } @@ -3077,8 +3190,11 @@ tcl::namespace::eval overtype { set idx [expr {$cursor_column -1}] set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] incr idx_over + if {[llength $outcols]} { + priv::render_erasechar 0 [llength $outcols] + } priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction move + set instruction clear_and_move break } 3 { @@ -3749,6 +3865,72 @@ tcl::namespace::eval overtype { } 7OSC - 8OSC { + # OSCs are terminated with ST of either \007 or \x1b\\ - we allow either whether code was 7 or 8 bit + if {[tcl::string::index $codenorm end] eq "\007"} { + set code_content [tcl::string::range $codenorm 4 end-1] ;#ST is \007 + } else { + set code_content [tcl::string::range $codenorm 4 end-2] ;#ST is \x1b\\ + } + set first_colon [tcl::string::first {;} $code_content] + if {$first_colon == -1} { + #there probably should always be a colon - but we'll try to make sense of it without + set osc_code $code_content ;#e.g \x1b\]104\007 vs \x1b\]104\;\007 + } else { + set osc_code [tcl::string::range $code_content 0 $first_colon-1] + } + switch -exact -- $osc_code { + 2 { + set newtitle [tcl::string::range $code_content 2 end] + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list set_window_title $newtitle] + break + } + 4 { + #OSC 4 - set colour palette + #can take multiple params + #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ + set params [tcl::string::range $code_content 1 end] + + puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 { + #OSC 10 through 17 - so called 'dynamic colours' + #can take multiple params - each successive parameter changes the next colour in the list + #- e.g if code started at 11 - next param is for 12. 17 takes only one param because there are no more + #10 change text foreground colour + #11 change text background colour + #12 change text cursor colour + #13 change mouse foreground colour + #14 change mouse background colour + #15 change tektronix foreground colour + #16 change tektronix background colour + #17 change highlight colour + set params [tcl::string::range $code_content 2 end] + + puts stderr "overtype::renderline OSC $osc_code set dynamic colours unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 18 { + #why is this not considered one of the dynamic colours above? + #https://www.xfree86.org/current/ctlseqs.html + #tektronix cursor color + puts stderr "overtype::renderline OSC 18 - set tektronix cursor color unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + 104 { + #reset colour palette + #we want to do it for the current rendering context only (vvt) - not just pass through to underlying vt + puts stderr "overtype::renderline OSC 104 reset colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list reset_colour_palette] + break + } + default { + puts stderr "overtype::renderline OSC - UNIMPLEMENTED. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } } default { @@ -3791,6 +3973,23 @@ tcl::namespace::eval overtype { #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW set in_overflow 1 } + set trailing_nulls 0 + foreach ch [lreverse $outcols] { + if {$ch eq "\u0000"} { + incr trailing_nulls + } else { + break + } + } + if {$trailing_nulls} { + set first_tail_null_posn [expr {[llength $outcols] - $trailing_nulls}] + } else { + set first_tail_null_posn -1 + } + + #puts stderr "first_tail_null_posn: $first_tail_null_posn" + #puts stderr "colview: [ansistring VIEW $outcols]" + foreach ch $outcols { #puts "---- [ansistring VIEW $ch]" @@ -3865,8 +4064,17 @@ tcl::namespace::eval overtype { } append outstring $gxleader append outstring $sgrleader - if {$idx+1 < $cursor_column} { - append outstring [tcl::string::map {\u0000 " "} $ch] + if {$ch eq "\u0000"} { + if {$cp437_glyphs} { + #map all nulls including at tail to space + append outstring " " + } else { + if {$trailing_nulls && $i < $first_tail_null_posn} { + append outstring " " ;#map inner nulls to space + } else { + append outstring \u0000 + } + } } else { append outstring $ch } @@ -3874,12 +4082,20 @@ tcl::namespace::eval overtype { incr i } #flower.ans good test for null handling - reverse line building - if {![ansistring length $overflow_right]} { - set outstring [tcl::string::trimright $outstring "\u0000"] - } - set outstring [tcl::string::map {\u0000 " "} $outstring] - set overflow_right [tcl::string::trimright $overflow_right "\u0000"] - set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] + #review - presence of overflow_right doesn't indicate line's trailing nulls should remain. + #The cells could have been erased? + #if {!$cp437_glyphs} { + # #if {![ansistring length $overflow_right]} { + # # set outstring [tcl::string::trimright $outstring "\u0000"] + # #} + # set outstring [tcl::string::trimright $outstring "\u0000"] + # set outstring [tcl::string::map {\u0000 " "} $outstring] + #} + + + #REVIEW + #set overflow_right [tcl::string::trimright $overflow_right "\u0000"] + #set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] set replay_codes "" if {[llength $understacks] > 0} { @@ -4207,8 +4423,9 @@ tcl::namespace::eval overtype::priv { upvar outcols o upvar understacks ustacks upvar understacks_gx gxstacks + upvar replay_codes_overlay replay #ECH clears character attributes from erased character positions - #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. + #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller of render_erasechar should do that mapping and only supply 1 or greater. if {![tcl::string::is integer -strict $count] || $count < 1} { error "render_erasechar count must be integer >= 1" } @@ -4223,8 +4440,9 @@ tcl::namespace::eval overtype::priv { } set num [expr {$end - $start + 1}] set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? - set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] - set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] + #DECECM ??? + set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list $replay]]] + set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] ;# ??? review return } proc render_setchar {i c } { diff --git a/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/bootsupport/modules/punk/ansi-0.1.1.tm index 267e680e..1a40c952 100644 --- a/src/bootsupport/modules/punk/ansi-0.1.1.tm +++ b/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -413,6 +413,7 @@ tcl::namespace::eval punk::ansi { tcl::namespace::export\ {a?} {a+} a \ ansistring\ + ansiwrap\ convert*\ clear*\ cursor_*\ @@ -722,77 +723,11 @@ tcl::namespace::eval punk::ansi { #candidate for zig/c implementation? proc stripansi2 {text} { - - set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters join [::punk::ansi::ta::split_at_codes $text] "" } - proc stripansi1 {text} { - - #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW - - variable escape_terminals ;#dict - variable ::punk::ansi::ta::standalone_code_map ;#map to empty string - - set text [convert_g0 $text] - - - set text [tcl::string::map $standalone_code_map $text] - #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm - #\x1b#3 double-height letters top half - #\x1b#4 double-height letters bottom half - #\x1b#5 single-width line - #\x1b#6 double-width line - #\x1b#8 dec test fill screen - - - #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. - - #Theoretically line endings can occur within an ST payload (review e.g title?) - #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) - - set inputlist [split $text ""] - set outputlist [list] - - set in_escapesequence 0 - #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) - - set i 0 - foreach u $inputlist { - set v [lindex $inputlist $i+1] - set uv ${u}${v} - if {$in_escapesequence eq "2b"} { - #2nd byte - done. - set in_escapesequence 0 - } elseif {$in_escapesequence != 0} { - set endseq [tcl::dict::get $escape_terminals $in_escapesequence] - if {$u in $endseq} { - set in_escapesequence 0 - } elseif {$uv in $endseq} { - set in_escapesequence 2b ;#flag next byte as last in sequence - } - } else { - #handle both 7-bit and 8-bit CSI and OSC - if {[regexp {^(?:\033\[|\u009b)} $uv]} { - set in_escapesequence CSI - } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { - set in_escapesequence OSC - } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { - set in_escapesequence DCS - } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { - #SOS,PM,APC - all terminated with ST - set in_escapesequence MISC - } else { - lappend outputlist $u - } - } - incr i - } - return [join $outputlist ""] - } - #review - what happens when no terminator? #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set @@ -2029,7 +1964,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map - set fcposn [lsearch $args "forcecol*"] + set fcposn [lsearch $args "force*"] set fc "" set opt_forcecolour 0 if {$fcposn >= 0} { @@ -2409,7 +2344,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >= 0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -2767,7 +2702,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >=0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -3199,6 +3134,16 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } return $out } + proc move_emitblock {row col textblock} { + #*** !doctools + #[call [fun move_emitblock] [arg row] [arg col] [arg textblock]] + set commands "" + foreach ln [split $textblock \n] { + append commands [punk::ansi::move_emit $row $col $ln] + incr row + } + return $commands + } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] @@ -3315,18 +3260,90 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) - #Alt screen buffer + #Alt screen buffer - smcup/rmcup ti/te + #Note \x1b\[?47h doesn't work at all in some terminals (e.g alacritty,cmd on windows and reportedly kitty) + #It is also reported to have 'deceptively similar' effects to 1049 -but to be 'subtly broken' on some terminals. + #see: https://xn--rpa.cc/irl/term.html + #1049 (introduced by xterm in 1998?) considered the more modern version? + #1047,1048,1049 xterm private modes are 'composite' control sequences as replacement for original 47 sequence + #1049 - includes save cursor,switch to alt screen, clear screen + #e.g ? (below example not known to be exactly what 1049 does - but it's something similar) + #SMCUP + # \x1b7 (save cursor) + # \x1b\[?47h (switch) + # \x1b\[2J (clear screen) + #RMCUP + # \x1b\[?47l (switch back) + # \x1b8 (restore cursor) + + #1047 - clear screen on the way out (ony if actually on alt screen) proc enable_alt_screen {} { #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? - #\x1b\[?1049h ;#xterm - return \x1b\[?47h + return \x1b\[?1049h } proc disable_alt_screen {} { #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] - #\x1b\[?1049l + return \x1b\[?1049l + } + #47 - less widely supported(?) doesn't restore cursor or clear alt screen + proc enable_alt_screen2 {} { + return \x1b\[?47h + } + proc disable_alt_screen2 {} { return \x1b\[?47l } + proc term_colour_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_color_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_colour_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_color_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_colour_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_color_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_colour_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_color_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_colour_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + proc term_color_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + #15,16 tektronix fg, tektronix bg ??? + proc term_colour_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + proc term_color_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + #18 tektronix cursor colour ??? + proc term_colour_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + proc term_color_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + #22 pointer shape - there are other methods too - not known to work on windows terminal based VTs - review + proc term_colour_reset {} { + return "\x1b\]104\;\x1b\\" + } + proc term_color_reset {} { + return "\x1b\]104\;\x1b\\" + } # -- --- --- proc erase_line {} { @@ -4398,6 +4415,10 @@ tcl::namespace::eval punk::ansi::ta { variable re_ansi_detect_open expr {[regexp $re_ansi_detect_open $text]} } + proc detect_st_open {text} { + variable re_ST_open + expr {[regexp $re_ST_open $text]} + } #not in perl ta proc detect_csi {text} { @@ -4672,7 +4693,8 @@ tcl::namespace::eval punk::ansi::class { } oo::class create base_renderer { variable o_width - variable o_wrap o_overflow o_appendlines o_looplimit + variable o_autowrap_mode + variable o_overflow o_appendlines o_looplimit variable o_cursor_column o_cursor_row #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered @@ -4690,25 +4712,33 @@ tcl::namespace::eval punk::ansi::class { } tcl::namespace::path $nspath #-- -- - if {[llength $args] < 2} { - error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} + if {[llength $args] < 1} { + error {usage: ?-width ? ?-height ? ?-autowrap_mode [1|0]? ?-overflow [1|0]? from_ansistring} } - lassign [lrange $args end-1 end] from_ansistring to_ansistring + #lassign [lrange $args end-1 end] from_ansistring to_ansistring + set from_ansistring [lindex $args end] + set opts [tcl::dict::create\ - -width \uFFEF\ - -wrap 1\ - -overflow 0\ - -appendlines 1\ - -looplimit 15000\ - -experimental {}\ - -cursor_column 1\ - -cursor_row 1\ + -width \uFFEF\ + -height \uFFEF\ + -overflow 0\ + -appendlines 1\ + -looplimit 15000\ + -experimental {}\ + -cursor_column 1\ + -cursor_row 1\ + -insert_mode 0\ + -autowrap_mode 1\ + -initial_ansistring ""\ ] - puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" set argsflags [lrange $args 0 end-2] foreach {k v} $argsflags { switch -- $k { - -width - -wrap - -overflow - -appendlines - -looplimit - -experimental { + -width - -height - + -overflow - -appendlines - -looplimit - -experimental - + -autowrap_mode - + -insert_mode - + -initial_ansistring { tcl::dict::set opts $k $v } default { @@ -4717,8 +4747,19 @@ tcl::namespace::eval punk::ansi::class { } } } + set initial_ansistring [tcl::dict::get $opts -initial_ansistring] + if {$initial_ansistring eq ""} { + set to_ansistring [punk::ansi::class::class_ansistring new ""] + } else { + #todo - verify obj vs raw string + set to_ansistring $initial_ansistring + } + puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" + set o_width [tcl::dict::get $opts -width] - set o_wrap [tcl::dict::get $opts -wrap] + set o_height [tcl::dict::get $opts -height] + set o_autowrap_mode [tcl::dict::get $opts -autowrap_mode] + set o_insert_mode [tcl::dict::get $opts -insert_mode] set o_overflow [tcl::dict::get $opts -overflow] set o_appendlines [tcl::dict::get $opts -appendlines] set o_looplimit [tcl::dict::get $opts -looplimit] @@ -4736,6 +4777,9 @@ tcl::namespace::eval punk::ansi::class { method eval_in {script} { eval $script } + method renderbuf {} { + return $o_to_ansistring + } method cursor_column {{col ""}} { if {$col eq ""} { return $o_cursor_column @@ -4755,6 +4799,12 @@ tcl::namespace::eval punk::ansi::class { set o_cursor_row $row } + #set/query cursor state + method cursor_state {args} { + lassign $args r c + return [dict create row [my cursor_row $r] column [my cursor_column $c]] + } + #consider scroll area #we need to render to something with a concept of viewport, offscreen above,below,left,right? method rendernext {} { @@ -4834,7 +4884,8 @@ tcl::namespace::eval punk::ansi::class { #set rinfo [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -width $o_width -overflow 0 -cursor_column $col -cursor_row $row $undertext $overtext] } } - + #renderspace equivalent? channel based? + #todo $o_to_ansistring append $newtext return [tcl::dict::create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] @@ -4960,11 +5011,10 @@ tcl::namespace::eval punk::ansi::class { if {$o_renderer ne ""} { append result \n " renderer obj: $o_renderer" append result \n " renderer class: [info object class $o_renderer]" - } - if {$o_renderout ne ""} { - append result \n " render target ansistring: $o_renderout" - append result \n " render target has ansi : [$o_renderout has_ansi]" - append result \n " render target count : [$o_renderout count]" + set renderstring [$o_renderer renderbuf] + append result \n " render target ansistring: $renderstring" + append result \n " render target has ansi : [$renderstring has_ansi]" + append result \n " render target count : [$renderstring count]" } if {$verbose} { append result \n "ansisplits listing" @@ -5052,7 +5102,8 @@ tcl::namespace::eval punk::ansi::class { } method strippedlength {} { if {![llength $o_ansisplits]} {my MakeSplit} - + #review + return [string length [join $o_ptlist ""]] } #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already method stripped {} { @@ -5119,10 +5170,9 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ni $rtypes} { error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" } - if {$o_renderout eq ""} { - #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? - set o_renderout [punk::ansi::class::class_ansistring new ""] - } + #if {$o_renderout eq ""} { + # set o_renderout [punk::ansi::class::class_ansistring new ""] + #} if {$o_renderer ne ""} { set oinfo [info object class $o_renderer] set tail [tcl::namespace::tail $oinfo] @@ -5130,13 +5180,15 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ne $currenttype} { puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } else { return $currenttype } } else { puts "creating first renderer" - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } } #--- progressive rendering buffer - another ansistring object @@ -5149,10 +5201,13 @@ tcl::namespace::eval punk::ansi::class { return $o_renderwidth } #re-render if needed? - + puts stderr "renderwidth todo? re-render?" set o_renderwidth $rw } + method renderer {} { + return $o_renderer + } method render_state {} { #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. @@ -5160,10 +5215,36 @@ tcl::namespace::eval punk::ansi::class { } method renderbuf {} { #get the underlying renderobj - if any - return $o_renderout ;#also class_ansistring + #return $o_renderout ;#also class_ansistring + return [$o_renderer renderbuf] } - method render {} { + method render {{maxgraphemes ""}} { #full render - return buffer ansistring + set do_render 1 + set grapheme_count 0 + set other_count 0 + if {$maxgraphemes eq ""} { + while {$do_render} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } else { + while {$do_render && $grapheme_count <= $maxgraphemes} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } + return [dict create graphemes $grapheme_count other $other_count] } method rendernext {} { #render next available pt/code chunk only - not to end of available input @@ -5198,6 +5279,13 @@ tcl::namespace::eval punk::ansi::class { #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column + #like Tcl's append class_ansistring append returns the result directly - which for ANSI - can be inconvenient in the terminal + #class_ansistring append_string is a convenience wrapper to avoid returning the raw result + method append_string {args} { + my append {*}$args + return + } + #analagous to Tcl string append #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient method append {args} { @@ -5369,7 +5457,7 @@ tcl::namespace::eval punk::ansi::class { } - #method append_and_render - append and render up to end of appended data at same time + #method append_and_render? - append and render up to end of appended data at same time method view {args} { if {$o_string eq ""} { diff --git a/src/bootsupport/modules/punk/console-0.1.1.tm b/src/bootsupport/modules/punk/console-0.1.1.tm index 4dd7bd66..e367ce9e 100644 --- a/src/bootsupport/modules/punk/console-0.1.1.tm +++ b/src/bootsupport/modules/punk/console-0.1.1.tm @@ -864,6 +864,7 @@ namespace eval punk::console { #Turning ansi off only stops new ansi being generated - but what about codes stored in configurations of existing elements such as tables/frames? #It will stop underlines/bold/reverse as well as SGR colours #what about ansi movement codes etc? + #we already have colour on|off which disables SGR codes in a+ etc as well as stderr/stdout channel transforms proc ansi {{onoff {}}} { variable ansi_wanted if {[string length $onoff]} { @@ -891,6 +892,7 @@ namespace eval punk::console { } } catch {punk::repl::reset_prompt} + puts stderr "::punk::console::ansi - use 'colour' command to turn SGR codes on/off" return [expr {$ansi_wanted}] } @@ -1295,10 +1297,10 @@ namespace eval punk::console { if {![catch {twapi::set_console_title $windowtitle} result]} { return $windowtitle } else { - error "punk::console::titleset failed to set title - try punk::console::ansi::titleset" + error "punk::console::local::titleset failed to set title - try punk::console::ansi::titleset" } } else { - error "punk::console::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" + error "punk::console::local::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" } } proc titleget {} { @@ -1306,12 +1308,12 @@ namespace eval punk::console { if {![catch {twapi::get_console_title} result]} { return $result } else { - error "punk::console::titleset failed to set title - ensure twapi is available" + error "punk::console::local::titleset failed to set title - ensure twapi is available" } } else { #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title - # won't work on all platforms/terminals - but may be worth implementing - error "punk::console::titleget has no local mechanism to get the window title on this platform." + # won't work on all platforms/terminals - but may be worth implementing (for overtype::renderspace / frames etc) + error "punk::console::local::titleget has no local mechanism to get the window title on this platform." } } } @@ -1327,7 +1329,7 @@ namespace eval punk::console { if { $ansi_wanted <= 0} { punk::console::local::titleset $windowtitle } else { - tailcall ansi::titleset $windowtitle + ansi::titleset $windowtitle } } #no known pure-ansi solution @@ -1486,8 +1488,6 @@ namespace eval punk::console { namespace import ansi::insert_lines namespace import ansi::delete_lines - interp alias {} smcup {} ::punk::console::enable_alt_screen - interp alias {} rmcup {} ::punk::console::disable_alt_screen #experimental proc rhs_prompt {col text} { @@ -1881,12 +1881,6 @@ namespace eval punk::console { -interp alias {} colour {} punk::console::colour -interp alias {} ansi {} punk::console::ansi -interp alias {} color {} punk::console::colour -interp alias {} a+ {} punk::console::code_a+ -interp alias {} a {} punk::console::code_a -interp alias {} a? {} punk::console::code_a? diff --git a/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/bootsupport/modules/punk/ns-0.1.0.tm index 70f924d7..cf0bf70c 100644 --- a/src/bootsupport/modules/punk/ns-0.1.0.tm +++ b/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -1561,7 +1561,8 @@ tcl::namespace::eval punk::ns { #set name [string trim $name :] #set origin [namespace origin ${upns}::$name] - set origin [nseval $targetns [list ::namespace origin $name]] + set origin [nseval $targetns [list ::namespace origin $name]] + set resolved [nseval $targetns [list ::namespace which $name]] #An renamed alias may exist that is the same name as a proc that was created later.. so we must check for the proc before looking into aliases! if {$origin ni [info procs $origin]} { @@ -1613,7 +1614,8 @@ tcl::namespace::eval punk::ns { } lappend argl $a } - list proc [nsjoin ${targetns} $name] $argl $body + #list proc [nsjoin ${targetns} $name] $argl $body + list proc $resolved $argl $body } diff --git a/src/bootsupport/modules/test/tomlish-1.1.1.tm b/src/bootsupport/modules/test/tomlish-1.1.1.tm new file mode 100644 index 0000000000000000000000000000000000000000..4ea2ce3d5c130888c6d7d9839df23f375bc5cbbd GIT binary patch literal 24693 zcmch92Rzm7`@dvli;T=;@2zYh%Fc+g=P}PYIOkX?va|Q5vKmT+j1-w=kB~BwJ&J@R z;{Q1)$3xHae0zSc|EW)$b3XTVUH3KL_kCZV@Ih!VmMiwoARBuK802n`v;$qVf`A}S z7S>=m$QcfH2E(ku*8KQr_k&n$!7wn~0tvPTxgzXgwjijJwX>78q$JeB5o`ufGu!!K z-ynM%ke88_1=7k6#zH?x3Qo4Em|-WdaT zazbto+Ifq#u!ro8H`zx3VgYE<76vjxI794_+jspU#%Hpt_H6CE8J>lcE1}ZfKG%T3Ed~bH4qqr03YIFEC+lCr4fX)1;7pA z1ab$1tSn%_{9Iv3kcG1|+{qbk55xk*b02w>vjX2)?f4i1Y=s1Q$%7OQ^J&zr+xmb& zz%W}N0$JrvKr%8QLEvUM80iX!fxNEmH#TYr>a~BE!XFQl1sS0+|DBL1jwrtY;sucL zz;sbxqh6?6&(U9*52mb=%<+IsDUIKR8rp2S=c$ z53#a9fX$FlRFIh=z_w5@3`kB!s7wcBY@`#!$^BQ@?}acBN+u=;B;KP3L?w_-QOOj6 zn!-VPNGph>BoZnq2?3JfA*6mP9%5w%MUerq?g)xb_+Z*cu|f&4Jp^qw+Zkk6`kxdJ zijNw-FH(Sl10??KpxkBuS3R-r=VY=SS$i?^mv8?mU{GlgO=<|-0%i-8J#^`T`J;4? z7KeY~0RlYA1SG}c3M4I5(gDMf5KkZ#A%H53qJO|>s9B?4mOx-Q9{TM;$2s`PKE1K- zo5*kD0A(o~R|o`1nT{5yi8?sh1BJrM2?o@E(0(G@g#&OY6lD{Tq$FyJsIj2!GUQ|f z6z&}&76<@r0CNQ>{rm_Hc5$_bgAbkCwzr^iv>Cv1x2Op0Xnb25FYasm1u;RD9^ihk zH6LIL0M6_yK?ivJwtS#e{#ON{>d@{S4-oo!|2A`UVgEOA@DC8)w%m5^vT}k#fjW-> zZ3ia+nrsjNRzpqJ1Oy-?X6E1dFN+1-))kdFKu&NJ9<;JUIWFLDP9RI53<7Wo2y)cd z+rZHcMHZkgDXFnt9KlGd145x@%L2B51FpLZ3lNeh7>63VGiqBdXTV{QsLxSea5xbF zcmf4kSWsAmPf$VgJP{x#7@!!KwUavnQS#>k+Cx+kdZY2eL;?3KwzVT4)XhK$}AwTV>lGd+6gpf z5X>GG(7*KcJI6xF0pKizvpo_FlGjq%^(-_8{T1Z9ss_2E5&*DHfFoUD+i(j|BVamb z>usIz?LxC1gF3>T+(CcwU6hMDxgtSGxaYRwkcVprCE0f5>@HOfwGQeg)QjbSzJ8%) z&@b)$T^s|+4(x%1gP{O?1R;QujN*;lHN6GM9qt6!*wz({K!7+c;7EHL3&2!BQnWyo zQB+_8xEioLn(qQWgK!0d&K&MmJ7NQZJG%m4fcbC4-?Qo8YT(~c_`RZmBnpRX{I-t# zk`}NT$O!^?xU>D=N`o>S)6YfbLD7Dh+a@OkTl9HvLNZbc#s9z0A&tXP8-840ax6elH_4y(Fg4kLbzI4fx*CP+!)ABD44aI zH@9!y^S8Zl1pS)c{x$7Sy`ulzH?^~<%#KR@z7P{Usr$sXyJV-*uenXy9+4m?&uiE z_J7EH*YJSlc2u=Z4=8;`Xm3saUU~bHjVo+NqC@2Q+X&g+KiS(Tu>lIywwwH1Yy<0L z6vhHVWP2B8yI}3W7Z58NsQbOFhnw=+;aPwM$VEG<-Uqip2vXKslkZ>>H!#c|xD80CsMHE1OjKz^t;SqoJ3|30 zqDt|hdfMGp_(LlnAo;f_k;TRW03doPi1SBcx3h$tu9(Int%+k2$eAWKgG_Q6~MQ+fgefpB)- zPM3h@w{zC7I6|#kAiy3H|NkV}MS#u@Rlth@r~_aElpz6byWS40n@|VfZ~vlR;Pk)g9`iDDD2O*Q0Jgy|&j#)*z#w z9{&^n|CC}|oc;dyTZSll|H6U)Z^{IU*`CmPyNQ_9W+#pAllX51M$!5Ikd9Go4o}Iy z|K{-f|FYu>7-|Rg|0}f~P{n>8``@8XB%FUbJm8=}Zrs}{q|s74igf}51LGJ5#o2cC zJ5L{cKYA1c!-)_BLk0sE<7YWRIzfRgC_6qOe&CM)!WCF)AOuhm1nhvhS^>MN;QjCT zQOmupEr8My%uknw90vqRecV7>bukn7qvgLRcLfvI>L$L0^ zqHtPyEjOtyZ=;-w?mjN8v99TWuVU&1rSaB3SYGxsGb$9kiGRthp*Q)M*+0=(Ft9|P zE6Zsj-I*X5ukUTS2vtfqSKKH^ac#=F$&`#l$jORqGCA^MI*Gz?{}Ys_)_kh(D1CCq zaSvop`g+gYIlu7GIxFsvgVi@v6Ql-D$DvnNLvl{^RMFYsI|Nt)m{~o47AOpU0KSk-Y3_ zh)HM}!pjmAe=|!lnVyqvxHh5~zh!2srKw@%Jm=l11r94tch8*pr!+ok#>Nry?8~3+ zH-5~mvC{eAOsp)oH1y*36Oydt{0 z$Scl?dHAB9Ac>+6L!5zz$))L&1Z`xe`f2GYmppK)byj?`W)U8R>zx-Oq+JWJH}BY0 zi8#mGJzneU{Ir3&7i7l@I1Qp5{3#=`FfizVAUg$w-roip9LNjSz&5k9z^~u7gDY2G z&1vj3sqDb@t-6beAFQ?6lr;m1y^KqOu|wUwIG;Jlsm139F+X8V&>;O*?H)v;;*F)D z%@RTZXL#RwRU~zS3dY+nQ`{*8rKvdj!0?DGOS$j4+I;=z4?A6 zo2MLnvfxg|OrUG%MoNPIvuIxGKKQ+4@EQI|wP(Ca3q6wYQ{S5mNHMDxXj?R1s?I-C z-ZT*8;05Nk(@f)-(7`pBL*KcW6r~?)?~C?EW%K9BX~W zG_O>z`)%q;*xAr$osb|O;xLJkyK_QLjrzup%IYC_WqR3&el61LQ2|(|BOX$Go6Az> zzXwbCU>fsm&H1x{{#~JUX_7bC(LS-DB@+5NthAfmXDByHmWNNyL_d)WZqvNSXRTwA zo?)K+gRt%F_r$RQBr@5c@7Oozv%=WDmZiwPl+82~;SU1Jys}fC_jBr>ouw(kVIjZq zDk}6$9s8{KD{?vpc~R{bCN3Yd9a=+ZpRJy0X_;~M&EROL$hq5Eh2%7es_C#it`{}_ zs^}xj)vRTfD>W0u+IOb^#l9u@EOcxo2MOy?VM@(alm(H5rb#ND6$UKy`K327RJ z?G4T&2uy04Pa6K98a?4V@EA_4Qb_I~$R`qV-0@1fnQy_AG@{g*?tWyja=M;LdRU)0 z-<-`G$7TiWvDD51$@Q5cdtN$-bN9s@cny1OyEuY_a3w=Nt{MBB%<9=${}6iFFcF%# zAS+gzg1)?KpZys#sIsRilLAB1_2jwJ)me!a=FQ`#GVM~`>FQpM%x_+v-ZY*r(0&!c z9(v2iKR%u0#gAISXMtn0f;UGb>jD<(ZdvdLd`NN-PO_@ZhnbvQa-H*&%PLB_E8v1V>GQuaYhn1!F&g zfw2Hwihq#c%urw-Az%TA11nWCA$}D4Fo}_ItP>;+T9w6iefr&czmB?t!@q4xsp~fc` z;Wh^6zI{Jqtj&E}MfU+b^rP2Ce$^GZh?8zkw+7w}gzm^ z>y#oN6W0n7Dz2*jz1HGsXiD$cD_wlp)pRvWN%mfRuWhNnvU z!V7$g29IGf)$a-kv3L*Z@W{BbuDx;hIN9ieaoxH1BoCi6#s%jvuo~>R^XXV_>zi`e zSf}iBNQ@v!bBgE7GLNbgySo)m$8BO?DtRPITisN~<|Im6vv#4~?$Z@$!>g^iO|$3( zjHRmeLH9FziZCg|SGln82`mmMK?k@LF|hw!5pXbSYk%7zx3&Q9e$bg;qNl2^sy{k0 zI+53@SE#3@%IEv}MS4zpJ>jte%=)%hZP-iU;lAk`kQ?DKLdtoe@jQ`Wv+c7bGpGgJ(F_cAMtbha>Q(J^295Dc;w|OU0lorlfT`a0 z_2&-q7sja50W;L(ju~jL(c@_bg06 zeoQ4t?i~{rU=V45*SSAeAs{puwJW=ubhd3A5%U=44`d#9*5MC1geI86Ie1KDXYb~D zCAn4hvtJgL4T$nBD-cpm(6^^(d3~*Vej|gTi2t^4)}vYVkHtk-sCw-$-3<7qPEKLB z78aUd!^B=s^L8j|+*SI`_@qDkvC#=-jEJlGyqM<}bSDj@t!9UX`me zk5D{4E@y8aPt0X4_$c>r)C!&Hh(ZpglS`4Wr@3F#T)$xHjJ8|j49Aw0^G)^#4NF6O z4auKi^}%beD*#~eDIa2qn>W)Xmp6xw{{}O4*M_Id&*{J`Er@%GsXR*fU={3>+CD+0AvhtGO=+7H1rLmck((`5}O++ZkySnFJTLk7f5>ekgNY z{F(HbnlaUo+oF@YagMJQ$DToCpY9&yFmJ>LS0ob6Qxk%i+PN5kB=YKU{J!Ty?MUNrW0cA z@*XGd?%OIaBu8jMRc6_x;Rl8=k?FO!(eXMp7dBcI=KTk|huO(Eyv|k8K=Bs}sbj)81ZtZq(jFfr)D7tjkEy80EPvBWhkYL&yB|D}%((KzX({h1 zI3EiSN8$Ag%&_rSVuqQg?UY^f<}$CoU)L{vC&`f^a1;?orH%KAcq-H>wV?M}K}TX# zEG{8kXzBb%)-f-^^7RF#FVP};+zX2#MZIC zCw1aJsTLXE@kC|coUGY}^~m^aacJ>nriJ0>UaOHOKP8YLJ2SHUT}ynvx%qg;botNN z;OBp}j>sA*`Er8yzV}CKEmkXY)wFbC`IFqXbWx@fMYuWKk`B5{t04MY@O1c199uO5 z%qncMH}4Q9v9MD=mCs1RRow(ulc=IFc=!b{Z^(LT2@wrNe@n5HxN${L5<^<3Doeey z#q69PEaI`q7kKwD1aqd)%Dl?vGnB`~BxlQtVkSLZdpt%_o*7r6(Tu?KNMkdXjao#T zuFvD;$}(z3GUXoe%BnkuaxX9UPWsw-=2at~ZyAMfE5%q=Y z4tUv&$Ak!par0tn`b*Ic>&Hu4XI(C+#|m+`biwa+)9WxQp^cDic>S)?^J9G~Q5x=# z9{X&4frn+st4Y_^@My;}b5EvyNeDbs&6n(c&b`Rt!n#YLi7iLp;=Renw=SO-7L_`R zXeo?yAM;i{vAVR>-eZNGM61(oP?pPRqaF2z%}pC3Qn&Z}9Z%2H6C{dmZ|1 zYyBB+OW1@=a106FppBBy1^15c}TG1+s)SkdzhvJW~LtAAiXcu920fJfQ?2Z~2if zYRFoO{IagPy+{C=dsAmefn}hZ-R#UD0-Tu(9dER-r^U3uy)Yt$1v)l{5gwBzmxZN} zGC;V|^K9t*Rf}x;@1$Po;;Bx^MB@q8nwa_fjqwejl&JgQB1;*K8^g9GvI*4i$OZ;R z%$aHv)};Z888Z4xkr~aU$RKSb)5XtMjT_~MZb65i7$)gg_%G!TP&AXF!FLe|W zY$?a#yb(r$DL@q%vVX1%hz)ve8pA|GPpklJqxn|B#Q|_&11=5V`d{~PcFQ0Xb?9pb z0Ztx(rZnKVV+UU9wyTV(0@zY#bW}%^zgcB$LbXV52WRTEtLQ4LUubRJClNlzmNJ8k z%PO#`gn7wSDNT&YbAgQ`9=MzaD@%n#*zTGDeF0ONMxQu^~UvD3=A z=lZA`!51k+P%B6pxT>&K$3ib@ke7DWMMtZ?LFJh8ysl6}NDArE~{cg{8*f4=B(p3;vcpT=1IDSY&{)xdn&yIU1vmTw&^7*Y*N zn}lO!wG#qlq|TeZV9=k%*z^eR(}g8PGPp+;UFj0IfIH<@Jd-9=$a1p#!fl~w1?-e? zdPHGsOBV0up2tTW+fsg@NSiGOXuy733xDj3KeZsdZFpTyelEc2E&_`itfFv8Ladzn zv5w0%`dlZutt=nhj`xceexj$Ur*plq4U*6ue|wa&RCNYl)wxswN0C5_4b4zG&|@l< zS}{1OfOwohR4V2mof^coAX4m+=OOON%UGqG0@A?*=0aMQv^+Vj%bWP> z*&%9GRKtR#cPU|=oT*gm3=oCYW%WC6VK;P>L^?E|TYhQO6u~)PX3v4%C2b zaq)&WHxQ;F+B%%=mpGd)aq=r`v;vD$^oW(>`Et3HsD(TfkbxYq#l`;~G^flgfh8cS zfz-_cXuC)5iiL_HXt)q+pFn9feUztF3G*~7tfrrh55^Th9b1x{&Tw9y@Q6U^=BgJD zT}MD<{)DHqjKXpeW;lnaT27voS?fol!aF>E{NB^TpOEy@6opgt$^abO_0h#L@ zUvB6aEg3qGdgtramg2oOOt8+6S7>@emRUIc@h$kmX7t+1wb1J3?(Z6Ln?!q-V9UT) z$>}2`2m>_02Wa3w>`@_6=h{EBhDl@vajhJo;=1x$a17U=0i?W|EvuhL?-;h|hD@cK z96`cV+qZYp*OEmjeOgEclh%6Ovkm9pxeH2MTw{BCiZgIE@QkkCQT)ilCw?rVe2GaQ z?btgxiy_>}vSQyA?4AzxbslR!l`)F*edvP982*VF#<=IrScQcap>Q+Bi(JNB2S~m-ZW7)krSw(20q> zLcKg7Uxc2cyrdPXr`6sfprXgwq^u{%ud2?}}NPKZ0Y3_r{Q@=<(A}3qPpih(;^0+SK?Rdpl`tHiX;SKo5;x-7)ZCR?AXEkrb(sn4#m< z4;Peg4=KEnUBWpQX#6S%A@h{Ak?JnT^fK%Xvr?$5pj`~9nv8PqfW8XC^l3kT;^+GM z7ebUTRJ6#}(r&)_RH`IKyo49;X5E|k5LsVafO8+B5O*njJVwU3wRttO*RGUO(f1B( z_fiZTiKnMT?me=y%&PVp`i2LJH$ru(m#zJb&`?pti{6gu&M%WVU#?1;K6s?t{kEB~ zEK`sn9{laeOS6qk2}_^%BI=J%-ov;*9^7z7=Ez1TO`aQR{jB^wqIjLm4pYHapSLA6 z_>AP}W0cA!h+56d%HxOTIHG@PbCCOytUm4l6g zB3|R+%fTebH<5>ya{RaASl5>8zx6qvt^fKWaM7}k$X9u@_-9+K^k9#hXZ)y~&SEmp;g(oJvT=I86o_k7 znBvt;92L?G=#xE}@Mb+3^zM@GWkIQ8Bk_3J_l#0}cNv{b5K?w28Z5VXO6=Qwm1V5s z*Sl=s1EU^l6{DR-_GVWUonH4zSgxI{3XrT$$at{ERjux7mL)41PRmF$-|=m3vG6Wm zV8`J3&zdk2yP5^V-oaK{4GK6hQ#L=xbbUnrG2g54Td!F%i2W(M`u)>iB@YOck5a{& zgw8&_vEdgV(U5rl-Lf{;N`-b4)1@fS9^=L*jp5_|NDiK_L45Oc7p{7aTg=czPZbdk zCwg9t^AgT~OHd#8=2H~2U{km6;M>Hkg1|h+`>gcEvJ8GVE8h})l=fye+c4pO3hx@^ z^L|8GtrvD?$yIu0(HySy>Wt5~xM1*2d!*!(iBLcKpYC985c9X_T1U9C zFZybmS0t-yc}0dlEJ}WhH9Gsf$5(6##+mi?b`O^jv0I{kWkT%XZ)S6%YH1k@#^c{| zdQIM~ZuZNn!Hw*igKi2*EzXDQusJpnE0GahZkvJzrvvq?=@cKVKg<0zOQ#>!uNHUfUvv%z{!cIetuv-f!J zQW9ZKZ}H{FOqfLdB65|iDl=dlR$Mgi&c7gv6Qp|@0phI8u&d_lHnyKT;kW4Qp}J0c z2ir)sXfwtN5<_wuICHZ~xWm14IYwTMKUv}3?Gs(qaLw|-I;=U!U`m+!OuWisoWY0I zACH!9rq_oLYUAAwF^HtQ+$fJ9mby`G_BzW=+`m#GM$PM@V7aPL@x1YLoRktV$CKb6 zJ3SkW?MB8#x6{A&x3+&Q5+j+BT!7+?%Z6mzz4dBY9Tg$F6nsoSS10+D`dou^vRwlI zAZU<7uJJQ&q9hYWX&e?Ym8;R{;*nj1<(o+PzOd$kYZwaXx`E-rwlR za{ChKS`$i~d0ca&$k@2ck`K2!pW~Y7g<-$=&r6+>CC~45d1-&sEe$pH?sssy-0;eD>~SAs{zQVJHO{Zf-@FO)<(;3fUWiOO!G&|{8!PKY z3IVK|Ie3gl3-pcS*kvZjqr3|xo2=?QrLk^jRxe9IU`!oTUku$d^oR9rx*ASk=kjI` z#B5H#bPQk3q&o4CUYtVrOGitgF^0*r;3Wn%M0L^?g52)$;v_#|?bG?b%=L=yI?zv< zd8M_?`GY6o>bgVpYwtSty>&J{)}2r$(CY5eq!x|sJKje6yJnA}{kj2~5@i~xO zeVR2-CX%P#p9++Wi9LB;Ofz@_&+MVE zt0>4&2VdLz0UGD1|kVH0i z=?b<42#sRvxR)^4DaLAOysk*hF35evSiwrq#M2!_;=RHW@4{sEL-)~Bc<)t{ABOqf z1^PY{X+04JZWCeAKHjSdFQG^5;gV$&VG2tfZ(so^NU`x!=`LABZdqL-R)NpW*ckS> zFHtdaA!q21Bj0py8t#izaX|=80nFJq|P7 zRmLOD_o$yP(yDcYO$Ux`+{)Oe5Ybgb0Ic- zB?1S0WAg}@Jx%N!uilTj0Xp&r7ghq=ykyR{eohQA9X6}6(!E35*GPo5bfmbpuDjK& z;!@z&h!Kn-@ig;n>t>(Kbp8?U4MnSLTS+14?al|J1$?mrIJH4ZlO1C*o0C`X(Tq_p z9k;|V%W#HcHDx{&TE~>qSb3?G{pPbt%7s1MXAp52*e`#BDF7+{IkaP9#Yp#~l-e594nVJ*tJ+pbVE{4=?7fQ3DK z&uQ)Dv8e9I4`cpFD_sfmGdfXn(_G`VcyrC%2)&To;jivbOw}FTY-fz0AsH>Q}9}-IKW`LE?9z|L!e|?}CZ^ZjwgoZ*^A3^SfA)#59cYJ$7$N z=;GG%u4ng;Exs6k-C3|eeg4*mM*X9+6PIcAnMo2msH~m{sv)y{z3CXIZHEg!QL$8n za6D&t$-eSff_X|_HzZ3A*p-;nzW5dT<=$frrA{e?q)G7_PdOQ68*@fs|_E( zSnxwK*wXa%UzBvuHaYJSaQ9damXSQ+vmxAf3>5d z{fmg&;6~uF6btSk;+7K}foXkoo`H{@tJ-gOXCd7an7D>B?^ zh6v@xe>KjaD$bkg(9d1)KmMa^?#yRT3UjlyPsG<;?aPFk8o&0wTX^X#tCZ_AXrE~+ z&%sh5=ydygB(l4Q3%Y-gYZw0U9b0Z#;^{|}!x3kXA{p!-GTO1x|Wwd4rA2;bnhU(Qu zdXkUNHA&A0mKR01@=_%uKeQNRW|juMqp{4Sozpt5`dWHE`Syu!cxK*_nHm!X)5S_L zIefiF?EKVPMZ=c76yeQjdWhFjXWMZFdX}~#7@4X26R^sv%5$2xY3k*WK`dBhr9*ph zWns2A?b40uSLMjXD*lz0&#c2;%t@YkP0aq86t&pn5~ z`LVCnu$LG2+m+mKjA^91?Y1Rh#Td)awj^U!S>NwuCcv*LP=LqFn3mraDr2$~Tv_Mu zx&FX35}*Eb^&CCJRkXe`CtPlWZ)2Av9D!5$_gpB%`48<>yNO5uJt1x9|4*qvRuTuY zQd#GgV0oaIFaO7yn&Z>K6P)f7@8%!34Z%fgI7_>4NESIZK@m(_KWtdv#?XstyqkxK zrx&>h(?ks4>U=2c|0*o$#O%R4FEp2ugHr=oLs8)F%Xzs?9)b@3Snq`#-YRCA5j(&LVW zG*cuZMpk)!O*@v`j!ojOd$m{!wRIcE`Q;DJ{kSo)%1Z@_H~;i9{Cw)m1*T(HJ>xa+ ziZd5Ixm(`Ie~-CK-=1WP?;iQGFC_JN5D9svN%j)_6@J=zN7LhRLdu)b$C@bK`dW#a zSv!v#N;Bjxww5~5Oo}ahFr;7s9(9dAiudi#jIUFeMXvs7k5978S4yxl4g6S2EKZRp zg6X72&LPI*H4MNG0mbugyY3f{vi5ab=!5XB-)^buwYdugNl;)`7k3BJ#%8pZL_cm< zqD*}KG=4;ulE6T{X%Lbx#zF;>=GVsjFz}#8^H@z4qDMbS5U$H23b^Lb|QdhEvkM zD6S8Ba04uTLbx89;x@xBsedHkBJ-qqf+-={ana72EEc<9yi}59OSK!_F0UF>r|8&5GFf_5p;@5o_~f4TCk_ZSY6Xs+dS2 zO|^z>g)pzk1bqCVK4xY-!nr4EZ?iX=Vj5}c#x3LXoXY*I)6)ZMl@}F}|13$XfBSjZ|2U-g#MV}J5iafNMCw;EsdfD`LpbgE< z`hngXN_46Z!8QWO)hjDcU2hGg4)@9$*qP#lx6CAV#`|qah@VRNXl@vI|HnIPdiBl4 zBz>PiTBb|p$MM;d-?MRsbW6XCWG&{CyM1at@=>Mh`N1pI!XA@Ym6sS899nxXvyhed zm}L~z$zT8QEE`hBxxO;i&@cmL>=J%ZeBAiD03$yevlrpLPiE{nDGIko0&p?NNh7jq z@^YGKuF^1nHl@%=mdBBoBf_5S%fDZ8#Kb#+Lue^M-^i0ex;lu7zfgCyBk814nnP+B zt?%8Hn=#sDM!k8x3b^wI9<*0JIiDSSA~fxt%eFX>YSe93T;^@@j#!WiWP`W09n96~nT>j(B3mFEnlw4Bwfr+K+E)s~z)*&I*Y^c zy2qjf%bxnZv8nKgYjrT7?`N9p%$#>9W{u8aFh4BOuJhUP@6W}(;46Rxc{Hut1B9cJfT{isGq-+4X7uKN!OGQ9HDaNRjMJvs8C&ar&U^uIA` z&laC4INaV3wKI8^kJL``kXl~%9P>P9SaI5jhsvFyUHrQ3)yZ6e$8t$^^X&KP);r#; z6JF1GfS(X-68nNfn@ODT)O*&8&XX(Y4`RO8N|_V#XhbW=%2<)#!jZ6eF?{Mp)hri2 ziSl$n%Y5Kv7bal_(%TKy4)!j|2BjtSp9}|T<;gK+XBR)5XL;E}6z zD}FMR@!NT^_mtT=63o?>F-l#TZ_`Q^PpqB#!RGnudV+AW8-Ar`Z1eQ-sv4TE*3B}A zPMhuB)n#0Um!XDr$6cf~)=i0=*_yKro`jXFjoxf+v{U;eOwsfPQsUF#R1vEmzgZY$ zctpE9z0a{kLOTqzS@rVC)C9^F?O~jP0Vp{!{rCBb6oyW7`UU~Il?S$~GdCAskmWv( zfi}QzIAj5jqFS8KG_S$?rnLKUH2R->dH)ElsZ%C ze`(QB=Y*7<)k9U#*#^UwSVfx~#fk z@&!)829BMGv>v2*+v@3Gi)@b#EP)W;(P=v+S|`xbZlcjP(eB?IE!J>VzI46!^NZZ} z_w2MM!s*iQsfSP%ico<*)_$sl|IiN<4QZ`7eK&rQ`1<#Zo|?~8(0k?YXKnDb&h#YT znHjj2xN=-!ak&he=v`#1e0x?wzMsf*X~zp(?OpD(tKO-EeL7^6+6Cjp5#AT&Rr=l5 z$6F3uJT_hdscu$Koi{|IXsD$63%h9(+_| zj~@m`81o-<{L6zI=sXWT6@kt(p5;IAM0-X8o#Vj=9nd+7u>Lv6zdU`g_t*nE`-6`= zptFDYf5(3R!w={p9DGgzT?99vjqrbU-d)f7%To*JJP)?(qw^eN|6`u$9o6Uz4>ql% zGt}h#V}|=$*wHy2>=j4nr~<4}|4^+5I>*tu9&EWq=jy@x$6V1GaMAf4Y->g57XX|| z{oyQkn_SU(9oF2s$8Gk{c^%&Biq86A7waBl42-M%e>}~<^t7V$JlMjD&U5L{c^+(N zMQ3}kc@>?lGqBJ5hZgjKCj9 NB4FDF^%OJ4{{tdaZ=nDH literal 0 HcmV?d00001 diff --git a/src/bootsupport/modules/textblock-0.1.1.tm b/src/bootsupport/modules/textblock-0.1.1.tm index 88fdc3fd..96fb263d 100644 --- a/src/bootsupport/modules/textblock-0.1.1.tm +++ b/src/bootsupport/modules/textblock-0.1.1.tm @@ -4041,6 +4041,9 @@ tcl::namespace::eval textblock { (default 0 if no existing -table supplied)" -table -default "" -type string -help "existing table object to use" -headers -default "" -help "list of header values. Must match number of columns" + -show_header -default "" -help "Whether to show a header row. + Leave as empty string for unspecified/automatic, + in which case it will display only if -headers list was supplied." -action -default "append" -choices {append replace} -help "row insertion method if existing -table is supplied if append is chosen the new values will always start at the first column" -columns -default "" -type integer -help "Number of table columns @@ -4101,11 +4104,34 @@ tcl::namespace::eval textblock { } } else { set is_new_table 1 + set headers {} + if {[tcl::dict::get $opts -headers] ne ""} { + set headers [dict get $opts -headers] + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 1 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } else { + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 0 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } + if {[tcl::string::is integer -strict $opt_columns]} { set cols $opt_columns + if {[llength $headers] && $cols != [llength $headers]} { + error "list_as_table number of columns ($cols) doesn't match supplied number of headers ([llength $headers])" + } } else { #review - set cols 2 ;#seems a reasonable default + if {[llength $headers]} { + set cols [llength $headers] + } else { + set cols 2 ;#seems a reasonable default + } } #defaults for new table only if {[tcl::dict::get $opts -frametype] eq ""} { @@ -4123,15 +4149,6 @@ tcl::namespace::eval textblock { if {[tcl::dict::get $opts -show_hseps] eq ""} { tcl::dict::set opts -show_hseps 0 } - set headers {} - set show_header 0 - if {[tcl::dict::get $opts -headers] ne ""} { - set headers [dict get $opts -headers] - if {[llength $headers] ne $cols} { - error "list_as_table number of headers ([llength $headers]) must match number of columns ($cols)" - } - set show_header 1 - } set t [textblock::class::table new\ -show_header $show_header\ diff --git a/src/bootsupport/modules/tomlish-1.1.1.tm b/src/bootsupport/modules/tomlish-1.1.1.tm new file mode 100644 index 00000000..d85d4416 --- /dev/null +++ b/src/bootsupport/modules/tomlish-1.1.1.tm @@ -0,0 +1,3357 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application tomlish 1.1.1 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin tomlish_module_tomlish 0 1.1.1] +#[copyright "2024"] +#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] +#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[require tomlish] +#[keywords module] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of tomlish +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by tomlish +#[list_begin itemized] + +package require Tcl 8.6- +package require struct::stack +package require logger + +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {struct::stack}] + +#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') + + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish { + namespace export {[a-z]*}; # Convention: export all lowercase + variable types + #ARRAY is analogous to a Tcl list + #TABLE is analogous to a Tcl dict + #WS = inline whitespace + #KEYVAL = bare key and value + #QKEYVAL = quoted key and value + #ITABLE = inline table (*can* be anonymous table) + # inline table values immediately create a table with the opening brace + # inline tables are fully defined between their braces, as are dotted-key subtables defined within + # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained + + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL STRING MULTISTRING LITSTRING MULTILITSTRING INT FLOAT BOOL DATETIME] + #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) + #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) + set min_int -9223372036854775808 ;#-2^63 + set max_int +9223372036854775807 ;#2^63-1 + + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" + puts stderr $msg + } + logger::initNamespace ::tomlish + foreach lvl [logger::levels] { + interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl + log::logproc $lvl tomlish_log_$lvl + } + + #*** !doctools + #[subsection {Namespace tomlish}] + #[para] Core API functions for tomlish + #[list_begin definitions] + + proc tags {} { + return $::tomlish::tags + } + + #helper function for get_dict + proc _get_keyval_value {keyval_element} { + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" + set found_value 0 + #find the value + # 3 is the earliest index at which the value could occur (depending on whitespace) + set found_sub [list] + foreach sub [lrange $keyval_element 2 end] { + #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey + switch -exact -- [lindex $sub 0] { + STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + set type [lindex $sub 0] + set value [lindex $sub 1] + set found_sub $sub + incr found_value 1 + } + default {} + } + } + if {!$found_value} { + error "Failed to find value element in KEYVAL. '$keyval_element'" + } + if {$found_value > 1} { + error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + } + + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + #simple (non-container, no-substitution) datatype + set result [list type $type value $value] + } + STRING - STRINGPART { + set result [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITSTRING { + #REVIEW + set result [list type $type value $value] + } + TABLE - ITABLE - ARRAY - MULTISTRING { + #jmn2024 - added ITABLE - review + #we need to recurse to get the corresponding dict for the contained item(s) + #pass in the whole $found_sub - not just the $value! + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } + default { + error "Unexpected value type '$type' found in keyval '$keyval_element'" + } + } + return $result + } + + #get_dict is a *basic* programmatic datastructure for accessing the data. + # produce a dictionary of keys and values from a tomlish tagged list. + # get_dict is primarily for reading toml data. + #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, + # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. + # creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. + #A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. + proc get_dict {tomlish} { + + #keep track of which tablenames have already been directly defined, + # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' + #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. + #we don't error out just because a previous tablename segment has already appeared. + variable tablenames_seen [list] + + + log::info ">>> processing '$tomlish'<<<" + set items $tomlish + + foreach lst $items { + if {[lindex $lst 0] ni $::tomlish::tags} { + error "supplied string does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + } + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + #ignore TOMLISH tag at beginning + set items [lrange $tomlish 1 end] + } + + set datastructure [dict create] + foreach item $items { + set tag [lindex $item 0] + #puts "...> item:'$item' tag:'$tag'" + switch -exact -- $tag { + KEYVAL - QKEYVAL { + log::debug "--> processing $tag: $item" + set key [lindex $item 1] + #!todo - normalize key. (may be quoted/doublequoted) + + if {[dict exists $datastructure $key]} { + error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." + } + + #lassign [_get_keyval_value $item] type val + set keyval_dict [_get_keyval_value $item] + dict set datastructure $key $keyval_dict + } + TABLE { + set tablename [lindex $item 1] + set tablename [::tomlish::utils::tablename_trim $tablename] + + if {$tablename in $tablenames_seen} { + error "Table name '$tablename' has already been directly defined in the toml data. Invalid." + } + + log::debug "--> processing $tag (name: $tablename): $item" + set name_segments [::tomlish::utils::tablename_split $tablename] + set last_seg "" + #toml spec rule - all segments mst be non-empty + #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. + + set key_hierarchy [list] + set key_hierarchy_raw [list] + + foreach rawseg $name_segments { + + set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes + set c1 [::string index $rawseg 0] + set c2 [::string index $rawseg end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes are processed within it. + set seg [::string range $rawseg 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] + #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + } else { + set seg $rawseg + } + + #no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. + #if {$rawseg eq ""} { + # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" + #} + lappend key_hierarchy $seg + lappend key_hierarchy_raw $rawseg + + if {[dict exists $datastructure {*}$key_hierarchy]} { + #It's ok for this key to already exist *if* it was defined by a previous tablename, + # but not if it was defined as a keyval/qkeyval + + set testkey [join $key_hierarchy_raw .] + set testkey_length [llength $key_hierarchy_raw] + set found_testkey 0 + if {$testkey in $tablenames_seen} { + set found_testkey 1 + } else { + #see if it was defined by a longer entry + foreach seen $tablenames_seen { + set seen_segments [::tomlish::utils::tablename_split $seen] + #these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, + # and strip the quotes from both single-quoted and double-quoted entries. + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #dots within table segments might seem like an 'edge case' + # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. + + #VVV the test below is wrong VVV! + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + if {$testkey eq $seen_match} { + set found_testkey 1 + } + } + } + + if {$found_testkey == 0} { + #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset + error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + } + } + + } + + + #We must do this after the key-collision test above! + lappend tablenames_seen $tablename + + + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + + #now add the contained elements + foreach element [lrange $item 2 end] { + set type [lindex $element 0] + switch -exact -- $type { + KEYVAL - QKEYVAL { + set keyval_key [lindex $element 1] + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + } + } + } + #now make sure we add an empty value if there were no contained elements! + #!todo. + } + ITABLE { + #SEP??? + set datastructure [list] + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + KEYVAL - QKEYVAL { + set keyval_key [lindex $element 1] + set keyval_dict [_get_keyval_value $element] + dict set datastructure $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + } + } + } + } + ARRAY { + #arrays in toml are allowed to contain mixtures of types + set datastructure [list] + log::debug "--> processing array: $item" + + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + TABLE - ARRAY - MULTISTRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] + } + WS - SEP { + #ignore whitespace and commas + } + default { + error "Unexpected value type '$type' found in array" + } + } + } + } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + WS - COMMENT - NEWLINE { + #ignore + } + default { + error "Unexpected tag '$tag' in Tomlish list '$tomlish'" + } + } + } + return $datastructure + } + + proc json_to_toml {json} { + #*** !doctools + #[call [fun json_to_toml] [arg json]] + #[para] + + set tomlish [::tomlish::from_json $json] + set toml [::tomlish::to_toml $tomlish] + } + + proc from_json {json} { + set jstruct [::tomlish::json_struct $json] + return [::tomlish::from_json_struct $jstruct] + } + + proc from_json_struct {jstruct} { + package require fish::json_toml + return [fish::json_toml::jsonstruct2tomlish $jstruct] + } + + proc toml_to_json {toml} { + set tomlish [::tomlish::from_toml $toml] + return [::tomlish::get_json $tomlish] + } + + proc get_json {tomlish} { + package require fish::json + set d [::tomlish::get_dict $tomlish] + #return [::tomlish::dict_to_json $d] + return [fish::json::from "struct" $d] + } + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +namespace eval tomlish::encode { + #*** !doctools + #[subsection {Namespace tomlish::encode}] + #[para] + #[list_begin definitions] + + #STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness + # take a value of the appropriate type and wrap as a tomlish tagged item + proc string {s} { + return [list STRING $s] + } + + proc int {i} { + #whole numbers, may be prefixed with a + or - + #Leading zeros are not allowed + #Hex,octal binary forms are allowed (toml 1.0) + #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) + #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. + # - We should probably raise an error for number larger than this and suggest the user supply it as a string? + if {[tcl::string::last , $i] > -1} { + error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" + } + if {![::tomlish::utils::int_validchars $i]} { + error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" + } + + if {[::tomlish::utils::is_int $i]} { + return [list INT $i] + } else { + error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" + } + + } + + proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [string tolower $f]] + } + if {[::tomlish::utils::is_float $f]} { + return [list FLOAT $f] + } else { + error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" + } + } + + proc datetime {str} { + if {[::tomlish::utils::is_datetime $str]} { + return [list DATETIME $str] + } else { + error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" + } + } + + proc boolean {b} { + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![string is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" + } else { + if {[expr {$b && 1}]} { + return [list BOOL true] + } else { + return [list BOOL false] + } + } + } + + #Take tablename followed by + # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types + proc table {name args} { + set pairs [list] + foreach t $args { + if {[llength $t] == 3} { + if {[lindex $t 0] ne "KEYVAL"} { + error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + } + lappend pairs $t + } elseif {[llength $t] == 2} { + #!todo - type heuristics + lassign $t n v + lappend pairs [list KEYVAL $n [list STRING $v]] + } else { + error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + } + } + return [list TABLE $name $pairs] + } + + + #the tomlish root is basically a nameless table representing the root of the document + proc root {args} { + set table [::tomlish::encode::table TOMLISH {*}$args] + set result [lindex $table 2] ;#Take only the key-value pair list + } + + #WS = whitepace, US = underscore + proc tomlish {list {context ""}} { + if {![tcl::string::is list $list]} { + error "Supplied 'tomlish' is not a valid Tcl list. Expected a tagged list (parsed Toml)" + } + set toml "" ;#result string + + foreach item $list { + set tag [lindex $item 0] + #puts "tomlish::encode::tomlish processing item '$item', tag '$tag'" + #during recursion, some tags require different error checking in different contexts. + set nextcontext $tag ; + + + #Handle invalid tag nestings + switch -- $context { + QKEYVAL - + KEYVAL { + if {$tag in {KEYVAL QKEYVAL}} { + error "Invalid tag '$tag' encountered within '$context'" + } + } + MULTISTRING { + #explicitly list the valid child tags + if {$tag ni {STRING STRINGPART WS NEWLINE CONT}} { + error "Invalid tag '$tag' encountered within a MULTISTRING" + } + } + default { + #no context, or no defined nesting error for this context + } + } + + switch -- $tag { + TOMLISH { + #optional root tag. Ignore. + } + QKEYVAL - + KEYVAL { + if {$tag eq "KEYVAL"} { + append toml [lindex $item 1] ;#Key + } else { + append toml \"[lindex $item 1]\" ;#Quoted Key + } + foreach part [lrange $item 2 end] { + if {$part eq "="} { + append toml "=" + } else { + append toml [::tomlish::encode::tomlish [list $part] $nextcontext] + } + } + } + TABLE { + append toml "\[[lindex $item 1]\]" ;#table name + foreach part [lrange $item 2 end] { + append toml [::tomlish::encode::tomlish [list $part] $nextcontext] + } + + } + ITABLE { + #inline table - e.g within array or on RHS of keyval/qkeyval + set data "" + foreach part [lrange $item 1 end] { + append data [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\{$data\}" + } + ARRAY { + + set arraystr "" + foreach part [lrange $item 1 end] { + append arraystr [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\[$arraystr\]" + } + WS { + append toml [lindex $item 1] + } + SEP { + append toml "," + } + NEWLINE { + set chartype [lindex $item 1] + if {$chartype eq "lf"} { + append toml \n + } elseif {$chartype eq "crlf"} { + append toml \r\n + } else { + error "Unrecognized newline type '$chartype'" + } + } + CONT { + #line continuation character "\" + append toml "\\" + } + STRING { + #simple double quoted strings only + # + return \"[lindex $item 1]\" + } + STRINGPART { + return [lindex $item 1] + } + MULTISTRING { + #Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts + set multistring "" ;#variable to build up the string + foreach part [lrange $item 1 end] { + append multistring [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\"\"\"$multistring\"\"\"" + } + LITSTRING { + #Single Quoted string(literal string) + append toml '[lindex $item 1]' + } + MULTILITSTRING { + #review - multilitstring can be handled as a single string? + set litstring "" + foreach part [lrange $item 1 end] { + append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml '''$litstring''' + } + INT - + BOOL - + FLOAT - + DATETIME { + append toml [lindex $item 1] + } + INCOMPLETE { + error "cannot process tomlish term tagged as INCOMPLETE" + } + COMMENT { + append toml "#[lindex $item 1]" + } + BOM { + #Byte Order Mark may appear at beginning of a file. Needs to be preserved. + append toml "\uFEFF" + } + default { + error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." + } + } + + } + return $toml + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::encode ---}] +} +#fish toml from tomlish + +#(encode tomlish as toml) +interp alias {} tomlish::to_toml {} tomlish::encode::tomlish + +# + + +namespace eval tomlish::decode { + #*** !doctools + #[subsection {Namespace tomlish::decode}] + #[para] + #[list_begin definitions] + + #return a Tcl list of tomlish tokens + #i.e get a standard list of all the toml terms in string $s + #where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. + #(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) + #Note that we deliberately don't check certain things such as duplicate table declarations here. + #Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. + # (e.g perhaps a toml editor to highlight violations for fixing) + # A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. + # e.g dicts or an object oriented structure + #Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage + #e.g to_dict will substitute \r \n \uHHHH \UHHHHHHH etc + #This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. + # (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) + #If we were to unescape a tab character for example + # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. + # For this reason, we also do absolutely no line-ending transformations based on platform. + # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { + #*** !doctools + #[call [fun toml] [arg s]] + #[para] return a Tcl list of tomlish tokens + + namespace upvar ::tomlish::parse is_parsing is_parsing + set is_parsing 1 + + + if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { + tomlish::parse::spacestack destroy + } + struct::stack ::tomlish::parse::spacestack + + namespace upvar ::tomlish::parse last_space_action last_space_action + namespace upvar ::tomlish::parse last_space_type last_space_type + + + namespace upvar ::tomlish::parse tok tok + set tok "" + + namespace upvar ::tomlish::parse type type + namespace upvar ::tomlish::parse tokenType tokenType + ::tomlish::parse::set_tokenType "" + namespace upvar ::tomlish::parse tokenType_list tokenType_list + set tokenType [list] ;#Flat (un-nested) list of tokentypes found + + namespace upvar ::tomlish::parse lastChar lastChar + set lastChar "" + + + set result "" + namespace upvar ::tomlish::parse nest nest + set nest 0 + + namespace upvar ::tomlish::parse v v ;#array keyed on nest level + + + set v(0) {TOMLISH} + array set s0 [list] ;#whitespace data to go in {SPACE {}} element. + set parentlevel 0 + + namespace upvar ::tomlish::parse i i + set i 0 + + namespace upvar ::tomlish::parse state state + + namespace upvar ::tomlish::parse braceCount braceCount + set barceCount 0 + namespace upvar ::tomlish::parse bracketCount bracketCount + set bracketCount 0 + + set sep 0 + set r 1 + namespace upvar ::tomlish::parse token_waiting token_waiting + set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. + + + set state "key-space" + ::tomlish::parse::spacestack push {space key-space} + namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) + set linenum 1 + + try { + while {$r} { + set r [::tomlish::parse::tok $s] + #puts stdout "got tok: '$tok' while parsing string '$s' " + set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' + + + + #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" + #puts "-->tok: $tok tokenType='$tokenType'" + set prevstate $state + ##### + set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] + ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + + set state $nextstate + if {$state eq "err"} { + error "State error - aborting parse. [tomlish::parse::report_line]" + } + + if {$last_space_action eq "pop"} { + #pop_trigger_tokens: newline tablename endarray endinlinetable + #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + switch -exact -- $tokenType { + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + tablename { + #note: a tablename only 'pops' if we are greater than zero + error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + } + tablearrayname { + #!review - tablearrayname different to tablename regarding push/pop? + #note: a tablename only 'pops' if we are greater than zero + error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + } + endarray { + #nothing to do here. + } + comma { + #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" + } + endinlinetable { + puts stderr "endinlinetable" + } + endmultiquote { + puts stderr "endmultiquote for last_space_action 'pop'" + } + default { + error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + } + } + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + + } elseif {$last_space_action eq "push"} { + incr nest 1 + set v($nest) [list] + # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + switch -exact -- $tokenType { + barekey { + set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + } + quotedkey - itablequotedkey { + set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + } + tablename { + #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! + #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish + # back to toml file will be identical. + #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. + # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, + # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. + + #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, + # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the + # tomlish list? + + set test_only [::tomlish::utils::tablename_trim $tok] + ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name + #note also that equivalent tablenames may have different toml representations even after being trimmed! + #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) + #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. + } + tablearrayname { + set test_only [::tomlish::utils::tablename_trim $tok] + puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name + } + startarray { + set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. + } + startinlinetable { + set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. + } + startmultiquote { + puts stderr "push trigger tokenType startmultiquote (todo)" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE + #JMN ??? + #set next_tokenType_known 1 + #::tomlish::parse::set_tokenType "multistring" + #set tok "" + } + default { + error "push trigger tokenType '$tokenType' not yet implemented" + } + } + + } else { + #no space level change + switch -exact -- $tokenType { + starttablename { + #$tok is triggered by the opening bracket and sends nothing to output + } + starttablearrayname { + #$tok is triggered by the double opening brackets and sends nothing to output + } + tablename - tablenamearray { + error "did not expect 'tablename/tablearrayname' without space level change" + #set v($nest) [list TABLE $tok] + } + endtablename - endtablearrayname { + #no output into the tomlish list for this token + } + startinlinetable { + puts stderr "decode::toml error. did not expect startlinetable without space level change" + } + startquote { + switch -exact -- $nextstate { + string { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itablequotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "startquote switch case not implemented for nextstate: $nextstate" + } + } + } + startmultiquote { + #review + puts stderr "no space level change - got startmultiquote" + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringpart" + set tok "" + } + endquote { + #nothing to do? + set tok "" + } + endmultiquote { + #JMN!! + set tok "" + } + string { + lappend v($nest) [list STRING $tok] + } + stringpart { + lappend v($nest) [list STRINGPART $tok] + } + multistring { + #review + lappend v($nest) [list MULTISTRING $tok] + } + quotedkey { + #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + + } + untyped-value { + #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. + if {$tok in {true false}} { + set tag BOOL + } elseif {[::tomlish::utils::is_int $tok]} { + set tag INT + } elseif {[::tomlish::utils::is_float $tok]} { + set tag FLOAT + } elseif {[::tomlish::utils::is_datetime $tok]} { + set tag DATETIME + } else { + error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + } + lappend v($nest) [list $tag $tok] + } + comment { + #puts stdout "----- comment token returned '$tok'------" + lappend v($nest) [list COMMENT "$tok"] + } + equal { + #we append '=' to the nest so that any surrounding whitespace is retained. + lappend v($nest) = + } + comma { + lappend v($nest) SEP + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + whitespace { + lappend v($nest) [list WS $tok] + } + continuation { + lappend v($nest) CONT + } + bom { + lappend v($nest) BOM + } + eof { + #ok - nothing more to add to the tomlish list. + #!todo - check previous tokens are complete/valid? + } + default { + error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + } + } + } + + if {!$next_tokenType_known} { + ::tomlish::parse::set_tokenType "" + set tok "" + } + + if {$state eq "end"} { + break + } + + + } + + #while {$nest > 0} { + # lappend v([expr {$nest -1}]) [set v($nest)] + # incr nest -1 + #} + while {[::tomlish::parse::spacestack size] > 1} { + ::tomlish::parse::spacestack pop + lappend v([expr {$nest -1}]) [set v($nest)] + incr nest -1 + + #set parent [spacestack peek] ;#the level being appended to + #lassign $parent type state + #if {$type eq "space"} { + # + #} elseif {$type eq "buffer"} { + # lappend v([expr {$nest -1}]) {*}[set v($nest)] + #} else { + # error "invalid spacestack item: $parent" + #} + } + + } finally { + set is_parsing 0 + } + + + return $v(0) + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::decode ---}] +} +#decode toml to tomlish +interp alias {} tomlish::from_toml {} tomlish::decode::toml + +namespace eval tomlish::utils { + #*** !doctools + #[subsection {Namespace tomlish::utils}] + #[para] + #[list_begin definitions] + + + #tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace + # tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] + #trimmed, the tablename becomes {a.b.c} + # A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] + #ie whitespace is only irrelevant if it's outside a quoted segment + #trimmed, the tablename becomes {a.b."c etc "} + proc tablename_trim {tablename} { + set segments [tablename_split $tablename false] + set trimmed_segments [list] + foreach seg $segments { + lappend trimmed_segments [::string trim $seg [list " " \t]] + } + return [join $trimmed_segments .] + } + + #utils::tablename_split + proc tablename_split {tablename {normalize false}} { + #we can't just split on . because we have to handle quoted segments which may contain a dot. + #eg {dog."tater.man"} + set i 0 + set sLen [::string length $tablename] + set segments [list] + set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + #quoted is for double-quotes, litquoted is for single-quotes (string literal) + set seg "" + for {} {$i < $sLen} {} { + + if {$i > 0} { + set lastChar [::string index $tablename [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [::string index $tablename $i] + incr i + + if {$c eq "."} { + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } + } + } elseif {($c eq "\"") && ($lastChar ne "\\")} { + if {$mode eq "unknown"} { + if {[::string trim $seg] ne ""} { + #we don't allow a quote in the middle of a bare key + error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" + } + set mode "quoted" + set seg "\"" + } elseif {$mode eq "unquoted"} { + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" ;#make sure we only accept a dot or end-of-data now. + } elseif {$mode eq "litquoted"} { + append seg $c + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } + } elseif {($c eq "\'")} { + if {$mode eq "unknown"} { + append seg $c + set mode "litquoted" + } elseif {$mode eq "unquoted"} { + #single quote inside e.g o'neill + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + + } elseif {$mode eq "litquoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + + } elseif {$c in [list " " \t]} { + if {$mode eq "syntax"} { + #ignore + } else { + append seg $c + } + } else { + if {$mode eq "syntax"} { + error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + } + if {$mode eq "unknown"} { + set mode "unquoted" + } + append seg $c + } + if {$i == $sLen} { + #end of data + ::tomlish::log::debug "End of data: mode='$mode'" + switch -exact -- $mode { + quoted { + if {$c ne "\""} { + error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" + } + if {$normalize} { + lappend segments $seg + } else { + lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong + } + } + litquoted { + set trimmed_seg [::string trim $seg] + if {[::string index $trimmed_seg end] ne "\'"} { + error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" + } + lappend segments $seg + } + unquoted - unknown { + lappend segments $seg + } + syntax { + #ok - segment already lappended + } + default { + lappend segments $seg + } + } + } + } + foreach seg $segments { + set trimmed [::string trim $seg [list " " \t]] + #note - we explicitly allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + #if {$trimmed in [list "''" "\"\""]} { + # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" + #} + if {$trimmed eq "" } { + error "tablename_split. Empty segment found. tablename: '$tablename'" + } + } + return $segments + } + + proc unicode_escape_info {slashu} { + #!todo + # validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and + # is a valid 'unicode scalar value' + # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive + #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} + if {[::string match {\\u*} $slashu]} { + set exp {^\\u([0-9a-fA-F]{4}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %4x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } + } else { + return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] + } + } elseif {[::string match {\\U*} $slashu]} { + set exp {^\\U([0-9a-fA-F]{8}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %8x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } else { + return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] + } + } + } else { + return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] + } + } else { + return [list err [list reason "Supplied string did not start with \\u or \\U" ]] + } + + } + + proc unescape_string {str} { + #note we can't just use Tcl subst because: + # it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. + # it would strip out backslashes inappropriately: e.g "\j" becomes just j + # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn + # it replaces\ with a single whitespace + #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + + set buffer "" + set buffer4 "" ;#buffer for 4 hex characters following a \u + set buffer8 "" ;#buffer for 8 hex characters following a \u + + set sLen [::string length $str] + + #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc + set slash_active 0 + set unicode4_active 0 + set unicode8_active 0 + + + #!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? + set i 0 + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [::string index $str [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [::string index $str $i] + ::tomlish::log::debug "unescape_string. got char $c" + scan $c %c n + if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { + #we don't expect unescaped unicode characters from 0000 to 001F - + #*except* for raw tab (which is whitespace) and newlines + error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" + } + incr i ;#must incr here because we do'returns'inside the loop + if {$c eq "\\"} { + if {$slash_active} { + append buffer "\\" + set slash_active 0 + } elseif {$unicode4_active} { + error "unescape_string. unexpected case slash during unicode4 not yet handled" + } elseif {$unicode8_active} { + error "unescape_string. unexpected case slash during unicode8 not yet handled" + } else { + # don't output anything (yet) + set slash_active 1 + } + } else { + if {$unicode4_active} { + if {[::string length $buffer4] < 4} { + append buffer4 $c + } + if {[::string length $buffer4] == 4} { + #we have a \uHHHH to test + set unicode4_active 0 + set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$unicode8_active} { + if {[::string length $buffer8] < 8} { + append buffer8 $c + } + if {[::string length $buffer8] == 8} { + #we have a \UHHHHHHHH to test + set unicode8_active 0 + set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$slash_active} { + set slash_active 0 + set ctest [string map {{"} dq} $c] + switch -exact -- $ctest { + dq { + set e "\\\"" + append buffer [subst -nocommand -novariable $e] + } + b - t - n - f - r { + set e "\\$c" + append buffer [subst -nocommand -novariable $e] + } + u { + set unicode4_active 1 + set buffer4 "" + } + U { + set unicode8_active 1 + set buffer8 "" + } + default { + set slash_active 0 + + append buffer "\\" + append buffer $c + } + } + } else { + append buffer $c + } + } + } + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + if {$unicode4_active} { + error "End of string reached before complete unicode escape sequence \uHHHH" + } + if {$unicode8_active} { + error "End of string reached before complete unicode escape sequence \UHHHHHHHH" + } + if {$slash_active} { + append buffer "\\" + } + return $buffer + } + + proc normalize_key {rawkey} { + set c1 [::string index $rawkey 0] + set c2 [::string index $rawkey end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes allowed within it. + set key [::string range $rawkey 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + # + set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set key [::tomlish::utils::unescape_string $keydata] + #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. + } else { + set key $rawkey + } + return $key + } + + proc string_to_slashu {string} { + set rv {} + foreach c [split $string {}] { + scan $c %c c + append rv {\u} + append rv [format %.4X $c] + } + return $rv + } + + #'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. + proc nonprintable_to_slashu {s} { + set res "" + foreach i [split $s ""] { + scan $i %c c + + set printable 0 + if {($c>31) && ($c<127)} { + set printable 1 + } + if {$printable} {append res $i} else {append res \\u[format %.4X $c]} + } + set res + } ;#RS + + #check if str is valid for use as a toml bare key + proc is_barekey {str} { + if {[::string length $str] == 0} { + return 0 + } else { + set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] + if {[::string length $str] == $matches} { + #all characters match the regexp + return 1 + } else { + return 0 + } + } + } + + #test only that the characters in str are valid for the toml specified type 'integer'. + proc int_validchars1 {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_int {str} { + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o + #first strip any +, - or _ (just for this test) + set check [::string map {+ "" - "" _ ""} $str] + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { + return 0 + } + # --------------------------------------- + + #check +,- only occur in the first position. + if {[::string last - $str] > 0} { + return 0 + } + if {[::string last + $str] > 0} { + return 0 + } + set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![string is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. + #presumably very large numbers would have to be supplied in a toml file as strings. + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max + if {$numeric_value > $::tomlish::max_int} { + return 0 + } + if {$numeric_value < $::tomlish::min_int} { + return 0 + } + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'float'. + proc float_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { + return 1 + } else { + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } + } + } + + proc is_float {str} { + set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } + + if {[::string length $str] == $matches} { + #all characters in legal range + #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) + #Toml spec also disallows leading zeros in the exponent part + #... but this seems less interoperable (some libraries generate leading zeroes in exponents) + #for now we will allow leading zeros in exponents + #!todo - configure 'strict' option to disallow? + #first strip any +, - or _ (just for this test) + set check [::string map {+ "" - "" _ ""} $str] + set r {([0-9])*} + regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E + set z {([0])*} + regexp $z $intpart leadingzeros + if {[::string length $leadingzeros] > 1} { + return 0 + } + #for floats, +,- may occur in multiple places + #e.g -2E-22 +3e34 + #!todo - check bounds ? + + #strip underscores for tcl double check + set check [::string map {_ ""} $str] + #string is double accepts inf nan +NaN etc. + if {![::string is double $check]} { + return 0 + } + + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'datetime'. + proc datetime_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_datetime {str} { + #e.g 1979-05-27T00:32:00-07:00 + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] + if {[::string length $str] == $matches} { + #all characters in legal range + #!todo - use full RFC 3339 parser? + lassign [split $str T] datepart timepart + #!todo - what if the value is 'time only'? + + if {[catch {clock scan $datepart} err]} { + puts stderr "tcl clock scan failed err:'$err'" + return 0 + } + #!todo - verify time part is reasonable + } else { + return 0 + } + return 1 + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::utils ---}] +} + +namespace eval tomlish::parse { + #*** !doctools + #[subsection {Namespace tomlish::parse}] + #[para] + #[list_begin definitions] + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text + + variable state + # states: + # key-space, curly-space, array-space + # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # + # notes: + # key-space i + # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + # + #stateMatrix defines for each state, actions to take for each possible token. + #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push command and the name of the space to push on the stack. + # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + + #test + variable stateMatrix + set stateMatrix [dict create] + + dict set stateMatrix\ + key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + + + dict set stateMatrix\ + curly-space {\ + whitespace "curly-space"\ + newline "curly-space"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + comma "curly-space"\ + eof "err"\ + comment "err"\ + } + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 + dict set stateMatrix\ + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + comma "curly-space"\ + eof "err"\ + comment "err"\ + } + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + newline "err"\ + eof "err"\ + untyped-value "samespace"\ + startquote "string"\ + startmultiquote {pushspace "multistring-space"}\ + startinlinetable {pushspace curly-space}\ + comment "err"\ + comma "err"\ + startarray {pushspace array-space}\ + } + dict set stateMatrix\ + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + eof "err"\ + untyped-value "samespace"\ + startarray {pushspace "array-space"}\ + endarray "popspace"\ + startquote "string"\ + startmultiquote "multistring"\ + comma "array-space"\ + comment "array-space"\ + } + dict set stateMatrix\ + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped-value "samespace"\ + startarray {pushspace array-space}\ + endarray "popspace"\ + startquote "string"\ + startmultiquote "multistring"\ + comma "array-space"\ + comment "err"\ + } + + + dict set stateMatrix\ + itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + #dict set stateMatrix\ + # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + dict set stateMatrix\ + itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + dict set stateMatrix\ + itablekeyval-space {} + dict set stateMatrix\ + itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} + + + dict set stateMatrix\ + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} + dict set stateMatrix\ + keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + dict set stateMatrix\ + keyval-space {} + + + + dict set stateMatrix\ + quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + dict set stateMatrix\ + string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + dict set stateMatrix\ + stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + dict set stateMatrix\ + multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + dict set stateMatrix\ + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} + dict set stateMatrix\ + tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + dict set stateMatrix\ + baretablename {whitespace "NA" newline "err" equal "value-expected"} + dict set stateMatrix\ + tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + dict set stateMatrix\ + tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + dict set stateMatrix\ + tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + dict set stateMatrix\ + end {} + + #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push + variable stateMatrix_orig { + key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} + value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} + array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} + array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} + keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + keyval-space {} + quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} + tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + end {} + } + #build a list of 'push triggers' from the stateMatrix + # ie tokens which can push a new space onto spacestack + set push_trigger_tokens [list] + tcl::dict::for {s transitions} $stateMatrix { + tcl::dict::for {token transition_to} $transitions { + set action [lindex $transition_to 0] + switch -exact -- $action { + pushspace - zeropoppushspace { + if {$token ni $push_trigger_tokens} { + lappend push_trigger_tokens $token + } + } + } + } + } + puts stdout "push_trigger_tokens: $push_trigger_tokens" + #!todo - hard code once stateMatrix finalised? + + + #mainly for the -space states: + #redirect to another state $c based on a state transition from $whatever to $b + # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. + #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' + variable spacePopTransitions { + array-space array-syntax + curly-space curly-syntax + keyval-space keytail + itablekeyval-space itablevaltail + } + variable spacePushTransitions { + keyval-space keyval-syntax + itablekeyval-space itablekeyval-syntax + array-space array-space + curly-space curly-space + key-space tablename + } + + + variable state_list + + namespace export tomlish toml + namespace ensemble create + + proc getNextState {tokentype currentstate} { + variable nest + variable v + + variable spacePopTransitions + variable spacePushTransitions + variable last_space_action "none" + variable last_space_type "none" + variable state_list + + set result "" + + if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { + set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] + ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + switch -exact -- [lindex $transition_to 0] { + popspace { + spacestack pop + set parent [spacestack peek] + lassign $parent type target + set last_space_action "pop" + set last_space_type $type + + if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { + set next [dict get $::tomlish::parse::spacePopTransitions $target] + ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + samespace { + #note the same data as popspace (spacePopTransitions) is used here. + set parent [spacestack peek] + ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" + lassign $parent type target + if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { + set next [dict get $::tomlish::parse::spacePopTransitions $target] + ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + zeropoppushspace { + if {$nest > 0} { + #pop back down to the root level (key-space) + spacestack pop + set parent [spacestack peek] + lassign $parent type target + set last_space_action "pop" + set last_space_type $type + + #----- + #standard pop + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + #----- + } + #re-entrancy + + #set next [list pushspace [lindex $transition_to 1]] + set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 + ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" + set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + } + pushspace { + set target [lindex $transition_to 1] + spacestack push [list space $target] + set last_space_action "push" + set last_space_type "space" + + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $target] + ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + default { + set result $transition_to + } + } + } else { + set result "nostate-err" + + } + lappend state_list $result + return $result + } + + proc report_line {{line ""}} { + variable linenum + variable is_parsing + if {$is_parsing} { + if {$line eq ""} { + set line $linenum + } + return "Line Number: $line" + } else { + #not in the middle of parsing tomlish text - return nothing. + return "" + } + } + + #produce a *slightly* more readable string rep of the nest for puts etc. + proc nest_pretty1 {list} { + set prettier "{" + + foreach el $list { + if { [lindex $el 0] eq "NEWLINE"} { + append prettier "[list $el]\n" + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { + append prettier [nest_pretty1 $el] + } else { + append prettier "[list $el] " + } + } + append prettier "}" + return $prettier + } + + proc set_tokenType {t} { + variable tokenType + variable tokenType_list + if {![info exists tokenType]} { + set tokenType "" + } + lappend tokenType_list $t + set tokenType $t + } + + proc switch_tokenType {t} { + variable tokenType + variable tokenType_list + lset tokenType_list end $t + set tokenType $t + } + + proc get_tokenType {} { + variable tokenType + return $tokenType + } + + proc _shortcircuit_startquotesequence {} { + variable tok + variable i + set toklen [::string length $tok] + if {$toklen == 1} { + set_tokenType "startquote" + incr i -1 + return -level 2 1 + } elseif {$toklen == 2} { + set_tokenType "startquote" + set tok "\"" + incr i -2 + return -level 2 1 + } + } + + #return a list of 0 1 or 2 tokens + #tomlish::parse::tok + proc tok {s} { + variable nest + variable v + variable i + variable tok + variable type ;#character type + variable state ;#FSM + + set resultlist [list] + + variable tokenType + variable tokenType_list + + + variable endToken + set sLen [::string length $s] + + variable lastChar + + variable braceCount + variable bracketCount + + + #------------------------------ + #Previous run found another (presumably single-char) token + variable token_waiting + if {[dict size $token_waiting]} { + set tokenType [dict get $token_waiting type] + set tok [dict get $token_waiting tok] + dict unset token_waiting type + dict unset token_waiting tok + return 1 + } + #------------------------------ + + set slash_active 0 + set quote 0 + set c "" + set multi_dquote "" + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [string index $s [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [string index $s $i] + #puts "got char $c during tokenType '$tokenType'" + incr i ;#must incr here because we do'returns'inside the loop + + set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + switch -exact -- $ctest { + # { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" + } + whitespace { + # hash marks end of whitespace token + #do a return for the whitespace, set token_waiting + #dict set token_waiting type comment + #dict set token_waiting tok "" + incr i -1 ;#leave comment for next run + return 1 + } + untyped-value { + #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? + #we will accept a comment marker as an immediate terminator of the untyped-value. + incr i -1 + return 1 + } + default { + #quotedkey, string, multistring + append tok $c + } + } + } else { + #$slash_active not relevant when no tokenType + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } + lc { + set multi_dquote "" ;#!! + #test jmn2024 + #left curly brace + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename { + error "unexpected tablename problem" + #$slash_active not relevant to this tokentype + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + comment { + if {$slash_active} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + if {$slash_active} { + set tok "\\\{" + } else { + set tok "\{" + } + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + + } + rc { + set multi_dquote "" ;#!! + #right curly brace + try { + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endinlinetable + dict set token_waiting tok "" + return 1 + } + tablearrayname { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + itablevaltail { + + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname { + error "unexpected tablearrayname problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + curly-syntax - curly-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itablevaltail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itablekeyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + default { + #JMN2024b keytail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + + } + lb { + set multi_dquote "" ;#!! + #left square bracket + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename { + #$slash_active not relevant to this tokentype + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + comment { + if {$slash_active} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + key-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + rb { + set multi_dquote "" ;#!! + #right square bracket + try { + + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablename + dict set token_waiting tok "" + return 1 + } + tablearraynames { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + error "unexpected tablename problem" + + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname { + error "unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + default { + error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + #backslash + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - litstring - multilitstring - comment - tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + barekey { + error "Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + if {$state eq "multistring-space"} { + set slash_active 1 + } else { + error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } + } + } + dq { + #double quote + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length in 'startquotesequence'" + } + } + endquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "endmultiquote" + return 1 + } else { + error "unexpected token length in 'endquotesequence'" + } + } + string { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #unescaped quote always terminates a string? + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } + } + stringpart { + #sub element of multistring + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + value-expected { + if {$multi_dquote eq "\"\""} { + dict set token_waiting type startmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + #end whitespace token and reprocess + incr i -1 + return 1 + #append multi_dquote "\"" + } + } + default { + dict set token_waiting type startquote + dict set token_waiting tok "\"" + return 1 + } + } + } + comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } + } + tablename - tablearrayname { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + key-space { + set tokenType startquote + set tok $c + return 1 + } + curly-space { + set tokenType startquote + set tok $c + return 1 + } + tablename - tablearrayname { + set_tokenType $state + set tok $c + } + default { + error "Unexpected quote during state '$state' [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - comment - quotedkey { + #for these tokenTypes an = is just data. + append tok $c + } + stringpart { + append tok $dquotes$c + } + whitespace { + dict set token_waiting type equal + dict set token_waiting tok = + return 1 + } + barekey { + dict set token_waiting type equal + dict set token_waiting tok = + return 1 + } + default { + error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok ${dquotes}= + } + default { + set_tokenType equal + set tok = + return 1 + } + } + } + } + cr { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \r carriage return + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + stringpart { + append tok $dquotes$c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } + } + } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) + set_tokenType newline + set tok cr + } + } + lf { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \n newline + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + newline { + #this lf is the trailing part of a crlf + append tok lf + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + } else { + set had_slash $slash_active + set slash_active 0 + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + set_tokenType newline + set tok lf + return 1 + } + } + } + , { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - comment - quotedkey - tablename - tablearrayname { + append tok $c + } + stringpart { + append tok $dquotes$c + } + default { + dict set token_waiting type comma + dict set token_waiting tok "," + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "," + } + multiliteral-space { + set_tokenType literalpart + set tok "," + } + default { + set_tokenType comma + set tok "," + return 1 + } + } + } + } + . { + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment - quotedkey - untyped-value { + append tok $c + } + baretablename - tablename - tablearrayname { + #subtable - split later - review + append tok $c + } + barekey { + #we need to transition the barekey to become a structured table name ??? review + switch_tokenType tablename + incr i -1 + + #error "barekey period unimplemented" + } + default { + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #dict set token_waiting type period + #dict set token_waiting tok "." + #return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "." + } + multiliteral-space { + set_tokenType literalpart + set tok "." + } + default { + set_tokenType untyped-value + set tok "." + } + } + } + + } + " " { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {[::string length $tokenType]} { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + untyped-value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + comment { + if {$had_slash} { + append tok "\\" + } + append tok $c + } + quotedkey - string { + if {$had_slash} { + append tok "\\" + } + #if {$dquotes eq "\""} { + #} + append tok $c + } + whitespace { + append tok $c + } + stringpart { + if {$had_slash} { + #REVIEW + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + #keeping WS separate allows easier processing of CONT stripping + append tok $dquotes + incr i -1 + return 1 + } + } + starttablename { + incr i -1 + return 1 + } + starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearrayname { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + if {$had_slash} { + set tok "\\$c" + } else { + set tok $c + } + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return + } + set_tokenType "whitespace" + append tok $c + } + } + default { + if {$had_slash} { + error "unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } + } + } + } + tab { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set token_waiting type whitespace + #set token_waiting tok $c + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + quotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + append tok $dquotes$c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + default { + set_tokenType "whitespace" + append tok $c + } + } + } + } + bom { + #BOM (Byte Order Mark) - ignored by token consumer + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + default { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + endquotesequence { + puts stderr "endquotesequence: $tok" + } + whitespace { + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } + barekey { + if {[tomlish::utils::is_barekey $c]} { + append tok $c + } else { + error "Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + incr i -1 + #allow statemachine to set context for subsequent chars + return 1 + } + stringpart { + append tok $dquotes$c + } + default { + #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + append tok $c + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + key-space - curly-space - curly-syntax { + #if no currently active token - assume another key value pair + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } + tablename { + set_tokenType "tablename" + set tok $c + } + tablearrayname { + set_tokenType "tablearrayname" + set tok $c + } + default { + set_tokenType "untyped-value" + set tok $c + } + } + } + } + } + + } + + #run out of characters (eof) + if {[::string length $tokenType]} { + #check for invalid ending tokens + #if {$state eq "err"} { + # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" + #} + if {$tokenType eq "startquotesequence"} { + set toklen [::string length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + eror "eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #dict set token_waiting type "string" + #dict set token_waiting tok "" + return 1 + } + } + dict set token_waiting type "eof" + dict set token_waiting tok "eof" + return 1 + } else { + ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + set tokenType "eof" + set tok "eof" + } + return 0 + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::parse ---}] +} + +tcl::namespace::eval tomlish::app { + variable applist [list encoder decoder test] + + #*** !doctools + #[subsection {Namespace tomlish::app}] + #[para] + #[list_begin definitions] + + proc decoder {args} { + #*** !doctools + #[call app::[fun decoder] [arg args]] + #[para] read toml on stdin until EOF + #[para] on error - returns non-zero exit code and writes error on stderr + #[para] on success - returns zero exit code and writes JSON encoding of the data on stdout + #[para] This decoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + #fconfigure stdin -encoding utf-8 + fconfigure stdin -translation binary + #Just slurp it all - presumably we are not handling massive amounts of data on stdin. + # - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. + if {[catch { + set toml [read stdin] + }]} { + exit 2 ;#read error + } + try { + set j [::tomlish::toml_to_json $toml] + } on error {em} { + puts stderr "decoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $j + exit 0 + } + + proc encoder {args} { + #*** !doctools + #[call app::[fun encoder] [arg args]] + #[para] read JSON on stdin until EOF + #[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation + #[para] return zero exitcode and TOML data on stdout if JSON data can be converted. + #[para] This encoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + fconfigure stdin -translation binary + if {[catch { + set json [read stdin] + }]} { + exit 2 ;#read error + } + try { + set toml [::tomlish::json_to_toml $json] + } on error {em} { + puts stderr "encoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $toml + exit 0 + } + + proc test {args} { + set opts [dict merge [dict create] $args] + + package require test::tomlish + if {[dict exists $opts -suite]} { + test::tomlish::suite [dict get $opts -suite] + } + test::tomlish::run + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::app ---}] +} + +proc ::tomlish::appnames {} { + set applist [list] + foreach cmd [info commands ::tomlish::app::*] { + lappend applist [namespace tail $cmd] + } + return $applist +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace tomlish::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +if {$argc > 0} { + puts stderr "argc: $argc args: $argv" + + if {($argc == 1)} { + if {[::string tolower $argv] in {help -help h -h}} { + puts stdout "Usage: -app where appname one of:[tomlish::appnames]" + exit 0 + } else { + puts stderr "Argument '$argv' not understood. Try -help" + exit 1 + } + } + set opts [dict create] + set opts [dict merge $opts $argv] + + set opts_understood [list -app ] + if {"-app" in [dict keys $opts]} { + #Don't vet the remaining opts - as they are interpreted by each app + } else { + foreach key [dict keys $opts] { + if {$key ni $opts_understood} { + puts stderr "Option '$key' not understood" + exit 1 + } + } + } + if {[dict exists $opts -app]} { + set app [dict get $opts -app] + if {$app ni [tomlish::appnames]} { + puts stderr "app '[dict get $opts -app]' not found" + exit 1 + } + tomlish::app::$app {*}$opts + } +} + +## Ready +package provide tomlish [namespace eval tomlish { + variable pkg tomlish + variable version + set version 1.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/modules/punk/aliascore-999999.0a1.0.tm b/src/modules/punk/aliascore-999999.0a1.0.tm index 90a31f7c..1019fe4a 100644 --- a/src/modules/punk/aliascore-999999.0a1.0.tm +++ b/src/modules/punk/aliascore-999999.0a1.0.tm @@ -102,6 +102,8 @@ tcl::namespace::eval punk::aliascore { variable aliases #use absolute ns ie must be prefixed with :: #single element commands are imported if source command already exists, otherwise aliased. multi element commands are aliased + + #functions must be in export list of their source namespace set aliases [tcl::dict::create\ tstr ::punk::lib::tstr\ list_as_lines ::punk::lib::list_as_lines\ @@ -109,11 +111,23 @@ tcl::namespace::eval punk::aliascore { linelist ::punk::lib::linelist\ linesort ::punk::lib::linesort\ pdict ::punk::lib::pdict\ - plist [list ::punk::lib::pdict -roottype list]\ - showlist [list ::punk::lib::showdict -roottype list]\ + plist {::punk::lib::pdict -roottype list}\ + showlist {::punk::lib::showdict -roottype list}\ showdict ::punk::lib::showdict\ ansistrip ::punk::ansi::ansistrip\ stripansi ::punk::ansi::ansistrip\ + ansiwrap ::punk::ansi::ansiwrap\ + colour ::punk::console::colour\ + ansi ::punk::console::ansi\ + color ::punk::console::colour\ + a+ ::punk::console::code_a+\ + A+ {::punk::console::code_a+ forcecolour}\ + a ::punk::console::code_a\ + A {::punk::console::code_a forcecolour}\ + a? ::punk::console::code_a?\ + A? {::punk::console::code_a? forcecolor}\ + smcup ::punk::console::enable_alt_screen\ + rmcup ::punk::console::disable_alt_screen\ ] #*** !doctools diff --git a/src/modules/punk/ansi-999999.0a1.0.tm b/src/modules/punk/ansi-999999.0a1.0.tm index 6ef15c9e..0b4db903 100644 --- a/src/modules/punk/ansi-999999.0a1.0.tm +++ b/src/modules/punk/ansi-999999.0a1.0.tm @@ -413,6 +413,7 @@ tcl::namespace::eval punk::ansi { tcl::namespace::export\ {a?} {a+} a \ ansistring\ + ansiwrap\ convert*\ clear*\ cursor_*\ @@ -722,77 +723,11 @@ tcl::namespace::eval punk::ansi { #candidate for zig/c implementation? proc stripansi2 {text} { - - set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters join [::punk::ansi::ta::split_at_codes $text] "" } - proc stripansi1 {text} { - - #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW - - variable escape_terminals ;#dict - variable ::punk::ansi::ta::standalone_code_map ;#map to empty string - - set text [convert_g0 $text] - - - set text [tcl::string::map $standalone_code_map $text] - #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm - #\x1b#3 double-height letters top half - #\x1b#4 double-height letters bottom half - #\x1b#5 single-width line - #\x1b#6 double-width line - #\x1b#8 dec test fill screen - - - #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. - - #Theoretically line endings can occur within an ST payload (review e.g title?) - #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) - - set inputlist [split $text ""] - set outputlist [list] - - set in_escapesequence 0 - #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) - - set i 0 - foreach u $inputlist { - set v [lindex $inputlist $i+1] - set uv ${u}${v} - if {$in_escapesequence eq "2b"} { - #2nd byte - done. - set in_escapesequence 0 - } elseif {$in_escapesequence != 0} { - set endseq [tcl::dict::get $escape_terminals $in_escapesequence] - if {$u in $endseq} { - set in_escapesequence 0 - } elseif {$uv in $endseq} { - set in_escapesequence 2b ;#flag next byte as last in sequence - } - } else { - #handle both 7-bit and 8-bit CSI and OSC - if {[regexp {^(?:\033\[|\u009b)} $uv]} { - set in_escapesequence CSI - } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { - set in_escapesequence OSC - } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { - set in_escapesequence DCS - } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { - #SOS,PM,APC - all terminated with ST - set in_escapesequence MISC - } else { - lappend outputlist $u - } - } - incr i - } - return [join $outputlist ""] - } - #review - what happens when no terminator? #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set @@ -2029,7 +1964,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map - set fcposn [lsearch $args "forcecol*"] + set fcposn [lsearch $args "force*"] set fc "" set opt_forcecolour 0 if {$fcposn >= 0} { @@ -2409,7 +2344,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >= 0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -2767,7 +2702,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >=0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -3199,6 +3134,16 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } return $out } + proc move_emitblock {row col textblock} { + #*** !doctools + #[call [fun move_emitblock] [arg row] [arg col] [arg textblock]] + set commands "" + foreach ln [split $textblock \n] { + append commands [punk::ansi::move_emit $row $col $ln] + incr row + } + return $commands + } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] @@ -3315,18 +3260,90 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) - #Alt screen buffer + #Alt screen buffer - smcup/rmcup ti/te + #Note \x1b\[?47h doesn't work at all in some terminals (e.g alacritty,cmd on windows and reportedly kitty) + #It is also reported to have 'deceptively similar' effects to 1049 -but to be 'subtly broken' on some terminals. + #see: https://xn--rpa.cc/irl/term.html + #1049 (introduced by xterm in 1998?) considered the more modern version? + #1047,1048,1049 xterm private modes are 'composite' control sequences as replacement for original 47 sequence + #1049 - includes save cursor,switch to alt screen, clear screen + #e.g ? (below example not known to be exactly what 1049 does - but it's something similar) + #SMCUP + # \x1b7 (save cursor) + # \x1b\[?47h (switch) + # \x1b\[2J (clear screen) + #RMCUP + # \x1b\[?47l (switch back) + # \x1b8 (restore cursor) + + #1047 - clear screen on the way out (ony if actually on alt screen) proc enable_alt_screen {} { #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? - #\x1b\[?1049h ;#xterm - return \x1b\[?47h + return \x1b\[?1049h } proc disable_alt_screen {} { #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] - #\x1b\[?1049l + return \x1b\[?1049l + } + #47 - less widely supported(?) doesn't restore cursor or clear alt screen + proc enable_alt_screen2 {} { + return \x1b\[?47h + } + proc disable_alt_screen2 {} { return \x1b\[?47l } + proc term_colour_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_color_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_colour_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_color_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_colour_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_color_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_colour_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_color_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_colour_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + proc term_color_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + #15,16 tektronix fg, tektronix bg ??? + proc term_colour_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + proc term_color_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + #18 tektronix cursor colour ??? + proc term_colour_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + proc term_color_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + #22 pointer shape - there are other methods too - not known to work on windows terminal based VTs - review + proc term_colour_reset {} { + return "\x1b\]104\;\x1b\\" + } + proc term_color_reset {} { + return "\x1b\]104\;\x1b\\" + } # -- --- --- proc erase_line {} { @@ -4398,6 +4415,10 @@ tcl::namespace::eval punk::ansi::ta { variable re_ansi_detect_open expr {[regexp $re_ansi_detect_open $text]} } + proc detect_st_open {text} { + variable re_ST_open + expr {[regexp $re_ST_open $text]} + } #not in perl ta proc detect_csi {text} { @@ -4672,7 +4693,8 @@ tcl::namespace::eval punk::ansi::class { } oo::class create base_renderer { variable o_width - variable o_wrap o_overflow o_appendlines o_looplimit + variable o_autowrap_mode + variable o_overflow o_appendlines o_looplimit variable o_cursor_column o_cursor_row #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered @@ -4690,25 +4712,33 @@ tcl::namespace::eval punk::ansi::class { } tcl::namespace::path $nspath #-- -- - if {[llength $args] < 2} { - error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} + if {[llength $args] < 1} { + error {usage: ?-width ? ?-height ? ?-autowrap_mode [1|0]? ?-overflow [1|0]? from_ansistring} } - lassign [lrange $args end-1 end] from_ansistring to_ansistring + #lassign [lrange $args end-1 end] from_ansistring to_ansistring + set from_ansistring [lindex $args end] + set opts [tcl::dict::create\ - -width \uFFEF\ - -wrap 1\ - -overflow 0\ - -appendlines 1\ - -looplimit 15000\ - -experimental {}\ - -cursor_column 1\ - -cursor_row 1\ + -width \uFFEF\ + -height \uFFEF\ + -overflow 0\ + -appendlines 1\ + -looplimit 15000\ + -experimental {}\ + -cursor_column 1\ + -cursor_row 1\ + -insert_mode 0\ + -autowrap_mode 1\ + -initial_ansistring ""\ ] - puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" set argsflags [lrange $args 0 end-2] foreach {k v} $argsflags { switch -- $k { - -width - -wrap - -overflow - -appendlines - -looplimit - -experimental { + -width - -height - + -overflow - -appendlines - -looplimit - -experimental - + -autowrap_mode - + -insert_mode - + -initial_ansistring { tcl::dict::set opts $k $v } default { @@ -4717,8 +4747,19 @@ tcl::namespace::eval punk::ansi::class { } } } + set initial_ansistring [tcl::dict::get $opts -initial_ansistring] + if {$initial_ansistring eq ""} { + set to_ansistring [punk::ansi::class::class_ansistring new ""] + } else { + #todo - verify obj vs raw string + set to_ansistring $initial_ansistring + } + puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" + set o_width [tcl::dict::get $opts -width] - set o_wrap [tcl::dict::get $opts -wrap] + set o_height [tcl::dict::get $opts -height] + set o_autowrap_mode [tcl::dict::get $opts -autowrap_mode] + set o_insert_mode [tcl::dict::get $opts -insert_mode] set o_overflow [tcl::dict::get $opts -overflow] set o_appendlines [tcl::dict::get $opts -appendlines] set o_looplimit [tcl::dict::get $opts -looplimit] @@ -4736,6 +4777,9 @@ tcl::namespace::eval punk::ansi::class { method eval_in {script} { eval $script } + method renderbuf {} { + return $o_to_ansistring + } method cursor_column {{col ""}} { if {$col eq ""} { return $o_cursor_column @@ -4755,6 +4799,12 @@ tcl::namespace::eval punk::ansi::class { set o_cursor_row $row } + #set/query cursor state + method cursor_state {args} { + lassign $args r c + return [dict create row [my cursor_row $r] column [my cursor_column $c]] + } + #consider scroll area #we need to render to something with a concept of viewport, offscreen above,below,left,right? method rendernext {} { @@ -4834,7 +4884,8 @@ tcl::namespace::eval punk::ansi::class { #set rinfo [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -width $o_width -overflow 0 -cursor_column $col -cursor_row $row $undertext $overtext] } } - + #renderspace equivalent? channel based? + #todo $o_to_ansistring append $newtext return [tcl::dict::create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] @@ -4960,11 +5011,10 @@ tcl::namespace::eval punk::ansi::class { if {$o_renderer ne ""} { append result \n " renderer obj: $o_renderer" append result \n " renderer class: [info object class $o_renderer]" - } - if {$o_renderout ne ""} { - append result \n " render target ansistring: $o_renderout" - append result \n " render target has ansi : [$o_renderout has_ansi]" - append result \n " render target count : [$o_renderout count]" + set renderstring [$o_renderer renderbuf] + append result \n " render target ansistring: $renderstring" + append result \n " render target has ansi : [$renderstring has_ansi]" + append result \n " render target count : [$renderstring count]" } if {$verbose} { append result \n "ansisplits listing" @@ -5052,7 +5102,8 @@ tcl::namespace::eval punk::ansi::class { } method strippedlength {} { if {![llength $o_ansisplits]} {my MakeSplit} - + #review + return [string length [join $o_ptlist ""]] } #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already method stripped {} { @@ -5119,10 +5170,9 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ni $rtypes} { error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" } - if {$o_renderout eq ""} { - #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? - set o_renderout [punk::ansi::class::class_ansistring new ""] - } + #if {$o_renderout eq ""} { + # set o_renderout [punk::ansi::class::class_ansistring new ""] + #} if {$o_renderer ne ""} { set oinfo [info object class $o_renderer] set tail [tcl::namespace::tail $oinfo] @@ -5130,13 +5180,15 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ne $currenttype} { puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } else { return $currenttype } } else { puts "creating first renderer" - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } } #--- progressive rendering buffer - another ansistring object @@ -5149,10 +5201,13 @@ tcl::namespace::eval punk::ansi::class { return $o_renderwidth } #re-render if needed? - + puts stderr "renderwidth todo? re-render?" set o_renderwidth $rw } + method renderer {} { + return $o_renderer + } method render_state {} { #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. @@ -5160,10 +5215,36 @@ tcl::namespace::eval punk::ansi::class { } method renderbuf {} { #get the underlying renderobj - if any - return $o_renderout ;#also class_ansistring + #return $o_renderout ;#also class_ansistring + return [$o_renderer renderbuf] } - method render {} { + method render {{maxgraphemes ""}} { #full render - return buffer ansistring + set do_render 1 + set grapheme_count 0 + set other_count 0 + if {$maxgraphemes eq ""} { + while {$do_render} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } else { + while {$do_render && $grapheme_count <= $maxgraphemes} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } + return [dict create graphemes $grapheme_count other $other_count] } method rendernext {} { #render next available pt/code chunk only - not to end of available input @@ -5198,6 +5279,13 @@ tcl::namespace::eval punk::ansi::class { #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column + #like Tcl's append class_ansistring append returns the result directly - which for ANSI - can be inconvenient in the terminal + #class_ansistring append_string is a convenience wrapper to avoid returning the raw result + method append_string {args} { + my append {*}$args + return + } + #analagous to Tcl string append #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient method append {args} { @@ -5369,7 +5457,7 @@ tcl::namespace::eval punk::ansi::class { } - #method append_and_render - append and render up to end of appended data at same time + #method append_and_render? - append and render up to end of appended data at same time method view {args} { if {$o_string eq ""} { diff --git a/src/modules/punk/blockletter-999999.0a1.0.tm b/src/modules/punk/blockletter-999999.0a1.0.tm index 70e74271..e7e08528 100644 --- a/src/modules/punk/blockletter-999999.0a1.0.tm +++ b/src/modules/punk/blockletter-999999.0a1.0.tm @@ -20,12 +20,14 @@ #*** !doctools #[manpage_begin shellspy_module_punk::blockletter 0 999999.0a1.0] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] +#[titledesc {punk::blockletter frame-based large lettering test/logo}] [comment {-- Name section and table of contents description --}] #[moddesc {-}] [comment {-- Description at end of page heading --}] #[require punk::blockletter] #[keywords module] #[description] -#[para] - +#[para] This is primarily designed to test large lettering using the block2 frametype which requires the right font support +#[para] More reasonably sized block-lettering could be obtained using unicode half-blocks instead - but that doesn't allow the frame outline effect that block2 gives. +#[para] Individual blocks have a minimum width of 4 columns and a minimum height of 2 rows (smallest element that can be fully framed) # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ diff --git a/src/modules/punk/config-0.1.tm b/src/modules/punk/config-0.1.tm index 6e5dbeed..dd7ae873 100644 --- a/src/modules/punk/config-0.1.tm +++ b/src/modules/punk/config-0.1.tm @@ -60,12 +60,19 @@ tcl::namespace::eval punk::config { } # auto_exec_mechanism ;#whether to use exec instead of experimental shellfilter::run + + #optional channel transforms on stdout/stderr. + #can sometimes be useful to distinguish eventloop stdout/stderr writes compared to those triggered directly from repl commands + #If no distinction necessary - should use default_color_ + #The counterpart: default_color__repl is a transform that is added and removed with each repl evaluation. #startup color_stdout - parameters as suitable for punk::ansi::a+ (test with 'punk::ansi::a?') e.g "cyan bold" ;#not a good idea to default - set default_color_stdout "" + set default_color_stdout brightwhite ;#stdout colour including background calls (after etc) + set default_color_stdout_repl "" ;#stdout colour applied during direct repl call only #This wraps the stderr stream as it comes in with Ansi - probably best to default to empty.. but it's useful. #set default_color_stderr "red bold" #set default_color_stderr "web-lightsalmon" set default_color_stderr yellow ;#limit to basic colours for wider terminal support. yellow = term-olive + set default_color_stderr_repl "" ;#during repl call only set homedir "" if {[catch { @@ -132,7 +139,9 @@ tcl::namespace::eval punk::config { configset ".punkshell"\ scriptlib $default_scriptlib\ color_stdout $default_color_stdout\ + color_stdout_repl $default_color_stdout_repl\ color_stderr $default_color_stderr\ + color_stderr_repl $default_color_stderr_repl\ logfile_stdout $default_logfile_stdout\ logfile_stderr $default_logfile_stderr\ logfile_active 0\ @@ -172,9 +181,11 @@ tcl::namespace::eval punk::config { PUNK_CONFIGSET {type string}\ PUNK_SCRIPTLIB {type string}\ PUNK_AUTO_EXEC_MECHANISM {type string}\ - PUNK_AUTO_NOEXEC {type string default 0 help "set 1 to set Tcl's ::auto_noexec true.\nStops 'unknown' from running external programs"}\ - PUNK_COLOR_STDERR {type string}\ - PUNK_COLOR_STDOUT {type string}\ + PUNK_AUTO_NOEXEC {type string default 0 help "set 1 to set Tcl's ::auto_noexec true.\nStops 'unknown' from running external programs"}\ + PUNK_COLOR_STDERR {type string help "stderr colour transform. Use 'punk::ansi::a?' to see colour names"}\ + PUNK_COLOR_STDERR_REPL {type string help "stderr colour transform only while command running (not active during 'after')"}\ + PUNK_COLOR_STDOUT {type string help "stdout colour transform. Use 'punk::ansi::a?' to see colour names"}\ + PUNK_COLOR_STDOUT_REPL {type string help "stdout colour transform only while command running (not active during 'after')"}\ PUNK_LOGFILE_STDOUT {type string}\ PUNK_LOGFILE_STDERR {type string}\ PUNK_LOGFILE_ACTIVE {type string}\ diff --git a/src/modules/punk/console-999999.0a1.0.tm b/src/modules/punk/console-999999.0a1.0.tm index 273d444b..c31852b5 100644 --- a/src/modules/punk/console-999999.0a1.0.tm +++ b/src/modules/punk/console-999999.0a1.0.tm @@ -864,6 +864,7 @@ namespace eval punk::console { #Turning ansi off only stops new ansi being generated - but what about codes stored in configurations of existing elements such as tables/frames? #It will stop underlines/bold/reverse as well as SGR colours #what about ansi movement codes etc? + #we already have colour on|off which disables SGR codes in a+ etc as well as stderr/stdout channel transforms proc ansi {{onoff {}}} { variable ansi_wanted if {[string length $onoff]} { @@ -891,6 +892,7 @@ namespace eval punk::console { } } catch {punk::repl::reset_prompt} + puts stderr "::punk::console::ansi - use 'colour' command to turn SGR codes on/off" return [expr {$ansi_wanted}] } @@ -1295,10 +1297,10 @@ namespace eval punk::console { if {![catch {twapi::set_console_title $windowtitle} result]} { return $windowtitle } else { - error "punk::console::titleset failed to set title - try punk::console::ansi::titleset" + error "punk::console::local::titleset failed to set title - try punk::console::ansi::titleset" } } else { - error "punk::console::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" + error "punk::console::local::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" } } proc titleget {} { @@ -1306,12 +1308,12 @@ namespace eval punk::console { if {![catch {twapi::get_console_title} result]} { return $result } else { - error "punk::console::titleset failed to set title - ensure twapi is available" + error "punk::console::local::titleset failed to set title - ensure twapi is available" } } else { #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title - # won't work on all platforms/terminals - but may be worth implementing - error "punk::console::titleget has no local mechanism to get the window title on this platform." + # won't work on all platforms/terminals - but may be worth implementing (for overtype::renderspace / frames etc) + error "punk::console::local::titleget has no local mechanism to get the window title on this platform." } } } @@ -1327,7 +1329,7 @@ namespace eval punk::console { if { $ansi_wanted <= 0} { punk::console::local::titleset $windowtitle } else { - tailcall ansi::titleset $windowtitle + ansi::titleset $windowtitle } } #no known pure-ansi solution @@ -1486,8 +1488,6 @@ namespace eval punk::console { namespace import ansi::insert_lines namespace import ansi::delete_lines - interp alias {} smcup {} ::punk::console::enable_alt_screen - interp alias {} rmcup {} ::punk::console::disable_alt_screen #experimental proc rhs_prompt {col text} { @@ -1881,12 +1881,6 @@ namespace eval punk::console { -interp alias {} colour {} punk::console::colour -interp alias {} ansi {} punk::console::ansi -interp alias {} color {} punk::console::colour -interp alias {} a+ {} punk::console::code_a+ -interp alias {} a {} punk::console::code_a -interp alias {} a? {} punk::console::code_a? diff --git a/src/modules/punk/nav/fs-999999.0a1.0.tm b/src/modules/punk/nav/fs-999999.0a1.0.tm index 21608ba1..c897eeee 100644 --- a/src/modules/punk/nav/fs-999999.0a1.0.tm +++ b/src/modules/punk/nav/fs-999999.0a1.0.tm @@ -219,7 +219,8 @@ tcl::namespace::eval punk::nav::fs { } if {[punk::nav::fs::system::codethread_is_running]} { if {[llength [info commands ::punk::console::titleset]]} { - ::punk::console::titleset [lrange $result 1 end] + #if ansi is off - punk::console::titleset will try 'local' api method - which can fail + catch {::punk::console::titleset [lrange $result 1 end]} } } if {[string match //zipfs:/* $location]} { @@ -489,7 +490,7 @@ tcl::namespace::eval punk::nav::fs { tsv::lappend repl runchunks-$repl_runid {*}$chunklist } if {[llength [info commands ::punk::console::titleset]]} { - ::punk::console::titleset [lrange $result 1 end] ;#strip location key + catch {::punk::console::titleset [lrange $result 1 end]} ;#strip location key } } if {$repl_runid == 0} { diff --git a/src/modules/punk/ns-999999.0a1.0.tm b/src/modules/punk/ns-999999.0a1.0.tm index 999ea8e6..0d939073 100644 --- a/src/modules/punk/ns-999999.0a1.0.tm +++ b/src/modules/punk/ns-999999.0a1.0.tm @@ -1561,7 +1561,8 @@ tcl::namespace::eval punk::ns { #set name [string trim $name :] #set origin [namespace origin ${upns}::$name] - set origin [nseval $targetns [list ::namespace origin $name]] + set origin [nseval $targetns [list ::namespace origin $name]] + set resolved [nseval $targetns [list ::namespace which $name]] #An renamed alias may exist that is the same name as a proc that was created later.. so we must check for the proc before looking into aliases! if {$origin ni [info procs $origin]} { @@ -1613,7 +1614,8 @@ tcl::namespace::eval punk::ns { } lappend argl $a } - list proc [nsjoin ${targetns} $name] $argl $body + #list proc [nsjoin ${targetns} $name] $argl $body + list proc $resolved $argl $body } diff --git a/src/modules/punk/repl-0.1.tm b/src/modules/punk/repl-0.1.tm index 864c4030..937988cf 100644 --- a/src/modules/punk/repl-0.1.tm +++ b/src/modules/punk/repl-0.1.tm @@ -31,7 +31,9 @@ package require shellfilter #package require punk package require punk::lib package require punk::aliascore -punk::aliascore::init +if {[catch {punk::aliascore::init} errM]} { + puts stderr "punk::aliascore::init error: $errM" +} package require punk::config package require punk::ns package require punk::ansi @@ -2576,8 +2578,41 @@ namespace eval repl { } } proc colour args { - thread::send %replthread% [list punk::console::colour {*}$args] - interp eval code [list punk::console::colour {*}$args] + set colour_state [thread::send %replthread% [list punk::console::colour]] + if {[llength $args]} { + #colour call was not a query + set new_state [thread::send %replthread% [list punk::console::colour {*}$args]] + if {[expr {$new_state}] ne [expr {$colour_state}]} { + interp eval code [list punk::console::colour {*}$args] ;#align code interp's view of colour state with repl thread + interp eval code [string map [list $new_state] { + #adjust channel transform stack + set docolour [expr {}] + if {!$docolour} { + set s [lindex $::codeinterp::outstack end] + if {$s ne ""} { + shellfilter::stack::remove stdout $s + } + set s [lindex $::codeinterp::errstack end] + if {$s ne ""} { + shellfilter::stack::remove stderr $s + } + } else { + set running_config $::punk::config::running + if {[string length [dict get $running_config color_stdout]]} { + lappend ::codeinterp::outstack [shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout]]] + } + if {[string length [dict get $running_config color_stderr]]} { + lappend ::codeinterp::errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr]]] + } + + } + }] + } + return $new_state + } else { + return $colour_state + } + #todo - add/remove shellfilter stacked ansiwrap } proc mode args { thread::send %replthread% [list punk::console::mode {*}$args] @@ -2686,6 +2721,10 @@ namespace eval repl { #review argv0,argv,argc interp eval code { + namespace eval ::codeinterp { + variable errstack {} + variable outstack {} + } set ::argv0 %argv0% set ::auto_path %autopath% #puts stdout "safe interp" @@ -2724,6 +2763,10 @@ namespace eval repl { set ::auto_path %autopath% #puts stdout "safe interp" #flush stdout + namespace eval ::codeinterp { + variable errstack {} + variable outstack {} + } } interp eval code [list set ::tcl_platform(os) $::tcl_platform(os)] interp eval code [list set ::tcl_platform(osVersion) $::tcl_platform(osVersion)] @@ -2775,7 +2818,11 @@ namespace eval repl { set ::auto_path %autopath% tcl::tm::remove {*}[tcl::tm::list] tcl::tm::add {*}[lreverse %tmlist%] - #puts "-->[chan names]" + puts "code interp chan names-->[chan names]" + namespace eval ::codeinterp { + variable errstack {} + variable outstack {} + } # -- --- #review @@ -2805,11 +2852,22 @@ namespace eval repl { #catch {package require packageTrace} package require punk package require shellrun + + package require shellfilter + set running_config $::punk::config::running + if {[string length [dict get $running_config color_stderr]] && [punk::console::colour]} { + lappend ::codeinterp::errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr]]] + } + if {[string length [dict get $running_config color_stdout]] && [punk::console::colour]} { + lappend ::codeinterp::outstack [shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout]]] + } + package require textblock } errM]} { puts stderr "========================" puts stderr "code interp error:" puts stderr $errM + puts stderr $::errorInfo puts stderr "========================" error "$errM" } diff --git a/src/modules/punk/repl/codethread-999999.0a1.0.tm b/src/modules/punk/repl/codethread-999999.0a1.0.tm index bfbe976c..02265946 100644 --- a/src/modules/punk/repl/codethread-999999.0a1.0.tm +++ b/src/modules/punk/repl/codethread-999999.0a1.0.tm @@ -151,16 +151,19 @@ tcl::namespace::eval punk::repl::codethread { puts stderr "runscript is meant to be called from the parent repl thread via a thread::send to the codethread" return } + set outstack [list] + set errstack [list] upvar ::punk::config::running running_config - if {[string length [dict get $running_config color_stdout]] && [punk::console::colour]} { - lappend outstack [shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout]]] + if {[string length [dict get $running_config color_stdout_repl]] && [interp eval code punk::console::colour]} { + lappend outstack [interp eval code [list shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout_repl]]]] } - lappend outstack [shellfilter::stack::add stdout tee_to_var -settings {-varname ::punk::repl::codethread::output_stdout}] - if {[string length [dict get $running_config color_stderr]] && [punk::console::colour]} { - lappend errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr]]] - #lappend errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour cyan]] + lappend outstack [interp eval code [list shellfilter::stack::add stdout tee_to_var -settings {-varname ::punk::repl::codethread::output_stdout}]] + + if {[string length [dict get $running_config color_stderr_repl]] && [interp eval code punk::console::colour]} { + lappend errstack [interp eval code [list shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr_repl]]]] + # #lappend errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour cyan]] } - lappend errstack [shellfilter::stack::add stderr tee_to_var -settings {-varname ::punk::repl::codethread::output_stderr}] + lappend errstack [interp eval code [list shellfilter::stack::add stderr tee_to_var -settings {-varname ::punk::repl::codethread::output_stderr}]] #an experiment #set errhandle [shellfilter::stack::item_tophandle stderr] @@ -177,8 +180,8 @@ tcl::namespace::eval punk::repl::codethread { #interp transfer code $errhandle "" #flush $errhandle - set lastoutchar [string index [punk::ansi::ansistrip $output_stdout] end] - set lasterrchar [string index [punk::ansi::ansistrip $output_stderr] end] + set lastoutchar [string index [punk::ansi::ansistrip [interp eval code set ::punk::repl::codethread::output_stdout]] end] + set lasterrchar [string index [punk::ansi::ansistrip [interp eval code set ::punk::repl::codethread::output_stderr]] end] #puts stderr "-->[ansistring VIEW -lf 1 $lastoutchar$lasterrchar]" set tid [thread::id] @@ -188,11 +191,12 @@ tcl::namespace::eval punk::repl::codethread { tsv::set codethread_$tid errorcode $::errorCode + #only remove from shellfilter::stack the items we added to stack in this function foreach s [lreverse $outstack] { - shellfilter::stack::remove stdout $s + interp eval code [list shellfilter::stack::remove stdout $s] } foreach s [lreverse $errstack] { - shellfilter::stack::remove stderr $s + interp eval code [list shellfilter::stack::remove stderr $s] } thread::cond notify $replthread_cond } diff --git a/src/modules/punk/rest-999999.0a1.0.tm b/src/modules/punk/rest-999999.0a1.0.tm new file mode 100644 index 00000000..ec369ac3 --- /dev/null +++ b/src/modules/punk/rest-999999.0a1.0.tm @@ -0,0 +1,296 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) DKF (based on DKF's REST client support class) +# (C) 2024 JMN - packaging/possible mods +# +# @@ Meta Begin +# Application punk::rest 999999.0a1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin shellspy_module_punk::rest 0 999999.0a1.0] +#[copyright "2024"] +#[titledesc {punk::rest}] [comment {-- Name section and table of contents description --}] +#[moddesc {experimental rest}] [comment {-- Description at end of page heading --}] +#[require punk::rest] +#[keywords module rest http] +#[description] +#[para] Experimental *basic rest as wrapper over http lib - use tcllib's rest package for a more complete implementation of a rest client + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::rest +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::rest +#[list_begin itemized] + +package require Tcl 8.6- +package require http +#*** !doctools +#[item] [package {Tcl 8.6}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#tcl::namespace::eval punk::rest::class { + #*** !doctools + #[subsection {Namespace punk::rest::class}] + #[para] class definitions + #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + #} +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::rest { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + + + + #*** !doctools + #[subsection {Namespace punk::rest}] + #[para] Core API functions for punk::rest + #[list_begin definitions] + + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + set objname [namespace current]::matrixchain + if {$objname ni [info commands $objname]} { + # Support class for RESTful web services. + # This wraps up the http package to make everything appear nicer. + oo::class create CLIENT { + variable base wadls acceptedmimetypestack + + constructor baseURL { + set base $baseURL + my LogWADL $baseURL + } + + # TODO: Cookies! + + method ExtractError {tok} { + return [http::code $tok],[http::data $tok] + } + + method OnRedirect {tok location} { + upvar 1 url url + set url $location + # By default, GET doesn't follow redirects; the next line would + # change that... + #return -code continue + set where $location + my LogWADL $where + if {[string equal -length [string length $base/] $location $base/]} { + set where [string range $where [string length $base/] end] + return -level 2 [split $where /] + } + return -level 2 $where + } + + method LogWADL url { + return;# do nothing + set tok [http::geturl $url?_wadl] + set w [http::data $tok] + http::cleanup $tok + if {![info exist wadls($w)]} { + set wadls($w) 1 + puts stderr $w + } + } + + method PushAcceptedMimeTypes args { + lappend acceptedmimetypestack [http::config -accept] + http::config -accept [join $args ", "] + return + } + method PopAcceptedMimeTypes {} { + set old [lindex $acceptedmimetypestack end] + set acceptedmimetypestack [lrange $acceptedmimetypestack 0 end-1] + http::config -accept $old + return + } + + method DoRequest {method url {type ""} {value ""}} { + for {set reqs 0} {$reqs < 5} {incr reqs} { + if {[info exists tok]} { + http::cleanup $tok + } + set tok [http::geturl $url -method $method -type $type -query $value] + if {[http::ncode $tok] > 399} { + set msg [my ExtractError $tok] + http::cleanup $tok + return -code error $msg + } elseif {[http::ncode $tok] > 299 || [http::ncode $tok] == 201} { + set location {} + if {[catch { + set location [dict get [http::meta $tok] Location] + }]} { + http::cleanup $tok + error "missing a location header!" + } + my OnRedirect $tok $location + } else { + set s [http::data $tok] + http::cleanup $tok + return $s + } + } + error "too many redirections!" + } + + method GET args { + return [my DoRequest GET $base/[join $args /]] + } + + method POST {args} { + set type [lindex $args end-1] + set value [lindex $args end] + set m POST + set path [join [lrange $args 0 end-2] /] + return [my DoRequest $m $base/$path $type $value] + } + + method PUT {args} { + set type [lindex $args end-1] + set value [lindex $args end] + set m PUT + set path [join [lrange $args 0 end-2] /] + return [my DoRequest $m $base/$path $type $value] + } + + method DELETE args { + set m DELETE + my DoRequest $m $base/[join $args /] + return + } + export GET POST PUT DELETE + } + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::rest ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::rest::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::rest::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::rest::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::rest::system { + #*** !doctools + #[subsection {Namespace punk::rest::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::rest [tcl::namespace::eval punk::rest { + variable pkg punk::rest + variable version + set version 999999.0a1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/modules/punk/rest-buildversion.txt b/src/modules/punk/rest-buildversion.txt new file mode 100644 index 00000000..f47d01c8 --- /dev/null +++ b/src/modules/punk/rest-buildversion.txt @@ -0,0 +1,3 @@ +0.1.0 +#First line must be a semantic version number +#all other lines are ignored. diff --git a/src/modules/shellfilter-0.1.9.tm b/src/modules/shellfilter-0.1.9.tm index e1983653..4f887fd5 100644 --- a/src/modules/shellfilter-0.1.9.tm +++ b/src/modules/shellfilter-0.1.9.tm @@ -654,6 +654,7 @@ namespace eval shellfilter::chan { #detect will detect ansi SGR and gron groff and other codes if {[punk::ansi::ta::detect $buf]} { #split_codes_single regex faster than split_codes - but more resulting parts + #'single' refers to number of escapes - but can still contain e.g multiple SGR codes (or mode set operations etc) set parts [punk::ansi::ta::split_codes_single $buf] #process all pt/code pairs except for trailing pt foreach {pt code} [lrange $parts 0 end-1] { @@ -725,21 +726,70 @@ namespace eval shellfilter::chan { } else { #puts "-->esc but no detect" #no complete ansi codes - but at least one esc is present - if {[string first \x1b $buf] == [llength $buf]-1} { + if {[string last \x1b $buf] == [llength $buf]-1} { #only esc is last char in buf #puts ">>trailing-esc<<" set o_buffered \x1b set emit [string range $buf 0 end-1] } else { + set emit_anyway 0 #todo - ensure non-ansi escapes in middle of chunks don't lead to ever growing buffer - append o_buffered $chunk - set emit "" + if {[punk::ansi::ta::detect_st_open $buf]} { + #no detect - but we have an ST open (privacy msg etc) - allow a larger chunk before we give up - could include newlines (and even nested codes - although not widely interpreted that way in terms) + set st_partial_len [expr {[llength $buf] - [string last \x1b $buf]}] ;#length of unclosed ST code + #todo - configurable ST max - use 1k for now + if {$st_partial_len < 1001} { + append o_buffered $chunk + set emit "" + } else { + set emit_anyway 1 + } + } else { + set possible_code_len [expr {[llength $buf] - [string last \x1b $buf]}] ;#length of possible code + #most opening sequences are 1,2 or 3 chars - review? + set open_sequence_detected [punk::ansi::ta::detect_open $buf] + if {$possible_code_len > 10 && !$open_sequence_detected} { + set emit_anyway 1 + } else { + #could be composite sequence with params - allow some reasonable max sequence length + #todo - configurable max sequence length + #len 40-50 quite possible for SGR sequence using coloured underlines etc, even without redundancies + # - allow some headroom for redundant codes when the caller didn't merge. + if {$possible_code_len < 101} { + append o_buffered $chunk + set emit "" + } else { + #allow a little more grace if we at least have an opening ansi sequence of any type.. + if {$open_sequence_detected && $possible_code_len < 151} { + append o_buffered $chunk + set emit "" + } else { + set emit_anyway 1 + } + } + } + } + if {$emit_anyway} { + #looked ansi-like - but we've given enough length without detecting close.. + #treat as possible plain text with some esc or unrecognised ansi sequence + if {( ![llength $o_codestack] || ([llength $o_codestack] == 1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]]))} { + set emit $o_do_colour$buf$o_do_normal + } else { + set emit $buf + } + } } } } else { #no esc #puts stdout [a+ yellow]...[a] - set emit $buf + #test! + if {( ![llength $o_codestack] || ([llength $o_codestack] == 1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]]))} { + set emit $o_do_colour$buf$o_do_normal + } else { + set emit $buf + } + #set emit $buf set o_buffered "" } return [dict create emit $emit stacksize [llength $o_codestack]] diff --git a/src/modules/textblock-999999.0a1.0.tm b/src/modules/textblock-999999.0a1.0.tm index 45890bee..f3895b14 100644 --- a/src/modules/textblock-999999.0a1.0.tm +++ b/src/modules/textblock-999999.0a1.0.tm @@ -4041,6 +4041,9 @@ tcl::namespace::eval textblock { (default 0 if no existing -table supplied)" -table -default "" -type string -help "existing table object to use" -headers -default "" -help "list of header values. Must match number of columns" + -show_header -default "" -help "Whether to show a header row. + Leave as empty string for unspecified/automatic, + in which case it will display only if -headers list was supplied." -action -default "append" -choices {append replace} -help "row insertion method if existing -table is supplied if append is chosen the new values will always start at the first column" -columns -default "" -type integer -help "Number of table columns @@ -4101,11 +4104,34 @@ tcl::namespace::eval textblock { } } else { set is_new_table 1 + set headers {} + if {[tcl::dict::get $opts -headers] ne ""} { + set headers [dict get $opts -headers] + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 1 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } else { + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 0 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } + if {[tcl::string::is integer -strict $opt_columns]} { set cols $opt_columns + if {[llength $headers] && $cols != [llength $headers]} { + error "list_as_table number of columns ($cols) doesn't match supplied number of headers ([llength $headers])" + } } else { #review - set cols 2 ;#seems a reasonable default + if {[llength $headers]} { + set cols [llength $headers] + } else { + set cols 2 ;#seems a reasonable default + } } #defaults for new table only if {[tcl::dict::get $opts -frametype] eq ""} { @@ -4123,15 +4149,6 @@ tcl::namespace::eval textblock { if {[tcl::dict::get $opts -show_hseps] eq ""} { tcl::dict::set opts -show_hseps 0 } - set headers {} - set show_header 0 - if {[tcl::dict::get $opts -headers] ne ""} { - set headers [dict get $opts -headers] - if {[llength $headers] ne $cols} { - error "list_as_table number of headers ([llength $headers]) must match number of columns ($cols)" - } - set show_header 1 - } set t [textblock::class::table new\ -show_header $show_header\ diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config index 75a091dc..fc436d8c 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config @@ -6,7 +6,6 @@ set bootsupport_modules [list\ src/vendormodules cksum\ src/vendormodules modpod\ - src/vendormodules natsort\ src/vendormodules overtype\ src/vendormodules oolib\ src/vendormodules http\ @@ -22,6 +21,8 @@ set bootsupport_modules [list\ src/vendormodules uuid\ src/vendormodules md5\ src/vendormodules sha1\ + src/vendormodules tomlish\ + src/vendormodules test::tomlish\ modules punkcheck\ modules natsort\ modules punk::ansi\ @@ -60,6 +61,7 @@ set bootsupport_modules [list\ modules punk::zip\ modules punk::winpath\ modules textblock\ + modules natsort\ modules oolib\ ] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm index 38ce71c2..492341d6 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm @@ -233,7 +233,6 @@ tcl::namespace::eval overtype { -width \uFFEF\ -height \uFFEF\ -startcolumn 1\ - -wrap 0\ -ellipsis 0\ -ellipsistext $default_ellipsis_horizontal\ -ellipsiswhitespace 0\ @@ -243,11 +242,13 @@ tcl::namespace::eval overtype { -exposed1 \uFFFD\ -exposed2 \uFFFD\ -experimental 0\ - -cp437 1\ + -cp437 0\ -looplimit \uFFEF\ -crm_mode 0\ -reverse_mode 0\ -insert_mode 0\ + -wrap 0\ + -info 0\ -console {stdin stdout stderr}\ ] #expand_right is perhaps consistent with the idea of the page_size being allowed to grow horizontally.. @@ -263,14 +264,19 @@ tcl::namespace::eval overtype { #-ellipsis args not used if -wrap is true foreach {k v} $argsflags { switch -- $k { - -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace + -looplimit - -width - -height - -startcolumn - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -transparent - -exposed1 - -exposed2 - -experimental - -expand_right - -appendlines - -reverse_mode - -crm_mode - -insert_mode - -cp437 - - -console { + - -info - -console { tcl::dict::set opts $k $v } + -wrap - -autowrap_mode { + #temp alias -autowrap_mode for consistency with renderline + #todo - + tcl::dict::set opts -wrap $v + } default { error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" } @@ -280,10 +286,6 @@ tcl::namespace::eval overtype { # -- --- --- --- --- --- #review - expand_left for RTL text? set opt_expand_right [tcl::dict::get $opts -expand_right] - ##### - # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. - set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) - ##### #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. set opt_width [tcl::dict::get $opts -width] set opt_height [tcl::dict::get $opts -height] @@ -298,50 +300,34 @@ tcl::namespace::eval overtype { set opt_crm_mode [tcl::dict::get $opts -crm_mode] set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] set opt_insert_mode [tcl::dict::get $opts -insert_mode] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_autowrap_mode [tcl::dict::get $opts -wrap] + #??? wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### # -- --- --- --- --- --- set opt_cp437 [tcl::dict::get $opts -cp437] + set opt_info [tcl::dict::get $opts -info] - #initial state for renderspace 'terminal' reset - set initial_state [dict create\ - -width $opt_width\ - -height $opt_height\ - -crm_mode $opt_crm_mode\ - -reverse_mode $opt_reverse_mode\ - -insert_mode $opt_insert_mode\ - -cp437 $opt_cp437\ - ] # ---------------------------- # -experimental dev flag to set flags etc # ---------------------------- set data_mode 0 - set info_mode 0 set edit_mode 0 set opt_experimental [tcl::dict::get $opts -experimental] foreach o $opt_experimental { switch -- $o { - old_mode { - set info_mode 1 - } data_mode { set data_mode 1 } - info_mode { - set info_mode 1 - } edit_mode { set edit_mode 1 } } } # ---------------------------- - - #modes - set insert_mode $opt_insert_mode ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l - set autowrap_mode $opt_wrap - set reverse_mode $opt_reverse_mode - set crm_mode $opt_crm_mode set underblock [tcl::string::map {\r\n \n} $underblock] @@ -367,6 +353,20 @@ tcl::namespace::eval overtype { set renderwidth $opt_width set renderheight $opt_height } + #initial state for renderspace 'terminal' reset + set initial_state [dict create\ + renderwidth $renderwidth\ + renderheight $renderheight\ + crm_mode $opt_crm_mode\ + reverse_mode $opt_reverse_mode\ + insert_mode $opt_insert_mode\ + autowrap_mode $opt_autowrap_mode\ + cp437 $opt_cp437\ + ] + #modes + #e.g insert_mode can be toggled by insert key or ansi IRM sequence CSI 4 h|l + #opt_startcolumn ?? - DECSLRM ? + set vtstate $initial_state # -- --- --- --- #REVIEW - do we need ansi resets in the underblock? @@ -494,50 +494,55 @@ tcl::namespace::eval overtype { } #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set renderargs [list -experimental $opt_experimental\ - -cp437 $opt_cp437\ - -info 1\ - -crm_mode $crm_mode\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ - -reverse_mode $reverse_mode\ - -cursor_restore_attributes $cursor_saved_attributes\ - -transparent $opt_transparent\ - -width $renderwidth\ - -exposed1 $opt_exposed1\ - -exposed2 $opt_exposed2\ - -expand_right $opt_expand_right\ - -cursor_column $col\ - -cursor_row $row\ + set renderargs [list -experimental $opt_experimental\ + -cp437 $opt_cp437\ + -info 1\ + -crm_mode [tcl::dict::get $vtstate crm_mode]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ + -reverse_mode [tcl::dict::get $vtstate reverse_mode]\ + -cursor_restore_attributes $cursor_saved_attributes\ + -transparent $opt_transparent\ + -width [tcl::dict::get $vtstate renderwidth]\ + -exposed1 $opt_exposed1\ + -exposed2 $opt_exposed2\ + -expand_right $opt_expand_right\ + -cursor_column $col\ + -cursor_row $row\ $undertext\ $overtext\ ] set LASTCALL $renderargs set rinfo [renderline {*}$renderargs] - set instruction [tcl::dict::get $rinfo instruction] - set insert_mode [tcl::dict::get $rinfo insert_mode] - set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# - set reverse_mode [tcl::dict::get $rinfo reverse_mode] + set instruction [tcl::dict::get $rinfo instruction] + tcl::dict::set vtstate crm_mode [tcl::dict::get $rinfo crm_mode] + tcl::dict::set vtstate insert_mode [tcl::dict::get $rinfo insert_mode] + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# + tcl::dict::set vtstate reverse_mode [tcl::dict::get $rinfo reverse_mode] #how to support reverse_mode in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? # - review - the answer is probably that we don't need to - it is set/reset only during application of overtext - set crm_mode [tcl::dict::get $rinfo crm_mode] - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] - set unapplied [tcl::dict::get $rinfo unapplied] - set unapplied_list [tcl::dict::get $rinfo unapplied_list] - set post_render_col [tcl::dict::get $rinfo cursor_column] - set post_render_row [tcl::dict::get $rinfo cursor_row] - set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] - set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] - set visualwidth [tcl::dict::get $rinfo visualwidth] - set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] - set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] - tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + + #Note carefully the difference betw overflow_right and unapplied. + #overflow_right may need to be included in next run before the unapplied data + #overflow_right most commonly has data when in insert_mode + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] + set unapplied [tcl::dict::get $rinfo unapplied] + set unapplied_list [tcl::dict::get $rinfo unapplied_list] + set post_render_col [tcl::dict::get $rinfo cursor_column] + set post_render_row [tcl::dict::get $rinfo cursor_row] + set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] + set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] + set visualwidth [tcl::dict::get $rinfo visualwidth] ;#column width of what is 'rendered' for the line + set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] + set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - if {0 && $reverse_mode} { + if {0 && [tcl::dict::get $vtstate reverse_mode]} { #test branch - todo - prune puts stderr "---->[ansistring VIEW $replay_codes_overlay] rendered: $rendered" #review @@ -593,19 +598,29 @@ tcl::namespace::eval overtype { #todo - handle potential insertion mode as above for cursor restore? #keeping separate branches for debugging - review and merge as appropriate when stable - tcl::dict::incr instruction_stats $instruction - switch -- $instruction { + set instruction_type [lindex $instruction 0] ;#some instructions have params + tcl::dict::incr instruction_stats $instruction_type + switch -- $instruction_type { reset { #reset the 'renderspace terminal' (not underlying terminal) set row 1 set col 1 - - + set vtstate [tcl::dict::merge $vtstate $initial_state] + #todo - clear screen } {} { + #end of supplied line input #lf included in data set row $post_render_row set col $post_render_col + if {![llength $unapplied_list]} { + if {$overflow_right ne ""} { + incr row + } + } else { + puts stderr "renderspace end of input line - has unapplied: [ansistring VIEW $unapplied] (review)" + } + set col $opt_startcolumn } up { @@ -708,17 +723,18 @@ tcl::namespace::eval overtype { puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" - set sub_info [overtype::renderline -info 1\ - -width $renderwidth\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ + set sub_info [overtype::renderline\ + -info 1\ + -width [tcl::dict::get $vtstate renderwidth]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ -expand_right [tcl::dict::get $opts -opt_expand_right]\ ""\ $overflow_right\ ] - set foldline [tcl::dict::get $sub_info result] - set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? - set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. + set foldline [tcl::dict::get $sub_info result] + tcl::dict::set vtstate insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. linsert outputlines $renderedrow $foldline #review - row & col set by restore - but not if there was no save.. } @@ -745,6 +761,53 @@ tcl::namespace::eval overtype { set col $post_render_col #overflow + unapplied? } + clear_and_move { + #e.g 2J + if {$post_render_row > [llength $outputlines]} { + set row [llength $outputlines] + } else { + set row $post_render_row + } + set col $post_render_col + set overflow_right "" ;#if we're clearing - any overflow due to insert_mode is irrelevant + set clearedlines [list] + foreach ln $outputlines { + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $renderwidth]\x1b\[0m + if 0 { + + set lineparts [punk::ansi::ta::split_codes $ln] + set numcells 0 + foreach {pt _code} $lineparts { + if {$pt ne ""} { + foreach grapheme [punk::char::grapheme_split $pt] { + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + incr numcells 1 + } + default { + if {$grapheme eq "\u0000"} { + incr numcells 1 + } else { + incr numcells [grapheme_width_cached $grapheme] + } + } + } + + } + } + } + #replays/resets each line + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $numcells]\x1b\[0m + } + } + set outputlines $clearedlines + #todo - determine background/default to be in effect - DECECM ? + puts stderr "replay_codes_overlay: [ansistring VIEW $replay_codes_overlay]" + #lset outputlines 0 $replay_codes_overlay[lindex $outputlines 0] + + } lf_start { #raw newlines # ---------------------- @@ -780,27 +843,48 @@ tcl::namespace::eval overtype { append rendered $overflow_right set overflow_right "" } else { + #review - we should really make renderline do the work...? set overflow_width [punk::ansi::printing_length $overflow_right] if {$visualwidth + $overflow_width <= $renderwidth} { append rendered $overflow_right set overflow_right "" } else { - if {$visualwidth < $renderwidth} { - set graphemes [punk::char::grapheme_split $overflow_width] - set add "" - set addlen $visualwidth - set remaining_overflow $graphemes - foreach g $graphemes { - set w [overtype::grapheme_width_cached] - if {$addlen + $w <= $renderwidth} { - append add $g - incr addlen $w - lpop remaining_overflow - } else { - break - } + if {[tcl::dict::get $vtstate autowrap_mode]} { + set outputlines [linsert $outputlines $renderedrow $overflow_right] + set overflow_right "" + set row [expr {$renderedrow + 2}] + } else { + set overflow_right "" ;#abandon + } + + if {0 && $visualwidth < $renderwidth} { + puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" + error "incomplete - abandon?" + set overflowparts [punk::ansi::ta::split_codes $overflow_right] + set remaining_overflow $overflowparts + set filled 0 + foreach {pt code} $overflowparts { + lpop remaining_overflow 0 + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set add "" + set addlen $visualwidth + foreach g $graphemes { + set w [overtype::grapheme_width_cached $g] + if {$addlen + $w <= $renderwidth} { + append add $g + incr addlen $w + } else { + set filled 1 + break + } + } + append rendered $add + } + if {!$filled} { + lpop remaining_overflow 0 ;#pop code + } } - append rendered $add set overflow_right [join $remaining_overflow ""] } } @@ -829,14 +913,16 @@ tcl::namespace::eval overtype { #we may also have other control sequences that came after col 80 e.g cursor save if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" - #assertion - there should be no overflow.. - puts $lhs + #assertion - there should be no overflow.. + puts $lhs + } + if {![tcl::dict::get $vtstate insert_mode]} { + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode } - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right set row $post_render_row #set row $renderedrow @@ -981,7 +1067,7 @@ tcl::namespace::eval overtype { #normal single-width grapheme overflow #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" set row $post_render_row ;#renderline will not advance row when reporting overflow char - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { incr row set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? } else { @@ -1014,7 +1100,7 @@ tcl::namespace::eval overtype { #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { if {$renderwidth < 2} { #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character set idx 0 @@ -1055,6 +1141,14 @@ tcl::namespace::eval overtype { set row $post_render_row set col $post_render_col } + set_window_title { + set newtitle [lindex $instruction 1] + puts stderr "overtype::renderspace set_window_title [ansistring VIEW $newtitle] instruction '$instruction'" + # + } + reset_colour_palette { + puts stderr "overtype::renderspace instruction '$instruction' unimplemented" + } default { puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" } @@ -1062,7 +1156,7 @@ tcl::namespace::eval overtype { } - if {!$opt_expand_right && !$autowrap_mode} { + if {!$opt_expand_right && ![tcl::dict::get $vtstate autowrap_mode]} { #not allowed to overflow column or wrap therefore we get overflow data to truncate if {[tcl::dict::get $opts -ellipsis]} { set show_ellipsis 1 @@ -1160,11 +1254,22 @@ tcl::namespace::eval overtype { } set result [join $outputlines \n] - if {$info_mode} { + if {!$opt_info} { + return $result + } else { #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? #append result \n$instruction_stats\n + set inforesult [dict create\ + result $result\ + last_instruction $instruction\ + instruction_stats $instruction_stats\ + ] + if {$opt_info == 2} { + return [pdict -channel none inforesult] + } else { + return $inforesult + } } - return $result } #todo - left-right ellipsis ? @@ -2368,14 +2473,22 @@ tcl::namespace::eval overtype { } else { #linefeed occurred in middle or at end of text #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" - incr cursor_row - if {$idx == -1 || $overflow_idx > $idx} { - #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow - set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + if {$insert_mode == 0} { + incr cursor_row + if {$idx == -1 || $overflow_idx > $idx} { + #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + } + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + incr cursor_row + #don't adjust the overflow_idx + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction lf_mid + break ;# could have overdata following the \n - don't keep processing } - set instruction lf_mid - priv::render_unapplied $overlay_grapheme_control_list $gci - break } } @@ -3077,8 +3190,11 @@ tcl::namespace::eval overtype { set idx [expr {$cursor_column -1}] set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] incr idx_over + if {[llength $outcols]} { + priv::render_erasechar 0 [llength $outcols] + } priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction move + set instruction clear_and_move break } 3 { @@ -3749,6 +3865,72 @@ tcl::namespace::eval overtype { } 7OSC - 8OSC { + # OSCs are terminated with ST of either \007 or \x1b\\ - we allow either whether code was 7 or 8 bit + if {[tcl::string::index $codenorm end] eq "\007"} { + set code_content [tcl::string::range $codenorm 4 end-1] ;#ST is \007 + } else { + set code_content [tcl::string::range $codenorm 4 end-2] ;#ST is \x1b\\ + } + set first_colon [tcl::string::first {;} $code_content] + if {$first_colon == -1} { + #there probably should always be a colon - but we'll try to make sense of it without + set osc_code $code_content ;#e.g \x1b\]104\007 vs \x1b\]104\;\007 + } else { + set osc_code [tcl::string::range $code_content 0 $first_colon-1] + } + switch -exact -- $osc_code { + 2 { + set newtitle [tcl::string::range $code_content 2 end] + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list set_window_title $newtitle] + break + } + 4 { + #OSC 4 - set colour palette + #can take multiple params + #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ + set params [tcl::string::range $code_content 1 end] + + puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 { + #OSC 10 through 17 - so called 'dynamic colours' + #can take multiple params - each successive parameter changes the next colour in the list + #- e.g if code started at 11 - next param is for 12. 17 takes only one param because there are no more + #10 change text foreground colour + #11 change text background colour + #12 change text cursor colour + #13 change mouse foreground colour + #14 change mouse background colour + #15 change tektronix foreground colour + #16 change tektronix background colour + #17 change highlight colour + set params [tcl::string::range $code_content 2 end] + + puts stderr "overtype::renderline OSC $osc_code set dynamic colours unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 18 { + #why is this not considered one of the dynamic colours above? + #https://www.xfree86.org/current/ctlseqs.html + #tektronix cursor color + puts stderr "overtype::renderline OSC 18 - set tektronix cursor color unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + 104 { + #reset colour palette + #we want to do it for the current rendering context only (vvt) - not just pass through to underlying vt + puts stderr "overtype::renderline OSC 104 reset colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list reset_colour_palette] + break + } + default { + puts stderr "overtype::renderline OSC - UNIMPLEMENTED. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } } default { @@ -3791,6 +3973,23 @@ tcl::namespace::eval overtype { #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW set in_overflow 1 } + set trailing_nulls 0 + foreach ch [lreverse $outcols] { + if {$ch eq "\u0000"} { + incr trailing_nulls + } else { + break + } + } + if {$trailing_nulls} { + set first_tail_null_posn [expr {[llength $outcols] - $trailing_nulls}] + } else { + set first_tail_null_posn -1 + } + + #puts stderr "first_tail_null_posn: $first_tail_null_posn" + #puts stderr "colview: [ansistring VIEW $outcols]" + foreach ch $outcols { #puts "---- [ansistring VIEW $ch]" @@ -3865,8 +4064,17 @@ tcl::namespace::eval overtype { } append outstring $gxleader append outstring $sgrleader - if {$idx+1 < $cursor_column} { - append outstring [tcl::string::map {\u0000 " "} $ch] + if {$ch eq "\u0000"} { + if {$cp437_glyphs} { + #map all nulls including at tail to space + append outstring " " + } else { + if {$trailing_nulls && $i < $first_tail_null_posn} { + append outstring " " ;#map inner nulls to space + } else { + append outstring \u0000 + } + } } else { append outstring $ch } @@ -3874,12 +4082,20 @@ tcl::namespace::eval overtype { incr i } #flower.ans good test for null handling - reverse line building - if {![ansistring length $overflow_right]} { - set outstring [tcl::string::trimright $outstring "\u0000"] - } - set outstring [tcl::string::map {\u0000 " "} $outstring] - set overflow_right [tcl::string::trimright $overflow_right "\u0000"] - set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] + #review - presence of overflow_right doesn't indicate line's trailing nulls should remain. + #The cells could have been erased? + #if {!$cp437_glyphs} { + # #if {![ansistring length $overflow_right]} { + # # set outstring [tcl::string::trimright $outstring "\u0000"] + # #} + # set outstring [tcl::string::trimright $outstring "\u0000"] + # set outstring [tcl::string::map {\u0000 " "} $outstring] + #} + + + #REVIEW + #set overflow_right [tcl::string::trimright $overflow_right "\u0000"] + #set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] set replay_codes "" if {[llength $understacks] > 0} { @@ -4207,8 +4423,9 @@ tcl::namespace::eval overtype::priv { upvar outcols o upvar understacks ustacks upvar understacks_gx gxstacks + upvar replay_codes_overlay replay #ECH clears character attributes from erased character positions - #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. + #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller of render_erasechar should do that mapping and only supply 1 or greater. if {![tcl::string::is integer -strict $count] || $count < 1} { error "render_erasechar count must be integer >= 1" } @@ -4223,8 +4440,9 @@ tcl::namespace::eval overtype::priv { } set num [expr {$end - $start + 1}] set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? - set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] - set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] + #DECECM ??? + set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list $replay]]] + set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] ;# ??? review return } proc render_setchar {i c } { diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm index 267e680e..1a40c952 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -413,6 +413,7 @@ tcl::namespace::eval punk::ansi { tcl::namespace::export\ {a?} {a+} a \ ansistring\ + ansiwrap\ convert*\ clear*\ cursor_*\ @@ -722,77 +723,11 @@ tcl::namespace::eval punk::ansi { #candidate for zig/c implementation? proc stripansi2 {text} { - - set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters join [::punk::ansi::ta::split_at_codes $text] "" } - proc stripansi1 {text} { - - #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW - - variable escape_terminals ;#dict - variable ::punk::ansi::ta::standalone_code_map ;#map to empty string - - set text [convert_g0 $text] - - - set text [tcl::string::map $standalone_code_map $text] - #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm - #\x1b#3 double-height letters top half - #\x1b#4 double-height letters bottom half - #\x1b#5 single-width line - #\x1b#6 double-width line - #\x1b#8 dec test fill screen - - - #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. - - #Theoretically line endings can occur within an ST payload (review e.g title?) - #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) - - set inputlist [split $text ""] - set outputlist [list] - - set in_escapesequence 0 - #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) - - set i 0 - foreach u $inputlist { - set v [lindex $inputlist $i+1] - set uv ${u}${v} - if {$in_escapesequence eq "2b"} { - #2nd byte - done. - set in_escapesequence 0 - } elseif {$in_escapesequence != 0} { - set endseq [tcl::dict::get $escape_terminals $in_escapesequence] - if {$u in $endseq} { - set in_escapesequence 0 - } elseif {$uv in $endseq} { - set in_escapesequence 2b ;#flag next byte as last in sequence - } - } else { - #handle both 7-bit and 8-bit CSI and OSC - if {[regexp {^(?:\033\[|\u009b)} $uv]} { - set in_escapesequence CSI - } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { - set in_escapesequence OSC - } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { - set in_escapesequence DCS - } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { - #SOS,PM,APC - all terminated with ST - set in_escapesequence MISC - } else { - lappend outputlist $u - } - } - incr i - } - return [join $outputlist ""] - } - #review - what happens when no terminator? #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set @@ -2029,7 +1964,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map - set fcposn [lsearch $args "forcecol*"] + set fcposn [lsearch $args "force*"] set fc "" set opt_forcecolour 0 if {$fcposn >= 0} { @@ -2409,7 +2344,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >= 0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -2767,7 +2702,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >=0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -3199,6 +3134,16 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } return $out } + proc move_emitblock {row col textblock} { + #*** !doctools + #[call [fun move_emitblock] [arg row] [arg col] [arg textblock]] + set commands "" + foreach ln [split $textblock \n] { + append commands [punk::ansi::move_emit $row $col $ln] + incr row + } + return $commands + } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] @@ -3315,18 +3260,90 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) - #Alt screen buffer + #Alt screen buffer - smcup/rmcup ti/te + #Note \x1b\[?47h doesn't work at all in some terminals (e.g alacritty,cmd on windows and reportedly kitty) + #It is also reported to have 'deceptively similar' effects to 1049 -but to be 'subtly broken' on some terminals. + #see: https://xn--rpa.cc/irl/term.html + #1049 (introduced by xterm in 1998?) considered the more modern version? + #1047,1048,1049 xterm private modes are 'composite' control sequences as replacement for original 47 sequence + #1049 - includes save cursor,switch to alt screen, clear screen + #e.g ? (below example not known to be exactly what 1049 does - but it's something similar) + #SMCUP + # \x1b7 (save cursor) + # \x1b\[?47h (switch) + # \x1b\[2J (clear screen) + #RMCUP + # \x1b\[?47l (switch back) + # \x1b8 (restore cursor) + + #1047 - clear screen on the way out (ony if actually on alt screen) proc enable_alt_screen {} { #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? - #\x1b\[?1049h ;#xterm - return \x1b\[?47h + return \x1b\[?1049h } proc disable_alt_screen {} { #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] - #\x1b\[?1049l + return \x1b\[?1049l + } + #47 - less widely supported(?) doesn't restore cursor or clear alt screen + proc enable_alt_screen2 {} { + return \x1b\[?47h + } + proc disable_alt_screen2 {} { return \x1b\[?47l } + proc term_colour_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_color_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_colour_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_color_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_colour_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_color_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_colour_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_color_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_colour_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + proc term_color_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + #15,16 tektronix fg, tektronix bg ??? + proc term_colour_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + proc term_color_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + #18 tektronix cursor colour ??? + proc term_colour_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + proc term_color_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + #22 pointer shape - there are other methods too - not known to work on windows terminal based VTs - review + proc term_colour_reset {} { + return "\x1b\]104\;\x1b\\" + } + proc term_color_reset {} { + return "\x1b\]104\;\x1b\\" + } # -- --- --- proc erase_line {} { @@ -4398,6 +4415,10 @@ tcl::namespace::eval punk::ansi::ta { variable re_ansi_detect_open expr {[regexp $re_ansi_detect_open $text]} } + proc detect_st_open {text} { + variable re_ST_open + expr {[regexp $re_ST_open $text]} + } #not in perl ta proc detect_csi {text} { @@ -4672,7 +4693,8 @@ tcl::namespace::eval punk::ansi::class { } oo::class create base_renderer { variable o_width - variable o_wrap o_overflow o_appendlines o_looplimit + variable o_autowrap_mode + variable o_overflow o_appendlines o_looplimit variable o_cursor_column o_cursor_row #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered @@ -4690,25 +4712,33 @@ tcl::namespace::eval punk::ansi::class { } tcl::namespace::path $nspath #-- -- - if {[llength $args] < 2} { - error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} + if {[llength $args] < 1} { + error {usage: ?-width ? ?-height ? ?-autowrap_mode [1|0]? ?-overflow [1|0]? from_ansistring} } - lassign [lrange $args end-1 end] from_ansistring to_ansistring + #lassign [lrange $args end-1 end] from_ansistring to_ansistring + set from_ansistring [lindex $args end] + set opts [tcl::dict::create\ - -width \uFFEF\ - -wrap 1\ - -overflow 0\ - -appendlines 1\ - -looplimit 15000\ - -experimental {}\ - -cursor_column 1\ - -cursor_row 1\ + -width \uFFEF\ + -height \uFFEF\ + -overflow 0\ + -appendlines 1\ + -looplimit 15000\ + -experimental {}\ + -cursor_column 1\ + -cursor_row 1\ + -insert_mode 0\ + -autowrap_mode 1\ + -initial_ansistring ""\ ] - puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" set argsflags [lrange $args 0 end-2] foreach {k v} $argsflags { switch -- $k { - -width - -wrap - -overflow - -appendlines - -looplimit - -experimental { + -width - -height - + -overflow - -appendlines - -looplimit - -experimental - + -autowrap_mode - + -insert_mode - + -initial_ansistring { tcl::dict::set opts $k $v } default { @@ -4717,8 +4747,19 @@ tcl::namespace::eval punk::ansi::class { } } } + set initial_ansistring [tcl::dict::get $opts -initial_ansistring] + if {$initial_ansistring eq ""} { + set to_ansistring [punk::ansi::class::class_ansistring new ""] + } else { + #todo - verify obj vs raw string + set to_ansistring $initial_ansistring + } + puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" + set o_width [tcl::dict::get $opts -width] - set o_wrap [tcl::dict::get $opts -wrap] + set o_height [tcl::dict::get $opts -height] + set o_autowrap_mode [tcl::dict::get $opts -autowrap_mode] + set o_insert_mode [tcl::dict::get $opts -insert_mode] set o_overflow [tcl::dict::get $opts -overflow] set o_appendlines [tcl::dict::get $opts -appendlines] set o_looplimit [tcl::dict::get $opts -looplimit] @@ -4736,6 +4777,9 @@ tcl::namespace::eval punk::ansi::class { method eval_in {script} { eval $script } + method renderbuf {} { + return $o_to_ansistring + } method cursor_column {{col ""}} { if {$col eq ""} { return $o_cursor_column @@ -4755,6 +4799,12 @@ tcl::namespace::eval punk::ansi::class { set o_cursor_row $row } + #set/query cursor state + method cursor_state {args} { + lassign $args r c + return [dict create row [my cursor_row $r] column [my cursor_column $c]] + } + #consider scroll area #we need to render to something with a concept of viewport, offscreen above,below,left,right? method rendernext {} { @@ -4834,7 +4884,8 @@ tcl::namespace::eval punk::ansi::class { #set rinfo [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -width $o_width -overflow 0 -cursor_column $col -cursor_row $row $undertext $overtext] } } - + #renderspace equivalent? channel based? + #todo $o_to_ansistring append $newtext return [tcl::dict::create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] @@ -4960,11 +5011,10 @@ tcl::namespace::eval punk::ansi::class { if {$o_renderer ne ""} { append result \n " renderer obj: $o_renderer" append result \n " renderer class: [info object class $o_renderer]" - } - if {$o_renderout ne ""} { - append result \n " render target ansistring: $o_renderout" - append result \n " render target has ansi : [$o_renderout has_ansi]" - append result \n " render target count : [$o_renderout count]" + set renderstring [$o_renderer renderbuf] + append result \n " render target ansistring: $renderstring" + append result \n " render target has ansi : [$renderstring has_ansi]" + append result \n " render target count : [$renderstring count]" } if {$verbose} { append result \n "ansisplits listing" @@ -5052,7 +5102,8 @@ tcl::namespace::eval punk::ansi::class { } method strippedlength {} { if {![llength $o_ansisplits]} {my MakeSplit} - + #review + return [string length [join $o_ptlist ""]] } #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already method stripped {} { @@ -5119,10 +5170,9 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ni $rtypes} { error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" } - if {$o_renderout eq ""} { - #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? - set o_renderout [punk::ansi::class::class_ansistring new ""] - } + #if {$o_renderout eq ""} { + # set o_renderout [punk::ansi::class::class_ansistring new ""] + #} if {$o_renderer ne ""} { set oinfo [info object class $o_renderer] set tail [tcl::namespace::tail $oinfo] @@ -5130,13 +5180,15 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ne $currenttype} { puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } else { return $currenttype } } else { puts "creating first renderer" - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } } #--- progressive rendering buffer - another ansistring object @@ -5149,10 +5201,13 @@ tcl::namespace::eval punk::ansi::class { return $o_renderwidth } #re-render if needed? - + puts stderr "renderwidth todo? re-render?" set o_renderwidth $rw } + method renderer {} { + return $o_renderer + } method render_state {} { #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. @@ -5160,10 +5215,36 @@ tcl::namespace::eval punk::ansi::class { } method renderbuf {} { #get the underlying renderobj - if any - return $o_renderout ;#also class_ansistring + #return $o_renderout ;#also class_ansistring + return [$o_renderer renderbuf] } - method render {} { + method render {{maxgraphemes ""}} { #full render - return buffer ansistring + set do_render 1 + set grapheme_count 0 + set other_count 0 + if {$maxgraphemes eq ""} { + while {$do_render} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } else { + while {$do_render && $grapheme_count <= $maxgraphemes} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } + return [dict create graphemes $grapheme_count other $other_count] } method rendernext {} { #render next available pt/code chunk only - not to end of available input @@ -5198,6 +5279,13 @@ tcl::namespace::eval punk::ansi::class { #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column + #like Tcl's append class_ansistring append returns the result directly - which for ANSI - can be inconvenient in the terminal + #class_ansistring append_string is a convenience wrapper to avoid returning the raw result + method append_string {args} { + my append {*}$args + return + } + #analagous to Tcl string append #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient method append {args} { @@ -5369,7 +5457,7 @@ tcl::namespace::eval punk::ansi::class { } - #method append_and_render - append and render up to end of appended data at same time + #method append_and_render? - append and render up to end of appended data at same time method view {args} { if {$o_string eq ""} { diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm index 4dd7bd66..e367ce9e 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm @@ -864,6 +864,7 @@ namespace eval punk::console { #Turning ansi off only stops new ansi being generated - but what about codes stored in configurations of existing elements such as tables/frames? #It will stop underlines/bold/reverse as well as SGR colours #what about ansi movement codes etc? + #we already have colour on|off which disables SGR codes in a+ etc as well as stderr/stdout channel transforms proc ansi {{onoff {}}} { variable ansi_wanted if {[string length $onoff]} { @@ -891,6 +892,7 @@ namespace eval punk::console { } } catch {punk::repl::reset_prompt} + puts stderr "::punk::console::ansi - use 'colour' command to turn SGR codes on/off" return [expr {$ansi_wanted}] } @@ -1295,10 +1297,10 @@ namespace eval punk::console { if {![catch {twapi::set_console_title $windowtitle} result]} { return $windowtitle } else { - error "punk::console::titleset failed to set title - try punk::console::ansi::titleset" + error "punk::console::local::titleset failed to set title - try punk::console::ansi::titleset" } } else { - error "punk::console::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" + error "punk::console::local::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" } } proc titleget {} { @@ -1306,12 +1308,12 @@ namespace eval punk::console { if {![catch {twapi::get_console_title} result]} { return $result } else { - error "punk::console::titleset failed to set title - ensure twapi is available" + error "punk::console::local::titleset failed to set title - ensure twapi is available" } } else { #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title - # won't work on all platforms/terminals - but may be worth implementing - error "punk::console::titleget has no local mechanism to get the window title on this platform." + # won't work on all platforms/terminals - but may be worth implementing (for overtype::renderspace / frames etc) + error "punk::console::local::titleget has no local mechanism to get the window title on this platform." } } } @@ -1327,7 +1329,7 @@ namespace eval punk::console { if { $ansi_wanted <= 0} { punk::console::local::titleset $windowtitle } else { - tailcall ansi::titleset $windowtitle + ansi::titleset $windowtitle } } #no known pure-ansi solution @@ -1486,8 +1488,6 @@ namespace eval punk::console { namespace import ansi::insert_lines namespace import ansi::delete_lines - interp alias {} smcup {} ::punk::console::enable_alt_screen - interp alias {} rmcup {} ::punk::console::disable_alt_screen #experimental proc rhs_prompt {col text} { @@ -1881,12 +1881,6 @@ namespace eval punk::console { -interp alias {} colour {} punk::console::colour -interp alias {} ansi {} punk::console::ansi -interp alias {} color {} punk::console::colour -interp alias {} a+ {} punk::console::code_a+ -interp alias {} a {} punk::console::code_a -interp alias {} a? {} punk::console::code_a? diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm index 70f924d7..cf0bf70c 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -1561,7 +1561,8 @@ tcl::namespace::eval punk::ns { #set name [string trim $name :] #set origin [namespace origin ${upns}::$name] - set origin [nseval $targetns [list ::namespace origin $name]] + set origin [nseval $targetns [list ::namespace origin $name]] + set resolved [nseval $targetns [list ::namespace which $name]] #An renamed alias may exist that is the same name as a proc that was created later.. so we must check for the proc before looking into aliases! if {$origin ni [info procs $origin]} { @@ -1613,7 +1614,8 @@ tcl::namespace::eval punk::ns { } lappend argl $a } - list proc [nsjoin ${targetns} $name] $argl $body + #list proc [nsjoin ${targetns} $name] $argl $body + list proc $resolved $argl $body } diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm new file mode 100644 index 0000000000000000000000000000000000000000..4ea2ce3d5c130888c6d7d9839df23f375bc5cbbd GIT binary patch literal 24693 zcmch92Rzm7`@dvli;T=;@2zYh%Fc+g=P}PYIOkX?va|Q5vKmT+j1-w=kB~BwJ&J@R z;{Q1)$3xHae0zSc|EW)$b3XTVUH3KL_kCZV@Ih!VmMiwoARBuK802n`v;$qVf`A}S z7S>=m$QcfH2E(ku*8KQr_k&n$!7wn~0tvPTxgzXgwjijJwX>78q$JeB5o`ufGu!!K z-ynM%ke88_1=7k6#zH?x3Qo4Em|-WdaT zazbto+Ifq#u!ro8H`zx3VgYE<76vjxI794_+jspU#%Hpt_H6CE8J>lcE1}ZfKG%T3Ed~bH4qqr03YIFEC+lCr4fX)1;7pA z1ab$1tSn%_{9Iv3kcG1|+{qbk55xk*b02w>vjX2)?f4i1Y=s1Q$%7OQ^J&zr+xmb& zz%W}N0$JrvKr%8QLEvUM80iX!fxNEmH#TYr>a~BE!XFQl1sS0+|DBL1jwrtY;sucL zz;sbxqh6?6&(U9*52mb=%<+IsDUIKR8rp2S=c$ z53#a9fX$FlRFIh=z_w5@3`kB!s7wcBY@`#!$^BQ@?}acBN+u=;B;KP3L?w_-QOOj6 zn!-VPNGph>BoZnq2?3JfA*6mP9%5w%MUerq?g)xb_+Z*cu|f&4Jp^qw+Zkk6`kxdJ zijNw-FH(Sl10??KpxkBuS3R-r=VY=SS$i?^mv8?mU{GlgO=<|-0%i-8J#^`T`J;4? z7KeY~0RlYA1SG}c3M4I5(gDMf5KkZ#A%H53qJO|>s9B?4mOx-Q9{TM;$2s`PKE1K- zo5*kD0A(o~R|o`1nT{5yi8?sh1BJrM2?o@E(0(G@g#&OY6lD{Tq$FyJsIj2!GUQ|f z6z&}&76<@r0CNQ>{rm_Hc5$_bgAbkCwzr^iv>Cv1x2Op0Xnb25FYasm1u;RD9^ihk zH6LIL0M6_yK?ivJwtS#e{#ON{>d@{S4-oo!|2A`UVgEOA@DC8)w%m5^vT}k#fjW-> zZ3ia+nrsjNRzpqJ1Oy-?X6E1dFN+1-))kdFKu&NJ9<;JUIWFLDP9RI53<7Wo2y)cd z+rZHcMHZkgDXFnt9KlGd145x@%L2B51FpLZ3lNeh7>63VGiqBdXTV{QsLxSea5xbF zcmf4kSWsAmPf$VgJP{x#7@!!KwUavnQS#>k+Cx+kdZY2eL;?3KwzVT4)XhK$}AwTV>lGd+6gpf z5X>GG(7*KcJI6xF0pKizvpo_FlGjq%^(-_8{T1Z9ss_2E5&*DHfFoUD+i(j|BVamb z>usIz?LxC1gF3>T+(CcwU6hMDxgtSGxaYRwkcVprCE0f5>@HOfwGQeg)QjbSzJ8%) z&@b)$T^s|+4(x%1gP{O?1R;QujN*;lHN6GM9qt6!*wz({K!7+c;7EHL3&2!BQnWyo zQB+_8xEioLn(qQWgK!0d&K&MmJ7NQZJG%m4fcbC4-?Qo8YT(~c_`RZmBnpRX{I-t# zk`}NT$O!^?xU>D=N`o>S)6YfbLD7Dh+a@OkTl9HvLNZbc#s9z0A&tXP8-840ax6elH_4y(Fg4kLbzI4fx*CP+!)ABD44aI zH@9!y^S8Zl1pS)c{x$7Sy`ulzH?^~<%#KR@z7P{Usr$sXyJV-*uenXy9+4m?&uiE z_J7EH*YJSlc2u=Z4=8;`Xm3saUU~bHjVo+NqC@2Q+X&g+KiS(Tu>lIywwwH1Yy<0L z6vhHVWP2B8yI}3W7Z58NsQbOFhnw=+;aPwM$VEG<-Uqip2vXKslkZ>>H!#c|xD80CsMHE1OjKz^t;SqoJ3|30 zqDt|hdfMGp_(LlnAo;f_k;TRW03doPi1SBcx3h$tu9(Int%+k2$eAWKgG_Q6~MQ+fgefpB)- zPM3h@w{zC7I6|#kAiy3H|NkV}MS#u@Rlth@r~_aElpz6byWS40n@|VfZ~vlR;Pk)g9`iDDD2O*Q0Jgy|&j#)*z#w z9{&^n|CC}|oc;dyTZSll|H6U)Z^{IU*`CmPyNQ_9W+#pAllX51M$!5Ikd9Go4o}Iy z|K{-f|FYu>7-|Rg|0}f~P{n>8``@8XB%FUbJm8=}Zrs}{q|s74igf}51LGJ5#o2cC zJ5L{cKYA1c!-)_BLk0sE<7YWRIzfRgC_6qOe&CM)!WCF)AOuhm1nhvhS^>MN;QjCT zQOmupEr8My%uknw90vqRecV7>bukn7qvgLRcLfvI>L$L0^ zqHtPyEjOtyZ=;-w?mjN8v99TWuVU&1rSaB3SYGxsGb$9kiGRthp*Q)M*+0=(Ft9|P zE6Zsj-I*X5ukUTS2vtfqSKKH^ac#=F$&`#l$jORqGCA^MI*Gz?{}Ys_)_kh(D1CCq zaSvop`g+gYIlu7GIxFsvgVi@v6Ql-D$DvnNLvl{^RMFYsI|Nt)m{~o47AOpU0KSk-Y3_ zh)HM}!pjmAe=|!lnVyqvxHh5~zh!2srKw@%Jm=l11r94tch8*pr!+ok#>Nry?8~3+ zH-5~mvC{eAOsp)oH1y*36Oydt{0 z$Scl?dHAB9Ac>+6L!5zz$))L&1Z`xe`f2GYmppK)byj?`W)U8R>zx-Oq+JWJH}BY0 zi8#mGJzneU{Ir3&7i7l@I1Qp5{3#=`FfizVAUg$w-roip9LNjSz&5k9z^~u7gDY2G z&1vj3sqDb@t-6beAFQ?6lr;m1y^KqOu|wUwIG;Jlsm139F+X8V&>;O*?H)v;;*F)D z%@RTZXL#RwRU~zS3dY+nQ`{*8rKvdj!0?DGOS$j4+I;=z4?A6 zo2MLnvfxg|OrUG%MoNPIvuIxGKKQ+4@EQI|wP(Ca3q6wYQ{S5mNHMDxXj?R1s?I-C z-ZT*8;05Nk(@f)-(7`pBL*KcW6r~?)?~C?EW%K9BX~W zG_O>z`)%q;*xAr$osb|O;xLJkyK_QLjrzup%IYC_WqR3&el61LQ2|(|BOX$Go6Az> zzXwbCU>fsm&H1x{{#~JUX_7bC(LS-DB@+5NthAfmXDByHmWNNyL_d)WZqvNSXRTwA zo?)K+gRt%F_r$RQBr@5c@7Oozv%=WDmZiwPl+82~;SU1Jys}fC_jBr>ouw(kVIjZq zDk}6$9s8{KD{?vpc~R{bCN3Yd9a=+ZpRJy0X_;~M&EROL$hq5Eh2%7es_C#it`{}_ zs^}xj)vRTfD>W0u+IOb^#l9u@EOcxo2MOy?VM@(alm(H5rb#ND6$UKy`K327RJ z?G4T&2uy04Pa6K98a?4V@EA_4Qb_I~$R`qV-0@1fnQy_AG@{g*?tWyja=M;LdRU)0 z-<-`G$7TiWvDD51$@Q5cdtN$-bN9s@cny1OyEuY_a3w=Nt{MBB%<9=${}6iFFcF%# zAS+gzg1)?KpZys#sIsRilLAB1_2jwJ)me!a=FQ`#GVM~`>FQpM%x_+v-ZY*r(0&!c z9(v2iKR%u0#gAISXMtn0f;UGb>jD<(ZdvdLd`NN-PO_@ZhnbvQa-H*&%PLB_E8v1V>GQuaYhn1!F&g zfw2Hwihq#c%urw-Az%TA11nWCA$}D4Fo}_ItP>;+T9w6iefr&czmB?t!@q4xsp~fc` z;Wh^6zI{Jqtj&E}MfU+b^rP2Ce$^GZh?8zkw+7w}gzm^ z>y#oN6W0n7Dz2*jz1HGsXiD$cD_wlp)pRvWN%mfRuWhNnvU z!V7$g29IGf)$a-kv3L*Z@W{BbuDx;hIN9ieaoxH1BoCi6#s%jvuo~>R^XXV_>zi`e zSf}iBNQ@v!bBgE7GLNbgySo)m$8BO?DtRPITisN~<|Im6vv#4~?$Z@$!>g^iO|$3( zjHRmeLH9FziZCg|SGln82`mmMK?k@LF|hw!5pXbSYk%7zx3&Q9e$bg;qNl2^sy{k0 zI+53@SE#3@%IEv}MS4zpJ>jte%=)%hZP-iU;lAk`kQ?DKLdtoe@jQ`Wv+c7bGpGgJ(F_cAMtbha>Q(J^295Dc;w|OU0lorlfT`a0 z_2&-q7sja50W;L(ju~jL(c@_bg06 zeoQ4t?i~{rU=V45*SSAeAs{puwJW=ubhd3A5%U=44`d#9*5MC1geI86Ie1KDXYb~D zCAn4hvtJgL4T$nBD-cpm(6^^(d3~*Vej|gTi2t^4)}vYVkHtk-sCw-$-3<7qPEKLB z78aUd!^B=s^L8j|+*SI`_@qDkvC#=-jEJlGyqM<}bSDj@t!9UX`me zk5D{4E@y8aPt0X4_$c>r)C!&Hh(ZpglS`4Wr@3F#T)$xHjJ8|j49Aw0^G)^#4NF6O z4auKi^}%beD*#~eDIa2qn>W)Xmp6xw{{}O4*M_Id&*{J`Er@%GsXR*fU={3>+CD+0AvhtGO=+7H1rLmck((`5}O++ZkySnFJTLk7f5>ekgNY z{F(HbnlaUo+oF@YagMJQ$DToCpY9&yFmJ>LS0ob6Qxk%i+PN5kB=YKU{J!Ty?MUNrW0cA z@*XGd?%OIaBu8jMRc6_x;Rl8=k?FO!(eXMp7dBcI=KTk|huO(Eyv|k8K=Bs}sbj)81ZtZq(jFfr)D7tjkEy80EPvBWhkYL&yB|D}%((KzX({h1 zI3EiSN8$Ag%&_rSVuqQg?UY^f<}$CoU)L{vC&`f^a1;?orH%KAcq-H>wV?M}K}TX# zEG{8kXzBb%)-f-^^7RF#FVP};+zX2#MZIC zCw1aJsTLXE@kC|coUGY}^~m^aacJ>nriJ0>UaOHOKP8YLJ2SHUT}ynvx%qg;botNN z;OBp}j>sA*`Er8yzV}CKEmkXY)wFbC`IFqXbWx@fMYuWKk`B5{t04MY@O1c199uO5 z%qncMH}4Q9v9MD=mCs1RRow(ulc=IFc=!b{Z^(LT2@wrNe@n5HxN${L5<^<3Doeey z#q69PEaI`q7kKwD1aqd)%Dl?vGnB`~BxlQtVkSLZdpt%_o*7r6(Tu?KNMkdXjao#T zuFvD;$}(z3GUXoe%BnkuaxX9UPWsw-=2at~ZyAMfE5%q=Y z4tUv&$Ak!par0tn`b*Ic>&Hu4XI(C+#|m+`biwa+)9WxQp^cDic>S)?^J9G~Q5x=# z9{X&4frn+st4Y_^@My;}b5EvyNeDbs&6n(c&b`Rt!n#YLi7iLp;=Renw=SO-7L_`R zXeo?yAM;i{vAVR>-eZNGM61(oP?pPRqaF2z%}pC3Qn&Z}9Z%2H6C{dmZ|1 zYyBB+OW1@=a106FppBBy1^15c}TG1+s)SkdzhvJW~LtAAiXcu920fJfQ?2Z~2if zYRFoO{IagPy+{C=dsAmefn}hZ-R#UD0-Tu(9dER-r^U3uy)Yt$1v)l{5gwBzmxZN} zGC;V|^K9t*Rf}x;@1$Po;;Bx^MB@q8nwa_fjqwejl&JgQB1;*K8^g9GvI*4i$OZ;R z%$aHv)};Z888Z4xkr~aU$RKSb)5XtMjT_~MZb65i7$)gg_%G!TP&AXF!FLe|W zY$?a#yb(r$DL@q%vVX1%hz)ve8pA|GPpklJqxn|B#Q|_&11=5V`d{~PcFQ0Xb?9pb z0Ztx(rZnKVV+UU9wyTV(0@zY#bW}%^zgcB$LbXV52WRTEtLQ4LUubRJClNlzmNJ8k z%PO#`gn7wSDNT&YbAgQ`9=MzaD@%n#*zTGDeF0ONMxQu^~UvD3=A z=lZA`!51k+P%B6pxT>&K$3ib@ke7DWMMtZ?LFJh8ysl6}NDArE~{cg{8*f4=B(p3;vcpT=1IDSY&{)xdn&yIU1vmTw&^7*Y*N zn}lO!wG#qlq|TeZV9=k%*z^eR(}g8PGPp+;UFj0IfIH<@Jd-9=$a1p#!fl~w1?-e? zdPHGsOBV0up2tTW+fsg@NSiGOXuy733xDj3KeZsdZFpTyelEc2E&_`itfFv8Ladzn zv5w0%`dlZutt=nhj`xceexj$Ur*plq4U*6ue|wa&RCNYl)wxswN0C5_4b4zG&|@l< zS}{1OfOwohR4V2mof^coAX4m+=OOON%UGqG0@A?*=0aMQv^+Vj%bWP> z*&%9GRKtR#cPU|=oT*gm3=oCYW%WC6VK;P>L^?E|TYhQO6u~)PX3v4%C2b zaq)&WHxQ;F+B%%=mpGd)aq=r`v;vD$^oW(>`Et3HsD(TfkbxYq#l`;~G^flgfh8cS zfz-_cXuC)5iiL_HXt)q+pFn9feUztF3G*~7tfrrh55^Th9b1x{&Tw9y@Q6U^=BgJD zT}MD<{)DHqjKXpeW;lnaT27voS?fol!aF>E{NB^TpOEy@6opgt$^abO_0h#L@ zUvB6aEg3qGdgtramg2oOOt8+6S7>@emRUIc@h$kmX7t+1wb1J3?(Z6Ln?!q-V9UT) z$>}2`2m>_02Wa3w>`@_6=h{EBhDl@vajhJo;=1x$a17U=0i?W|EvuhL?-;h|hD@cK z96`cV+qZYp*OEmjeOgEclh%6Ovkm9pxeH2MTw{BCiZgIE@QkkCQT)ilCw?rVe2GaQ z?btgxiy_>}vSQyA?4AzxbslR!l`)F*edvP982*VF#<=IrScQcap>Q+Bi(JNB2S~m-ZW7)krSw(20q> zLcKg7Uxc2cyrdPXr`6sfprXgwq^u{%ud2?}}NPKZ0Y3_r{Q@=<(A}3qPpih(;^0+SK?Rdpl`tHiX;SKo5;x-7)ZCR?AXEkrb(sn4#m< z4;Peg4=KEnUBWpQX#6S%A@h{Ak?JnT^fK%Xvr?$5pj`~9nv8PqfW8XC^l3kT;^+GM z7ebUTRJ6#}(r&)_RH`IKyo49;X5E|k5LsVafO8+B5O*njJVwU3wRttO*RGUO(f1B( z_fiZTiKnMT?me=y%&PVp`i2LJH$ru(m#zJb&`?pti{6gu&M%WVU#?1;K6s?t{kEB~ zEK`sn9{laeOS6qk2}_^%BI=J%-ov;*9^7z7=Ez1TO`aQR{jB^wqIjLm4pYHapSLA6 z_>AP}W0cA!h+56d%HxOTIHG@PbCCOytUm4l6g zB3|R+%fTebH<5>ya{RaASl5>8zx6qvt^fKWaM7}k$X9u@_-9+K^k9#hXZ)y~&SEmp;g(oJvT=I86o_k7 znBvt;92L?G=#xE}@Mb+3^zM@GWkIQ8Bk_3J_l#0}cNv{b5K?w28Z5VXO6=Qwm1V5s z*Sl=s1EU^l6{DR-_GVWUonH4zSgxI{3XrT$$at{ERjux7mL)41PRmF$-|=m3vG6Wm zV8`J3&zdk2yP5^V-oaK{4GK6hQ#L=xbbUnrG2g54Td!F%i2W(M`u)>iB@YOck5a{& zgw8&_vEdgV(U5rl-Lf{;N`-b4)1@fS9^=L*jp5_|NDiK_L45Oc7p{7aTg=czPZbdk zCwg9t^AgT~OHd#8=2H~2U{km6;M>Hkg1|h+`>gcEvJ8GVE8h})l=fye+c4pO3hx@^ z^L|8GtrvD?$yIu0(HySy>Wt5~xM1*2d!*!(iBLcKpYC985c9X_T1U9C zFZybmS0t-yc}0dlEJ}WhH9Gsf$5(6##+mi?b`O^jv0I{kWkT%XZ)S6%YH1k@#^c{| zdQIM~ZuZNn!Hw*igKi2*EzXDQusJpnE0GahZkvJzrvvq?=@cKVKg<0zOQ#>!uNHUfUvv%z{!cIetuv-f!J zQW9ZKZ}H{FOqfLdB65|iDl=dlR$Mgi&c7gv6Qp|@0phI8u&d_lHnyKT;kW4Qp}J0c z2ir)sXfwtN5<_wuICHZ~xWm14IYwTMKUv}3?Gs(qaLw|-I;=U!U`m+!OuWisoWY0I zACH!9rq_oLYUAAwF^HtQ+$fJ9mby`G_BzW=+`m#GM$PM@V7aPL@x1YLoRktV$CKb6 zJ3SkW?MB8#x6{A&x3+&Q5+j+BT!7+?%Z6mzz4dBY9Tg$F6nsoSS10+D`dou^vRwlI zAZU<7uJJQ&q9hYWX&e?Ym8;R{;*nj1<(o+PzOd$kYZwaXx`E-rwlR za{ChKS`$i~d0ca&$k@2ck`K2!pW~Y7g<-$=&r6+>CC~45d1-&sEe$pH?sssy-0;eD>~SAs{zQVJHO{Zf-@FO)<(;3fUWiOO!G&|{8!PKY z3IVK|Ie3gl3-pcS*kvZjqr3|xo2=?QrLk^jRxe9IU`!oTUku$d^oR9rx*ASk=kjI` z#B5H#bPQk3q&o4CUYtVrOGitgF^0*r;3Wn%M0L^?g52)$;v_#|?bG?b%=L=yI?zv< zd8M_?`GY6o>bgVpYwtSty>&J{)}2r$(CY5eq!x|sJKje6yJnA}{kj2~5@i~xO zeVR2-CX%P#p9++Wi9LB;Ofz@_&+MVE zt0>4&2VdLz0UGD1|kVH0i z=?b<42#sRvxR)^4DaLAOysk*hF35evSiwrq#M2!_;=RHW@4{sEL-)~Bc<)t{ABOqf z1^PY{X+04JZWCeAKHjSdFQG^5;gV$&VG2tfZ(so^NU`x!=`LABZdqL-R)NpW*ckS> zFHtdaA!q21Bj0py8t#izaX|=80nFJq|P7 zRmLOD_o$yP(yDcYO$Ux`+{)Oe5Ybgb0Ic- zB?1S0WAg}@Jx%N!uilTj0Xp&r7ghq=ykyR{eohQA9X6}6(!E35*GPo5bfmbpuDjK& z;!@z&h!Kn-@ig;n>t>(Kbp8?U4MnSLTS+14?al|J1$?mrIJH4ZlO1C*o0C`X(Tq_p z9k;|V%W#HcHDx{&TE~>qSb3?G{pPbt%7s1MXAp52*e`#BDF7+{IkaP9#Yp#~l-e594nVJ*tJ+pbVE{4=?7fQ3DK z&uQ)Dv8e9I4`cpFD_sfmGdfXn(_G`VcyrC%2)&To;jivbOw}FTY-fz0AsH>Q}9}-IKW`LE?9z|L!e|?}CZ^ZjwgoZ*^A3^SfA)#59cYJ$7$N z=;GG%u4ng;Exs6k-C3|eeg4*mM*X9+6PIcAnMo2msH~m{sv)y{z3CXIZHEg!QL$8n za6D&t$-eSff_X|_HzZ3A*p-;nzW5dT<=$frrA{e?q)G7_PdOQ68*@fs|_E( zSnxwK*wXa%UzBvuHaYJSaQ9damXSQ+vmxAf3>5d z{fmg&;6~uF6btSk;+7K}foXkoo`H{@tJ-gOXCd7an7D>B?^ zh6v@xe>KjaD$bkg(9d1)KmMa^?#yRT3UjlyPsG<;?aPFk8o&0wTX^X#tCZ_AXrE~+ z&%sh5=ydygB(l4Q3%Y-gYZw0U9b0Z#;^{|}!x3kXA{p!-GTO1x|Wwd4rA2;bnhU(Qu zdXkUNHA&A0mKR01@=_%uKeQNRW|juMqp{4Sozpt5`dWHE`Syu!cxK*_nHm!X)5S_L zIefiF?EKVPMZ=c76yeQjdWhFjXWMZFdX}~#7@4X26R^sv%5$2xY3k*WK`dBhr9*ph zWns2A?b40uSLMjXD*lz0&#c2;%t@YkP0aq86t&pn5~ z`LVCnu$LG2+m+mKjA^91?Y1Rh#Td)awj^U!S>NwuCcv*LP=LqFn3mraDr2$~Tv_Mu zx&FX35}*Eb^&CCJRkXe`CtPlWZ)2Av9D!5$_gpB%`48<>yNO5uJt1x9|4*qvRuTuY zQd#GgV0oaIFaO7yn&Z>K6P)f7@8%!34Z%fgI7_>4NESIZK@m(_KWtdv#?XstyqkxK zrx&>h(?ks4>U=2c|0*o$#O%R4FEp2ugHr=oLs8)F%Xzs?9)b@3Snq`#-YRCA5j(&LVW zG*cuZMpk)!O*@v`j!ojOd$m{!wRIcE`Q;DJ{kSo)%1Z@_H~;i9{Cw)m1*T(HJ>xa+ ziZd5Ixm(`Ie~-CK-=1WP?;iQGFC_JN5D9svN%j)_6@J=zN7LhRLdu)b$C@bK`dW#a zSv!v#N;Bjxww5~5Oo}ahFr;7s9(9dAiudi#jIUFeMXvs7k5978S4yxl4g6S2EKZRp zg6X72&LPI*H4MNG0mbugyY3f{vi5ab=!5XB-)^buwYdugNl;)`7k3BJ#%8pZL_cm< zqD*}KG=4;ulE6T{X%Lbx#zF;>=GVsjFz}#8^H@z4qDMbS5U$H23b^Lb|QdhEvkM zD6S8Ba04uTLbx89;x@xBsedHkBJ-qqf+-={ana72EEc<9yi}59OSK!_F0UF>r|8&5GFf_5p;@5o_~f4TCk_ZSY6Xs+dS2 zO|^z>g)pzk1bqCVK4xY-!nr4EZ?iX=Vj5}c#x3LXoXY*I)6)ZMl@}F}|13$XfBSjZ|2U-g#MV}J5iafNMCw;EsdfD`LpbgE< z`hngXN_46Z!8QWO)hjDcU2hGg4)@9$*qP#lx6CAV#`|qah@VRNXl@vI|HnIPdiBl4 zBz>PiTBb|p$MM;d-?MRsbW6XCWG&{CyM1at@=>Mh`N1pI!XA@Ym6sS899nxXvyhed zm}L~z$zT8QEE`hBxxO;i&@cmL>=J%ZeBAiD03$yevlrpLPiE{nDGIko0&p?NNh7jq z@^YGKuF^1nHl@%=mdBBoBf_5S%fDZ8#Kb#+Lue^M-^i0ex;lu7zfgCyBk814nnP+B zt?%8Hn=#sDM!k8x3b^wI9<*0JIiDSSA~fxt%eFX>YSe93T;^@@j#!WiWP`W09n96~nT>j(B3mFEnlw4Bwfr+K+E)s~z)*&I*Y^c zy2qjf%bxnZv8nKgYjrT7?`N9p%$#>9W{u8aFh4BOuJhUP@6W}(;46Rxc{Hut1B9cJfT{isGq-+4X7uKN!OGQ9HDaNRjMJvs8C&ar&U^uIA` z&laC4INaV3wKI8^kJL``kXl~%9P>P9SaI5jhsvFyUHrQ3)yZ6e$8t$^^X&KP);r#; z6JF1GfS(X-68nNfn@ODT)O*&8&XX(Y4`RO8N|_V#XhbW=%2<)#!jZ6eF?{Mp)hri2 ziSl$n%Y5Kv7bal_(%TKy4)!j|2BjtSp9}|T<;gK+XBR)5XL;E}6z zD}FMR@!NT^_mtT=63o?>F-l#TZ_`Q^PpqB#!RGnudV+AW8-Ar`Z1eQ-sv4TE*3B}A zPMhuB)n#0Um!XDr$6cf~)=i0=*_yKro`jXFjoxf+v{U;eOwsfPQsUF#R1vEmzgZY$ zctpE9z0a{kLOTqzS@rVC)C9^F?O~jP0Vp{!{rCBb6oyW7`UU~Il?S$~GdCAskmWv( zfi}QzIAj5jqFS8KG_S$?rnLKUH2R->dH)ElsZ%C ze`(QB=Y*7<)k9U#*#^UwSVfx~#fk z@&!)829BMGv>v2*+v@3Gi)@b#EP)W;(P=v+S|`xbZlcjP(eB?IE!J>VzI46!^NZZ} z_w2MM!s*iQsfSP%ico<*)_$sl|IiN<4QZ`7eK&rQ`1<#Zo|?~8(0k?YXKnDb&h#YT znHjj2xN=-!ak&he=v`#1e0x?wzMsf*X~zp(?OpD(tKO-EeL7^6+6Cjp5#AT&Rr=l5 z$6F3uJT_hdscu$Koi{|IXsD$63%h9(+_| zj~@m`81o-<{L6zI=sXWT6@kt(p5;IAM0-X8o#Vj=9nd+7u>Lv6zdU`g_t*nE`-6`= zptFDYf5(3R!w={p9DGgzT?99vjqrbU-d)f7%To*JJP)?(qw^eN|6`u$9o6Uz4>ql% zGt}h#V}|=$*wHy2>=j4nr~<4}|4^+5I>*tu9&EWq=jy@x$6V1GaMAf4Y->g57XX|| z{oyQkn_SU(9oF2s$8Gk{c^%&Biq86A7waBl42-M%e>}~<^t7V$JlMjD&U5L{c^+(N zMQ3}kc@>?lGqBJ5hZgjKCj9 NB4FDF^%OJ4{{tdaZ=nDH literal 0 HcmV?d00001 diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.1.tm index 88fdc3fd..96fb263d 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.1.tm @@ -4041,6 +4041,9 @@ tcl::namespace::eval textblock { (default 0 if no existing -table supplied)" -table -default "" -type string -help "existing table object to use" -headers -default "" -help "list of header values. Must match number of columns" + -show_header -default "" -help "Whether to show a header row. + Leave as empty string for unspecified/automatic, + in which case it will display only if -headers list was supplied." -action -default "append" -choices {append replace} -help "row insertion method if existing -table is supplied if append is chosen the new values will always start at the first column" -columns -default "" -type integer -help "Number of table columns @@ -4101,11 +4104,34 @@ tcl::namespace::eval textblock { } } else { set is_new_table 1 + set headers {} + if {[tcl::dict::get $opts -headers] ne ""} { + set headers [dict get $opts -headers] + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 1 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } else { + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 0 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } + if {[tcl::string::is integer -strict $opt_columns]} { set cols $opt_columns + if {[llength $headers] && $cols != [llength $headers]} { + error "list_as_table number of columns ($cols) doesn't match supplied number of headers ([llength $headers])" + } } else { #review - set cols 2 ;#seems a reasonable default + if {[llength $headers]} { + set cols [llength $headers] + } else { + set cols 2 ;#seems a reasonable default + } } #defaults for new table only if {[tcl::dict::get $opts -frametype] eq ""} { @@ -4123,15 +4149,6 @@ tcl::namespace::eval textblock { if {[tcl::dict::get $opts -show_hseps] eq ""} { tcl::dict::set opts -show_hseps 0 } - set headers {} - set show_header 0 - if {[tcl::dict::get $opts -headers] ne ""} { - set headers [dict get $opts -headers] - if {[llength $headers] ne $cols} { - error "list_as_table number of headers ([llength $headers]) must match number of columns ($cols)" - } - set show_header 1 - } set t [textblock::class::table new\ -show_header $show_header\ diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm new file mode 100644 index 00000000..d85d4416 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.1.tm @@ -0,0 +1,3357 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application tomlish 1.1.1 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin tomlish_module_tomlish 0 1.1.1] +#[copyright "2024"] +#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] +#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[require tomlish] +#[keywords module] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of tomlish +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by tomlish +#[list_begin itemized] + +package require Tcl 8.6- +package require struct::stack +package require logger + +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {struct::stack}] + +#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') + + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish { + namespace export {[a-z]*}; # Convention: export all lowercase + variable types + #ARRAY is analogous to a Tcl list + #TABLE is analogous to a Tcl dict + #WS = inline whitespace + #KEYVAL = bare key and value + #QKEYVAL = quoted key and value + #ITABLE = inline table (*can* be anonymous table) + # inline table values immediately create a table with the opening brace + # inline tables are fully defined between their braces, as are dotted-key subtables defined within + # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained + + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL STRING MULTISTRING LITSTRING MULTILITSTRING INT FLOAT BOOL DATETIME] + #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) + #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) + set min_int -9223372036854775808 ;#-2^63 + set max_int +9223372036854775807 ;#2^63-1 + + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" + puts stderr $msg + } + logger::initNamespace ::tomlish + foreach lvl [logger::levels] { + interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl + log::logproc $lvl tomlish_log_$lvl + } + + #*** !doctools + #[subsection {Namespace tomlish}] + #[para] Core API functions for tomlish + #[list_begin definitions] + + proc tags {} { + return $::tomlish::tags + } + + #helper function for get_dict + proc _get_keyval_value {keyval_element} { + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" + set found_value 0 + #find the value + # 3 is the earliest index at which the value could occur (depending on whitespace) + set found_sub [list] + foreach sub [lrange $keyval_element 2 end] { + #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey + switch -exact -- [lindex $sub 0] { + STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + set type [lindex $sub 0] + set value [lindex $sub 1] + set found_sub $sub + incr found_value 1 + } + default {} + } + } + if {!$found_value} { + error "Failed to find value element in KEYVAL. '$keyval_element'" + } + if {$found_value > 1} { + error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + } + + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + #simple (non-container, no-substitution) datatype + set result [list type $type value $value] + } + STRING - STRINGPART { + set result [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITSTRING { + #REVIEW + set result [list type $type value $value] + } + TABLE - ITABLE - ARRAY - MULTISTRING { + #jmn2024 - added ITABLE - review + #we need to recurse to get the corresponding dict for the contained item(s) + #pass in the whole $found_sub - not just the $value! + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } + default { + error "Unexpected value type '$type' found in keyval '$keyval_element'" + } + } + return $result + } + + #get_dict is a *basic* programmatic datastructure for accessing the data. + # produce a dictionary of keys and values from a tomlish tagged list. + # get_dict is primarily for reading toml data. + #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, + # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. + # creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. + #A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. + proc get_dict {tomlish} { + + #keep track of which tablenames have already been directly defined, + # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' + #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. + #we don't error out just because a previous tablename segment has already appeared. + variable tablenames_seen [list] + + + log::info ">>> processing '$tomlish'<<<" + set items $tomlish + + foreach lst $items { + if {[lindex $lst 0] ni $::tomlish::tags} { + error "supplied string does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + } + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + #ignore TOMLISH tag at beginning + set items [lrange $tomlish 1 end] + } + + set datastructure [dict create] + foreach item $items { + set tag [lindex $item 0] + #puts "...> item:'$item' tag:'$tag'" + switch -exact -- $tag { + KEYVAL - QKEYVAL { + log::debug "--> processing $tag: $item" + set key [lindex $item 1] + #!todo - normalize key. (may be quoted/doublequoted) + + if {[dict exists $datastructure $key]} { + error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." + } + + #lassign [_get_keyval_value $item] type val + set keyval_dict [_get_keyval_value $item] + dict set datastructure $key $keyval_dict + } + TABLE { + set tablename [lindex $item 1] + set tablename [::tomlish::utils::tablename_trim $tablename] + + if {$tablename in $tablenames_seen} { + error "Table name '$tablename' has already been directly defined in the toml data. Invalid." + } + + log::debug "--> processing $tag (name: $tablename): $item" + set name_segments [::tomlish::utils::tablename_split $tablename] + set last_seg "" + #toml spec rule - all segments mst be non-empty + #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. + + set key_hierarchy [list] + set key_hierarchy_raw [list] + + foreach rawseg $name_segments { + + set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes + set c1 [::string index $rawseg 0] + set c2 [::string index $rawseg end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes are processed within it. + set seg [::string range $rawseg 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] + #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + } else { + set seg $rawseg + } + + #no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. + #if {$rawseg eq ""} { + # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" + #} + lappend key_hierarchy $seg + lappend key_hierarchy_raw $rawseg + + if {[dict exists $datastructure {*}$key_hierarchy]} { + #It's ok for this key to already exist *if* it was defined by a previous tablename, + # but not if it was defined as a keyval/qkeyval + + set testkey [join $key_hierarchy_raw .] + set testkey_length [llength $key_hierarchy_raw] + set found_testkey 0 + if {$testkey in $tablenames_seen} { + set found_testkey 1 + } else { + #see if it was defined by a longer entry + foreach seen $tablenames_seen { + set seen_segments [::tomlish::utils::tablename_split $seen] + #these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, + # and strip the quotes from both single-quoted and double-quoted entries. + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #dots within table segments might seem like an 'edge case' + # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. + + #VVV the test below is wrong VVV! + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + if {$testkey eq $seen_match} { + set found_testkey 1 + } + } + } + + if {$found_testkey == 0} { + #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset + error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + } + } + + } + + + #We must do this after the key-collision test above! + lappend tablenames_seen $tablename + + + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + + #now add the contained elements + foreach element [lrange $item 2 end] { + set type [lindex $element 0] + switch -exact -- $type { + KEYVAL - QKEYVAL { + set keyval_key [lindex $element 1] + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + } + } + } + #now make sure we add an empty value if there were no contained elements! + #!todo. + } + ITABLE { + #SEP??? + set datastructure [list] + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + KEYVAL - QKEYVAL { + set keyval_key [lindex $element 1] + set keyval_dict [_get_keyval_value $element] + dict set datastructure $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + } + } + } + } + ARRAY { + #arrays in toml are allowed to contain mixtures of types + set datastructure [list] + log::debug "--> processing array: $item" + + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + TABLE - ARRAY - MULTISTRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] + } + WS - SEP { + #ignore whitespace and commas + } + default { + error "Unexpected value type '$type' found in array" + } + } + } + } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + WS - COMMENT - NEWLINE { + #ignore + } + default { + error "Unexpected tag '$tag' in Tomlish list '$tomlish'" + } + } + } + return $datastructure + } + + proc json_to_toml {json} { + #*** !doctools + #[call [fun json_to_toml] [arg json]] + #[para] + + set tomlish [::tomlish::from_json $json] + set toml [::tomlish::to_toml $tomlish] + } + + proc from_json {json} { + set jstruct [::tomlish::json_struct $json] + return [::tomlish::from_json_struct $jstruct] + } + + proc from_json_struct {jstruct} { + package require fish::json_toml + return [fish::json_toml::jsonstruct2tomlish $jstruct] + } + + proc toml_to_json {toml} { + set tomlish [::tomlish::from_toml $toml] + return [::tomlish::get_json $tomlish] + } + + proc get_json {tomlish} { + package require fish::json + set d [::tomlish::get_dict $tomlish] + #return [::tomlish::dict_to_json $d] + return [fish::json::from "struct" $d] + } + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +namespace eval tomlish::encode { + #*** !doctools + #[subsection {Namespace tomlish::encode}] + #[para] + #[list_begin definitions] + + #STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness + # take a value of the appropriate type and wrap as a tomlish tagged item + proc string {s} { + return [list STRING $s] + } + + proc int {i} { + #whole numbers, may be prefixed with a + or - + #Leading zeros are not allowed + #Hex,octal binary forms are allowed (toml 1.0) + #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) + #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. + # - We should probably raise an error for number larger than this and suggest the user supply it as a string? + if {[tcl::string::last , $i] > -1} { + error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" + } + if {![::tomlish::utils::int_validchars $i]} { + error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" + } + + if {[::tomlish::utils::is_int $i]} { + return [list INT $i] + } else { + error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" + } + + } + + proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [string tolower $f]] + } + if {[::tomlish::utils::is_float $f]} { + return [list FLOAT $f] + } else { + error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" + } + } + + proc datetime {str} { + if {[::tomlish::utils::is_datetime $str]} { + return [list DATETIME $str] + } else { + error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" + } + } + + proc boolean {b} { + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![string is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" + } else { + if {[expr {$b && 1}]} { + return [list BOOL true] + } else { + return [list BOOL false] + } + } + } + + #Take tablename followed by + # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types + proc table {name args} { + set pairs [list] + foreach t $args { + if {[llength $t] == 3} { + if {[lindex $t 0] ne "KEYVAL"} { + error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + } + lappend pairs $t + } elseif {[llength $t] == 2} { + #!todo - type heuristics + lassign $t n v + lappend pairs [list KEYVAL $n [list STRING $v]] + } else { + error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + } + } + return [list TABLE $name $pairs] + } + + + #the tomlish root is basically a nameless table representing the root of the document + proc root {args} { + set table [::tomlish::encode::table TOMLISH {*}$args] + set result [lindex $table 2] ;#Take only the key-value pair list + } + + #WS = whitepace, US = underscore + proc tomlish {list {context ""}} { + if {![tcl::string::is list $list]} { + error "Supplied 'tomlish' is not a valid Tcl list. Expected a tagged list (parsed Toml)" + } + set toml "" ;#result string + + foreach item $list { + set tag [lindex $item 0] + #puts "tomlish::encode::tomlish processing item '$item', tag '$tag'" + #during recursion, some tags require different error checking in different contexts. + set nextcontext $tag ; + + + #Handle invalid tag nestings + switch -- $context { + QKEYVAL - + KEYVAL { + if {$tag in {KEYVAL QKEYVAL}} { + error "Invalid tag '$tag' encountered within '$context'" + } + } + MULTISTRING { + #explicitly list the valid child tags + if {$tag ni {STRING STRINGPART WS NEWLINE CONT}} { + error "Invalid tag '$tag' encountered within a MULTISTRING" + } + } + default { + #no context, or no defined nesting error for this context + } + } + + switch -- $tag { + TOMLISH { + #optional root tag. Ignore. + } + QKEYVAL - + KEYVAL { + if {$tag eq "KEYVAL"} { + append toml [lindex $item 1] ;#Key + } else { + append toml \"[lindex $item 1]\" ;#Quoted Key + } + foreach part [lrange $item 2 end] { + if {$part eq "="} { + append toml "=" + } else { + append toml [::tomlish::encode::tomlish [list $part] $nextcontext] + } + } + } + TABLE { + append toml "\[[lindex $item 1]\]" ;#table name + foreach part [lrange $item 2 end] { + append toml [::tomlish::encode::tomlish [list $part] $nextcontext] + } + + } + ITABLE { + #inline table - e.g within array or on RHS of keyval/qkeyval + set data "" + foreach part [lrange $item 1 end] { + append data [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\{$data\}" + } + ARRAY { + + set arraystr "" + foreach part [lrange $item 1 end] { + append arraystr [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\[$arraystr\]" + } + WS { + append toml [lindex $item 1] + } + SEP { + append toml "," + } + NEWLINE { + set chartype [lindex $item 1] + if {$chartype eq "lf"} { + append toml \n + } elseif {$chartype eq "crlf"} { + append toml \r\n + } else { + error "Unrecognized newline type '$chartype'" + } + } + CONT { + #line continuation character "\" + append toml "\\" + } + STRING { + #simple double quoted strings only + # + return \"[lindex $item 1]\" + } + STRINGPART { + return [lindex $item 1] + } + MULTISTRING { + #Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts + set multistring "" ;#variable to build up the string + foreach part [lrange $item 1 end] { + append multistring [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\"\"\"$multistring\"\"\"" + } + LITSTRING { + #Single Quoted string(literal string) + append toml '[lindex $item 1]' + } + MULTILITSTRING { + #review - multilitstring can be handled as a single string? + set litstring "" + foreach part [lrange $item 1 end] { + append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml '''$litstring''' + } + INT - + BOOL - + FLOAT - + DATETIME { + append toml [lindex $item 1] + } + INCOMPLETE { + error "cannot process tomlish term tagged as INCOMPLETE" + } + COMMENT { + append toml "#[lindex $item 1]" + } + BOM { + #Byte Order Mark may appear at beginning of a file. Needs to be preserved. + append toml "\uFEFF" + } + default { + error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." + } + } + + } + return $toml + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::encode ---}] +} +#fish toml from tomlish + +#(encode tomlish as toml) +interp alias {} tomlish::to_toml {} tomlish::encode::tomlish + +# + + +namespace eval tomlish::decode { + #*** !doctools + #[subsection {Namespace tomlish::decode}] + #[para] + #[list_begin definitions] + + #return a Tcl list of tomlish tokens + #i.e get a standard list of all the toml terms in string $s + #where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. + #(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) + #Note that we deliberately don't check certain things such as duplicate table declarations here. + #Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. + # (e.g perhaps a toml editor to highlight violations for fixing) + # A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. + # e.g dicts or an object oriented structure + #Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage + #e.g to_dict will substitute \r \n \uHHHH \UHHHHHHH etc + #This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. + # (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) + #If we were to unescape a tab character for example + # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. + # For this reason, we also do absolutely no line-ending transformations based on platform. + # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { + #*** !doctools + #[call [fun toml] [arg s]] + #[para] return a Tcl list of tomlish tokens + + namespace upvar ::tomlish::parse is_parsing is_parsing + set is_parsing 1 + + + if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { + tomlish::parse::spacestack destroy + } + struct::stack ::tomlish::parse::spacestack + + namespace upvar ::tomlish::parse last_space_action last_space_action + namespace upvar ::tomlish::parse last_space_type last_space_type + + + namespace upvar ::tomlish::parse tok tok + set tok "" + + namespace upvar ::tomlish::parse type type + namespace upvar ::tomlish::parse tokenType tokenType + ::tomlish::parse::set_tokenType "" + namespace upvar ::tomlish::parse tokenType_list tokenType_list + set tokenType [list] ;#Flat (un-nested) list of tokentypes found + + namespace upvar ::tomlish::parse lastChar lastChar + set lastChar "" + + + set result "" + namespace upvar ::tomlish::parse nest nest + set nest 0 + + namespace upvar ::tomlish::parse v v ;#array keyed on nest level + + + set v(0) {TOMLISH} + array set s0 [list] ;#whitespace data to go in {SPACE {}} element. + set parentlevel 0 + + namespace upvar ::tomlish::parse i i + set i 0 + + namespace upvar ::tomlish::parse state state + + namespace upvar ::tomlish::parse braceCount braceCount + set barceCount 0 + namespace upvar ::tomlish::parse bracketCount bracketCount + set bracketCount 0 + + set sep 0 + set r 1 + namespace upvar ::tomlish::parse token_waiting token_waiting + set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. + + + set state "key-space" + ::tomlish::parse::spacestack push {space key-space} + namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) + set linenum 1 + + try { + while {$r} { + set r [::tomlish::parse::tok $s] + #puts stdout "got tok: '$tok' while parsing string '$s' " + set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' + + + + #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" + #puts "-->tok: $tok tokenType='$tokenType'" + set prevstate $state + ##### + set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] + ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + + set state $nextstate + if {$state eq "err"} { + error "State error - aborting parse. [tomlish::parse::report_line]" + } + + if {$last_space_action eq "pop"} { + #pop_trigger_tokens: newline tablename endarray endinlinetable + #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + switch -exact -- $tokenType { + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + tablename { + #note: a tablename only 'pops' if we are greater than zero + error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + } + tablearrayname { + #!review - tablearrayname different to tablename regarding push/pop? + #note: a tablename only 'pops' if we are greater than zero + error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + } + endarray { + #nothing to do here. + } + comma { + #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" + } + endinlinetable { + puts stderr "endinlinetable" + } + endmultiquote { + puts stderr "endmultiquote for last_space_action 'pop'" + } + default { + error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + } + } + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + + } elseif {$last_space_action eq "push"} { + incr nest 1 + set v($nest) [list] + # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + switch -exact -- $tokenType { + barekey { + set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + } + quotedkey - itablequotedkey { + set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + } + tablename { + #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! + #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish + # back to toml file will be identical. + #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. + # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, + # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. + + #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, + # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the + # tomlish list? + + set test_only [::tomlish::utils::tablename_trim $tok] + ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name + #note also that equivalent tablenames may have different toml representations even after being trimmed! + #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) + #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. + } + tablearrayname { + set test_only [::tomlish::utils::tablename_trim $tok] + puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name + } + startarray { + set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. + } + startinlinetable { + set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. + } + startmultiquote { + puts stderr "push trigger tokenType startmultiquote (todo)" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE + #JMN ??? + #set next_tokenType_known 1 + #::tomlish::parse::set_tokenType "multistring" + #set tok "" + } + default { + error "push trigger tokenType '$tokenType' not yet implemented" + } + } + + } else { + #no space level change + switch -exact -- $tokenType { + starttablename { + #$tok is triggered by the opening bracket and sends nothing to output + } + starttablearrayname { + #$tok is triggered by the double opening brackets and sends nothing to output + } + tablename - tablenamearray { + error "did not expect 'tablename/tablearrayname' without space level change" + #set v($nest) [list TABLE $tok] + } + endtablename - endtablearrayname { + #no output into the tomlish list for this token + } + startinlinetable { + puts stderr "decode::toml error. did not expect startlinetable without space level change" + } + startquote { + switch -exact -- $nextstate { + string { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itablequotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "startquote switch case not implemented for nextstate: $nextstate" + } + } + } + startmultiquote { + #review + puts stderr "no space level change - got startmultiquote" + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringpart" + set tok "" + } + endquote { + #nothing to do? + set tok "" + } + endmultiquote { + #JMN!! + set tok "" + } + string { + lappend v($nest) [list STRING $tok] + } + stringpart { + lappend v($nest) [list STRINGPART $tok] + } + multistring { + #review + lappend v($nest) [list MULTISTRING $tok] + } + quotedkey { + #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + + } + untyped-value { + #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. + if {$tok in {true false}} { + set tag BOOL + } elseif {[::tomlish::utils::is_int $tok]} { + set tag INT + } elseif {[::tomlish::utils::is_float $tok]} { + set tag FLOAT + } elseif {[::tomlish::utils::is_datetime $tok]} { + set tag DATETIME + } else { + error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + } + lappend v($nest) [list $tag $tok] + } + comment { + #puts stdout "----- comment token returned '$tok'------" + lappend v($nest) [list COMMENT "$tok"] + } + equal { + #we append '=' to the nest so that any surrounding whitespace is retained. + lappend v($nest) = + } + comma { + lappend v($nest) SEP + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + whitespace { + lappend v($nest) [list WS $tok] + } + continuation { + lappend v($nest) CONT + } + bom { + lappend v($nest) BOM + } + eof { + #ok - nothing more to add to the tomlish list. + #!todo - check previous tokens are complete/valid? + } + default { + error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + } + } + } + + if {!$next_tokenType_known} { + ::tomlish::parse::set_tokenType "" + set tok "" + } + + if {$state eq "end"} { + break + } + + + } + + #while {$nest > 0} { + # lappend v([expr {$nest -1}]) [set v($nest)] + # incr nest -1 + #} + while {[::tomlish::parse::spacestack size] > 1} { + ::tomlish::parse::spacestack pop + lappend v([expr {$nest -1}]) [set v($nest)] + incr nest -1 + + #set parent [spacestack peek] ;#the level being appended to + #lassign $parent type state + #if {$type eq "space"} { + # + #} elseif {$type eq "buffer"} { + # lappend v([expr {$nest -1}]) {*}[set v($nest)] + #} else { + # error "invalid spacestack item: $parent" + #} + } + + } finally { + set is_parsing 0 + } + + + return $v(0) + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::decode ---}] +} +#decode toml to tomlish +interp alias {} tomlish::from_toml {} tomlish::decode::toml + +namespace eval tomlish::utils { + #*** !doctools + #[subsection {Namespace tomlish::utils}] + #[para] + #[list_begin definitions] + + + #tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace + # tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] + #trimmed, the tablename becomes {a.b.c} + # A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] + #ie whitespace is only irrelevant if it's outside a quoted segment + #trimmed, the tablename becomes {a.b."c etc "} + proc tablename_trim {tablename} { + set segments [tablename_split $tablename false] + set trimmed_segments [list] + foreach seg $segments { + lappend trimmed_segments [::string trim $seg [list " " \t]] + } + return [join $trimmed_segments .] + } + + #utils::tablename_split + proc tablename_split {tablename {normalize false}} { + #we can't just split on . because we have to handle quoted segments which may contain a dot. + #eg {dog."tater.man"} + set i 0 + set sLen [::string length $tablename] + set segments [list] + set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + #quoted is for double-quotes, litquoted is for single-quotes (string literal) + set seg "" + for {} {$i < $sLen} {} { + + if {$i > 0} { + set lastChar [::string index $tablename [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [::string index $tablename $i] + incr i + + if {$c eq "."} { + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } + } + } elseif {($c eq "\"") && ($lastChar ne "\\")} { + if {$mode eq "unknown"} { + if {[::string trim $seg] ne ""} { + #we don't allow a quote in the middle of a bare key + error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" + } + set mode "quoted" + set seg "\"" + } elseif {$mode eq "unquoted"} { + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" ;#make sure we only accept a dot or end-of-data now. + } elseif {$mode eq "litquoted"} { + append seg $c + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } + } elseif {($c eq "\'")} { + if {$mode eq "unknown"} { + append seg $c + set mode "litquoted" + } elseif {$mode eq "unquoted"} { + #single quote inside e.g o'neill + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + + } elseif {$mode eq "litquoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + + } elseif {$c in [list " " \t]} { + if {$mode eq "syntax"} { + #ignore + } else { + append seg $c + } + } else { + if {$mode eq "syntax"} { + error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + } + if {$mode eq "unknown"} { + set mode "unquoted" + } + append seg $c + } + if {$i == $sLen} { + #end of data + ::tomlish::log::debug "End of data: mode='$mode'" + switch -exact -- $mode { + quoted { + if {$c ne "\""} { + error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" + } + if {$normalize} { + lappend segments $seg + } else { + lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong + } + } + litquoted { + set trimmed_seg [::string trim $seg] + if {[::string index $trimmed_seg end] ne "\'"} { + error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" + } + lappend segments $seg + } + unquoted - unknown { + lappend segments $seg + } + syntax { + #ok - segment already lappended + } + default { + lappend segments $seg + } + } + } + } + foreach seg $segments { + set trimmed [::string trim $seg [list " " \t]] + #note - we explicitly allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + #if {$trimmed in [list "''" "\"\""]} { + # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" + #} + if {$trimmed eq "" } { + error "tablename_split. Empty segment found. tablename: '$tablename'" + } + } + return $segments + } + + proc unicode_escape_info {slashu} { + #!todo + # validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and + # is a valid 'unicode scalar value' + # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive + #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} + if {[::string match {\\u*} $slashu]} { + set exp {^\\u([0-9a-fA-F]{4}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %4x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } + } else { + return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] + } + } elseif {[::string match {\\U*} $slashu]} { + set exp {^\\U([0-9a-fA-F]{8}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %8x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } else { + return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] + } + } + } else { + return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] + } + } else { + return [list err [list reason "Supplied string did not start with \\u or \\U" ]] + } + + } + + proc unescape_string {str} { + #note we can't just use Tcl subst because: + # it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. + # it would strip out backslashes inappropriately: e.g "\j" becomes just j + # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn + # it replaces\ with a single whitespace + #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + + set buffer "" + set buffer4 "" ;#buffer for 4 hex characters following a \u + set buffer8 "" ;#buffer for 8 hex characters following a \u + + set sLen [::string length $str] + + #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc + set slash_active 0 + set unicode4_active 0 + set unicode8_active 0 + + + #!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? + set i 0 + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [::string index $str [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [::string index $str $i] + ::tomlish::log::debug "unescape_string. got char $c" + scan $c %c n + if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { + #we don't expect unescaped unicode characters from 0000 to 001F - + #*except* for raw tab (which is whitespace) and newlines + error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" + } + incr i ;#must incr here because we do'returns'inside the loop + if {$c eq "\\"} { + if {$slash_active} { + append buffer "\\" + set slash_active 0 + } elseif {$unicode4_active} { + error "unescape_string. unexpected case slash during unicode4 not yet handled" + } elseif {$unicode8_active} { + error "unescape_string. unexpected case slash during unicode8 not yet handled" + } else { + # don't output anything (yet) + set slash_active 1 + } + } else { + if {$unicode4_active} { + if {[::string length $buffer4] < 4} { + append buffer4 $c + } + if {[::string length $buffer4] == 4} { + #we have a \uHHHH to test + set unicode4_active 0 + set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$unicode8_active} { + if {[::string length $buffer8] < 8} { + append buffer8 $c + } + if {[::string length $buffer8] == 8} { + #we have a \UHHHHHHHH to test + set unicode8_active 0 + set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$slash_active} { + set slash_active 0 + set ctest [string map {{"} dq} $c] + switch -exact -- $ctest { + dq { + set e "\\\"" + append buffer [subst -nocommand -novariable $e] + } + b - t - n - f - r { + set e "\\$c" + append buffer [subst -nocommand -novariable $e] + } + u { + set unicode4_active 1 + set buffer4 "" + } + U { + set unicode8_active 1 + set buffer8 "" + } + default { + set slash_active 0 + + append buffer "\\" + append buffer $c + } + } + } else { + append buffer $c + } + } + } + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + if {$unicode4_active} { + error "End of string reached before complete unicode escape sequence \uHHHH" + } + if {$unicode8_active} { + error "End of string reached before complete unicode escape sequence \UHHHHHHHH" + } + if {$slash_active} { + append buffer "\\" + } + return $buffer + } + + proc normalize_key {rawkey} { + set c1 [::string index $rawkey 0] + set c2 [::string index $rawkey end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes allowed within it. + set key [::string range $rawkey 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + # + set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set key [::tomlish::utils::unescape_string $keydata] + #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. + } else { + set key $rawkey + } + return $key + } + + proc string_to_slashu {string} { + set rv {} + foreach c [split $string {}] { + scan $c %c c + append rv {\u} + append rv [format %.4X $c] + } + return $rv + } + + #'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. + proc nonprintable_to_slashu {s} { + set res "" + foreach i [split $s ""] { + scan $i %c c + + set printable 0 + if {($c>31) && ($c<127)} { + set printable 1 + } + if {$printable} {append res $i} else {append res \\u[format %.4X $c]} + } + set res + } ;#RS + + #check if str is valid for use as a toml bare key + proc is_barekey {str} { + if {[::string length $str] == 0} { + return 0 + } else { + set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] + if {[::string length $str] == $matches} { + #all characters match the regexp + return 1 + } else { + return 0 + } + } + } + + #test only that the characters in str are valid for the toml specified type 'integer'. + proc int_validchars1 {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_int {str} { + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o + #first strip any +, - or _ (just for this test) + set check [::string map {+ "" - "" _ ""} $str] + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { + return 0 + } + # --------------------------------------- + + #check +,- only occur in the first position. + if {[::string last - $str] > 0} { + return 0 + } + if {[::string last + $str] > 0} { + return 0 + } + set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![string is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. + #presumably very large numbers would have to be supplied in a toml file as strings. + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max + if {$numeric_value > $::tomlish::max_int} { + return 0 + } + if {$numeric_value < $::tomlish::min_int} { + return 0 + } + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'float'. + proc float_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { + return 1 + } else { + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } + } + } + + proc is_float {str} { + set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } + + if {[::string length $str] == $matches} { + #all characters in legal range + #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) + #Toml spec also disallows leading zeros in the exponent part + #... but this seems less interoperable (some libraries generate leading zeroes in exponents) + #for now we will allow leading zeros in exponents + #!todo - configure 'strict' option to disallow? + #first strip any +, - or _ (just for this test) + set check [::string map {+ "" - "" _ ""} $str] + set r {([0-9])*} + regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E + set z {([0])*} + regexp $z $intpart leadingzeros + if {[::string length $leadingzeros] > 1} { + return 0 + } + #for floats, +,- may occur in multiple places + #e.g -2E-22 +3e34 + #!todo - check bounds ? + + #strip underscores for tcl double check + set check [::string map {_ ""} $str] + #string is double accepts inf nan +NaN etc. + if {![::string is double $check]} { + return 0 + } + + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'datetime'. + proc datetime_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_datetime {str} { + #e.g 1979-05-27T00:32:00-07:00 + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] + if {[::string length $str] == $matches} { + #all characters in legal range + #!todo - use full RFC 3339 parser? + lassign [split $str T] datepart timepart + #!todo - what if the value is 'time only'? + + if {[catch {clock scan $datepart} err]} { + puts stderr "tcl clock scan failed err:'$err'" + return 0 + } + #!todo - verify time part is reasonable + } else { + return 0 + } + return 1 + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::utils ---}] +} + +namespace eval tomlish::parse { + #*** !doctools + #[subsection {Namespace tomlish::parse}] + #[para] + #[list_begin definitions] + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text + + variable state + # states: + # key-space, curly-space, array-space + # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # + # notes: + # key-space i + # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + # + #stateMatrix defines for each state, actions to take for each possible token. + #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push command and the name of the space to push on the stack. + # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + + #test + variable stateMatrix + set stateMatrix [dict create] + + dict set stateMatrix\ + key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + + + dict set stateMatrix\ + curly-space {\ + whitespace "curly-space"\ + newline "curly-space"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + comma "curly-space"\ + eof "err"\ + comment "err"\ + } + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 + dict set stateMatrix\ + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + comma "curly-space"\ + eof "err"\ + comment "err"\ + } + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + newline "err"\ + eof "err"\ + untyped-value "samespace"\ + startquote "string"\ + startmultiquote {pushspace "multistring-space"}\ + startinlinetable {pushspace curly-space}\ + comment "err"\ + comma "err"\ + startarray {pushspace array-space}\ + } + dict set stateMatrix\ + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + eof "err"\ + untyped-value "samespace"\ + startarray {pushspace "array-space"}\ + endarray "popspace"\ + startquote "string"\ + startmultiquote "multistring"\ + comma "array-space"\ + comment "array-space"\ + } + dict set stateMatrix\ + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped-value "samespace"\ + startarray {pushspace array-space}\ + endarray "popspace"\ + startquote "string"\ + startmultiquote "multistring"\ + comma "array-space"\ + comment "err"\ + } + + + dict set stateMatrix\ + itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + #dict set stateMatrix\ + # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + dict set stateMatrix\ + itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + dict set stateMatrix\ + itablekeyval-space {} + dict set stateMatrix\ + itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} + + + dict set stateMatrix\ + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} + dict set stateMatrix\ + keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + dict set stateMatrix\ + keyval-space {} + + + + dict set stateMatrix\ + quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + dict set stateMatrix\ + string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + dict set stateMatrix\ + stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + dict set stateMatrix\ + multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + dict set stateMatrix\ + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} + dict set stateMatrix\ + tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + dict set stateMatrix\ + baretablename {whitespace "NA" newline "err" equal "value-expected"} + dict set stateMatrix\ + tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + dict set stateMatrix\ + tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + dict set stateMatrix\ + tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + dict set stateMatrix\ + end {} + + #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push + variable stateMatrix_orig { + key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} + value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} + array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} + array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} + keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + keyval-space {} + quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} + tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + end {} + } + #build a list of 'push triggers' from the stateMatrix + # ie tokens which can push a new space onto spacestack + set push_trigger_tokens [list] + tcl::dict::for {s transitions} $stateMatrix { + tcl::dict::for {token transition_to} $transitions { + set action [lindex $transition_to 0] + switch -exact -- $action { + pushspace - zeropoppushspace { + if {$token ni $push_trigger_tokens} { + lappend push_trigger_tokens $token + } + } + } + } + } + puts stdout "push_trigger_tokens: $push_trigger_tokens" + #!todo - hard code once stateMatrix finalised? + + + #mainly for the -space states: + #redirect to another state $c based on a state transition from $whatever to $b + # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. + #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' + variable spacePopTransitions { + array-space array-syntax + curly-space curly-syntax + keyval-space keytail + itablekeyval-space itablevaltail + } + variable spacePushTransitions { + keyval-space keyval-syntax + itablekeyval-space itablekeyval-syntax + array-space array-space + curly-space curly-space + key-space tablename + } + + + variable state_list + + namespace export tomlish toml + namespace ensemble create + + proc getNextState {tokentype currentstate} { + variable nest + variable v + + variable spacePopTransitions + variable spacePushTransitions + variable last_space_action "none" + variable last_space_type "none" + variable state_list + + set result "" + + if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { + set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] + ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + switch -exact -- [lindex $transition_to 0] { + popspace { + spacestack pop + set parent [spacestack peek] + lassign $parent type target + set last_space_action "pop" + set last_space_type $type + + if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { + set next [dict get $::tomlish::parse::spacePopTransitions $target] + ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + samespace { + #note the same data as popspace (spacePopTransitions) is used here. + set parent [spacestack peek] + ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" + lassign $parent type target + if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { + set next [dict get $::tomlish::parse::spacePopTransitions $target] + ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + zeropoppushspace { + if {$nest > 0} { + #pop back down to the root level (key-space) + spacestack pop + set parent [spacestack peek] + lassign $parent type target + set last_space_action "pop" + set last_space_type $type + + #----- + #standard pop + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + #----- + } + #re-entrancy + + #set next [list pushspace [lindex $transition_to 1]] + set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 + ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" + set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + } + pushspace { + set target [lindex $transition_to 1] + spacestack push [list space $target] + set last_space_action "push" + set last_space_type "space" + + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $target] + ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + default { + set result $transition_to + } + } + } else { + set result "nostate-err" + + } + lappend state_list $result + return $result + } + + proc report_line {{line ""}} { + variable linenum + variable is_parsing + if {$is_parsing} { + if {$line eq ""} { + set line $linenum + } + return "Line Number: $line" + } else { + #not in the middle of parsing tomlish text - return nothing. + return "" + } + } + + #produce a *slightly* more readable string rep of the nest for puts etc. + proc nest_pretty1 {list} { + set prettier "{" + + foreach el $list { + if { [lindex $el 0] eq "NEWLINE"} { + append prettier "[list $el]\n" + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { + append prettier [nest_pretty1 $el] + } else { + append prettier "[list $el] " + } + } + append prettier "}" + return $prettier + } + + proc set_tokenType {t} { + variable tokenType + variable tokenType_list + if {![info exists tokenType]} { + set tokenType "" + } + lappend tokenType_list $t + set tokenType $t + } + + proc switch_tokenType {t} { + variable tokenType + variable tokenType_list + lset tokenType_list end $t + set tokenType $t + } + + proc get_tokenType {} { + variable tokenType + return $tokenType + } + + proc _shortcircuit_startquotesequence {} { + variable tok + variable i + set toklen [::string length $tok] + if {$toklen == 1} { + set_tokenType "startquote" + incr i -1 + return -level 2 1 + } elseif {$toklen == 2} { + set_tokenType "startquote" + set tok "\"" + incr i -2 + return -level 2 1 + } + } + + #return a list of 0 1 or 2 tokens + #tomlish::parse::tok + proc tok {s} { + variable nest + variable v + variable i + variable tok + variable type ;#character type + variable state ;#FSM + + set resultlist [list] + + variable tokenType + variable tokenType_list + + + variable endToken + set sLen [::string length $s] + + variable lastChar + + variable braceCount + variable bracketCount + + + #------------------------------ + #Previous run found another (presumably single-char) token + variable token_waiting + if {[dict size $token_waiting]} { + set tokenType [dict get $token_waiting type] + set tok [dict get $token_waiting tok] + dict unset token_waiting type + dict unset token_waiting tok + return 1 + } + #------------------------------ + + set slash_active 0 + set quote 0 + set c "" + set multi_dquote "" + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [string index $s [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [string index $s $i] + #puts "got char $c during tokenType '$tokenType'" + incr i ;#must incr here because we do'returns'inside the loop + + set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + switch -exact -- $ctest { + # { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" + } + whitespace { + # hash marks end of whitespace token + #do a return for the whitespace, set token_waiting + #dict set token_waiting type comment + #dict set token_waiting tok "" + incr i -1 ;#leave comment for next run + return 1 + } + untyped-value { + #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? + #we will accept a comment marker as an immediate terminator of the untyped-value. + incr i -1 + return 1 + } + default { + #quotedkey, string, multistring + append tok $c + } + } + } else { + #$slash_active not relevant when no tokenType + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } + lc { + set multi_dquote "" ;#!! + #test jmn2024 + #left curly brace + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename { + error "unexpected tablename problem" + #$slash_active not relevant to this tokentype + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + comment { + if {$slash_active} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + if {$slash_active} { + set tok "\\\{" + } else { + set tok "\{" + } + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + + } + rc { + set multi_dquote "" ;#!! + #right curly brace + try { + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endinlinetable + dict set token_waiting tok "" + return 1 + } + tablearrayname { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + itablevaltail { + + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname { + error "unexpected tablearrayname problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + curly-syntax - curly-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itablevaltail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itablekeyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + default { + #JMN2024b keytail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + + } + lb { + set multi_dquote "" ;#!! + #left square bracket + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename { + #$slash_active not relevant to this tokentype + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + comment { + if {$slash_active} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + key-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + rb { + set multi_dquote "" ;#!! + #right square bracket + try { + + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablename + dict set token_waiting tok "" + return 1 + } + tablearraynames { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + error "unexpected tablename problem" + + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname { + error "unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + default { + error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + #backslash + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - litstring - multilitstring - comment - tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + barekey { + error "Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + if {$state eq "multistring-space"} { + set slash_active 1 + } else { + error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } + } + } + dq { + #double quote + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length in 'startquotesequence'" + } + } + endquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "endmultiquote" + return 1 + } else { + error "unexpected token length in 'endquotesequence'" + } + } + string { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #unescaped quote always terminates a string? + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } + } + stringpart { + #sub element of multistring + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + value-expected { + if {$multi_dquote eq "\"\""} { + dict set token_waiting type startmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + #end whitespace token and reprocess + incr i -1 + return 1 + #append multi_dquote "\"" + } + } + default { + dict set token_waiting type startquote + dict set token_waiting tok "\"" + return 1 + } + } + } + comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } + } + tablename - tablearrayname { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + key-space { + set tokenType startquote + set tok $c + return 1 + } + curly-space { + set tokenType startquote + set tok $c + return 1 + } + tablename - tablearrayname { + set_tokenType $state + set tok $c + } + default { + error "Unexpected quote during state '$state' [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - comment - quotedkey { + #for these tokenTypes an = is just data. + append tok $c + } + stringpart { + append tok $dquotes$c + } + whitespace { + dict set token_waiting type equal + dict set token_waiting tok = + return 1 + } + barekey { + dict set token_waiting type equal + dict set token_waiting tok = + return 1 + } + default { + error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok ${dquotes}= + } + default { + set_tokenType equal + set tok = + return 1 + } + } + } + } + cr { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \r carriage return + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + stringpart { + append tok $dquotes$c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } + } + } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) + set_tokenType newline + set tok cr + } + } + lf { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \n newline + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + newline { + #this lf is the trailing part of a crlf + append tok lf + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + } else { + set had_slash $slash_active + set slash_active 0 + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + set_tokenType newline + set tok lf + return 1 + } + } + } + , { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - comment - quotedkey - tablename - tablearrayname { + append tok $c + } + stringpart { + append tok $dquotes$c + } + default { + dict set token_waiting type comma + dict set token_waiting tok "," + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "," + } + multiliteral-space { + set_tokenType literalpart + set tok "," + } + default { + set_tokenType comma + set tok "," + return 1 + } + } + } + } + . { + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment - quotedkey - untyped-value { + append tok $c + } + baretablename - tablename - tablearrayname { + #subtable - split later - review + append tok $c + } + barekey { + #we need to transition the barekey to become a structured table name ??? review + switch_tokenType tablename + incr i -1 + + #error "barekey period unimplemented" + } + default { + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #dict set token_waiting type period + #dict set token_waiting tok "." + #return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "." + } + multiliteral-space { + set_tokenType literalpart + set tok "." + } + default { + set_tokenType untyped-value + set tok "." + } + } + } + + } + " " { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {[::string length $tokenType]} { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + untyped-value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + comment { + if {$had_slash} { + append tok "\\" + } + append tok $c + } + quotedkey - string { + if {$had_slash} { + append tok "\\" + } + #if {$dquotes eq "\""} { + #} + append tok $c + } + whitespace { + append tok $c + } + stringpart { + if {$had_slash} { + #REVIEW + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + #keeping WS separate allows easier processing of CONT stripping + append tok $dquotes + incr i -1 + return 1 + } + } + starttablename { + incr i -1 + return 1 + } + starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearrayname { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + if {$had_slash} { + set tok "\\$c" + } else { + set tok $c + } + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return + } + set_tokenType "whitespace" + append tok $c + } + } + default { + if {$had_slash} { + error "unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } + } + } + } + tab { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set token_waiting type whitespace + #set token_waiting tok $c + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + quotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + append tok $dquotes$c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + default { + set_tokenType "whitespace" + append tok $c + } + } + } + } + bom { + #BOM (Byte Order Mark) - ignored by token consumer + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + default { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + endquotesequence { + puts stderr "endquotesequence: $tok" + } + whitespace { + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } + barekey { + if {[tomlish::utils::is_barekey $c]} { + append tok $c + } else { + error "Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + incr i -1 + #allow statemachine to set context for subsequent chars + return 1 + } + stringpart { + append tok $dquotes$c + } + default { + #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + append tok $c + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + key-space - curly-space - curly-syntax { + #if no currently active token - assume another key value pair + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } + tablename { + set_tokenType "tablename" + set tok $c + } + tablearrayname { + set_tokenType "tablearrayname" + set tok $c + } + default { + set_tokenType "untyped-value" + set tok $c + } + } + } + } + } + + } + + #run out of characters (eof) + if {[::string length $tokenType]} { + #check for invalid ending tokens + #if {$state eq "err"} { + # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" + #} + if {$tokenType eq "startquotesequence"} { + set toklen [::string length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + eror "eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #dict set token_waiting type "string" + #dict set token_waiting tok "" + return 1 + } + } + dict set token_waiting type "eof" + dict set token_waiting tok "eof" + return 1 + } else { + ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + set tokenType "eof" + set tok "eof" + } + return 0 + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::parse ---}] +} + +tcl::namespace::eval tomlish::app { + variable applist [list encoder decoder test] + + #*** !doctools + #[subsection {Namespace tomlish::app}] + #[para] + #[list_begin definitions] + + proc decoder {args} { + #*** !doctools + #[call app::[fun decoder] [arg args]] + #[para] read toml on stdin until EOF + #[para] on error - returns non-zero exit code and writes error on stderr + #[para] on success - returns zero exit code and writes JSON encoding of the data on stdout + #[para] This decoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + #fconfigure stdin -encoding utf-8 + fconfigure stdin -translation binary + #Just slurp it all - presumably we are not handling massive amounts of data on stdin. + # - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. + if {[catch { + set toml [read stdin] + }]} { + exit 2 ;#read error + } + try { + set j [::tomlish::toml_to_json $toml] + } on error {em} { + puts stderr "decoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $j + exit 0 + } + + proc encoder {args} { + #*** !doctools + #[call app::[fun encoder] [arg args]] + #[para] read JSON on stdin until EOF + #[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation + #[para] return zero exitcode and TOML data on stdout if JSON data can be converted. + #[para] This encoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + fconfigure stdin -translation binary + if {[catch { + set json [read stdin] + }]} { + exit 2 ;#read error + } + try { + set toml [::tomlish::json_to_toml $json] + } on error {em} { + puts stderr "encoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $toml + exit 0 + } + + proc test {args} { + set opts [dict merge [dict create] $args] + + package require test::tomlish + if {[dict exists $opts -suite]} { + test::tomlish::suite [dict get $opts -suite] + } + test::tomlish::run + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::app ---}] +} + +proc ::tomlish::appnames {} { + set applist [list] + foreach cmd [info commands ::tomlish::app::*] { + lappend applist [namespace tail $cmd] + } + return $applist +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace tomlish::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +if {$argc > 0} { + puts stderr "argc: $argc args: $argv" + + if {($argc == 1)} { + if {[::string tolower $argv] in {help -help h -h}} { + puts stdout "Usage: -app where appname one of:[tomlish::appnames]" + exit 0 + } else { + puts stderr "Argument '$argv' not understood. Try -help" + exit 1 + } + } + set opts [dict create] + set opts [dict merge $opts $argv] + + set opts_understood [list -app ] + if {"-app" in [dict keys $opts]} { + #Don't vet the remaining opts - as they are interpreted by each app + } else { + foreach key [dict keys $opts] { + if {$key ni $opts_understood} { + puts stderr "Option '$key' not understood" + exit 1 + } + } + } + if {[dict exists $opts -app]} { + set app [dict get $opts -app] + if {$app ni [tomlish::appnames]} { + puts stderr "app '[dict get $opts -app]' not found" + exit 1 + } + tomlish::app::$app {*}$opts + } +} + +## Ready +package provide tomlish [namespace eval tomlish { + variable pkg tomlish + variable version + set version 1.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/uuid-1.0.8.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/uuid-1.0.8.tm new file mode 100644 index 00000000..c5cffa67 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/uuid-1.0.8.tm @@ -0,0 +1,246 @@ +# uuid.tcl - Copyright (C) 2004 Pat Thoyts +# +# UUIDs are 128 bit values that attempt to be unique in time and space. +# +# Reference: +# http://www.opengroup.org/dce/info/draft-leach-uuids-guids-01.txt +# +# uuid: scheme: +# http://www.globecom.net/ietf/draft/draft-kindel-uuid-uri-00.html +# +# Usage: uuid::uuid generate +# uuid::uuid equal $idA $idB + +package require Tcl 8.5 9 + +namespace eval uuid { + variable accel + array set accel {critcl 0} + + namespace export uuid + + variable uid + if {![info exists uid]} { + set uid 1 + } + + proc K {a b} {set a} +} + +### +# Optimization +# Caches machine info after the first pass +### + +proc ::uuid::generate_tcl_machinfo {} { + variable machinfo + if {[info exists machinfo]} { + return $machinfo + } + lappend machinfo [clock seconds]; # timestamp + lappend machinfo [clock clicks]; # system incrementing counter + lappend machinfo [info hostname]; # spatial unique id (poor) + lappend machinfo [pid]; # additional entropy + lappend machinfo [array get ::tcl_platform] + + ### + # If we have /dev/urandom just stream 128 bits from that + ### + if {[file exists /dev/urandom]} { + set fin [open /dev/urandom r] + fconfigure $fin -encoding binary + binary scan [read $fin 128] H* machinfo + close $fin + } elseif {[catch {package require nettool}]} { + # More spatial information -- better than hostname. + # bug 1150714: opening a server socket may raise a warning messagebox + # with WinXP firewall, using ipconfig will return all IP addresses + # including ipv6 ones if available. ipconfig is OK on win98+ + if {[string equal $::tcl_platform(platform) "windows"]} { + catch {exec ipconfig} config + lappend machinfo $config + } else { + catch { + set s [socket -server void -myaddr [info hostname] 0] + K [fconfigure $s -sockname] [close $s] + } r + lappend machinfo $r + } + + if {[package provide Tk] != {}} { + lappend machinfo [winfo pointerxy .] + lappend machinfo [winfo id .] + } + } else { + ### + # If the nettool package works on this platform + # use the stream of hardware ids from it + ### + lappend machinfo {*}[::nettool::hwid_list] + } + return $machinfo +} + +# Generates a binary UUID as per the draft spec. We generate a pseudo-random +# type uuid (type 4). See section 3.4 +# +proc ::uuid::generate_tcl {} { + package require md5 2 + variable uid + + set tok [md5::MD5Init] + md5::MD5Update $tok [incr uid]; # package incrementing counter + foreach string [generate_tcl_machinfo] { + md5::MD5Update $tok $string + } + set r [md5::MD5Final $tok] + binary scan $r c* r + + # 3.4: set uuid versioning fields + lset r 8 [expr {([lindex $r 8] & 0x3F) | 0x80}] + lset r 6 [expr {([lindex $r 6] & 0x0F) | 0x40}] + + return [binary format c* $r] +} + +if {[string equal $tcl_platform(platform) "windows"] + && [package provide critcl] != {}} { + namespace eval uuid { + critcl::ccode { + #define WIN32_LEAN_AND_MEAN + #define STRICT + #include + #include + typedef long (__stdcall *LPFNUUIDCREATE)(UUID *); + typedef const unsigned char cu_char; + } + critcl::cproc generate_c {Tcl_Interp* interp} ok { + HRESULT hr = S_OK; + int r = TCL_OK; + UUID uuid = {0}; + HMODULE hLib; + LPFNUUIDCREATE lpfnUuidCreate = NULL; + hLib = LoadLibraryA(("rpcrt4.dll")); + if (hLib) + lpfnUuidCreate = (LPFNUUIDCREATE) + GetProcAddress(hLib, "UuidCreate"); + if (lpfnUuidCreate) { + Tcl_Obj *obj; + lpfnUuidCreate(&uuid); + obj = Tcl_NewByteArrayObj((cu_char *)&uuid, sizeof(uuid)); + Tcl_SetObjResult(interp, obj); + } else { + Tcl_SetResult(interp, "error: failed to create a guid", + TCL_STATIC); + r = TCL_ERROR; + } + return r; + } + } +} + +# Convert a binary uuid into its string representation. +# +proc ::uuid::tostring {uuid} { + binary scan $uuid H* s + foreach {a b} {0 7 8 11 12 15 16 19 20 end} { + append r [string range $s $a $b] - + } + return [string tolower [string trimright $r -]] +} + +# Convert a string representation of a uuid into its binary format. +# +proc ::uuid::fromstring {uuid} { + return [binary format H* [string map {- {}} $uuid]] +} + +# Compare two uuids for equality. +# +proc ::uuid::equal {left right} { + set l [fromstring $left] + set r [fromstring $right] + return [string equal $l $r] +} + +# Call our generate uuid implementation +proc ::uuid::generate {} { + variable accel + if {$accel(critcl)} { + return [generate_c] + } else { + return [generate_tcl] + } +} + +# uuid generate -> string rep of a new uuid +# uuid equal uuid1 uuid2 +# +proc uuid::uuid {cmd args} { + switch -exact -- $cmd { + generate { + if {[llength $args] != 0} { + return -code error "wrong # args:\ + should be \"uuid generate\"" + } + return [tostring [generate]] + } + equal { + if {[llength $args] != 2} { + return -code error "wrong \# args:\ + should be \"uuid equal uuid1 uuid2\"" + } + return [eval [linsert $args 0 equal]] + } + default { + return -code error "bad option \"$cmd\":\ + must be generate or equal" + } + } +} + +# ------------------------------------------------------------------------- + +# LoadAccelerator -- +# +# This package can make use of a number of compiled extensions to +# accelerate the digest computation. This procedure manages the +# use of these extensions within the package. During normal usage +# this should not be called, but the test package manipulates the +# list of enabled accelerators. +# +proc ::uuid::LoadAccelerator {name} { + variable accel + set r 0 + switch -exact -- $name { + critcl { + if {![catch {package require tcllibc}]} { + set r [expr {[info commands ::uuid::generate_c] != {}}] + } + } + default { + return -code error "invalid accelerator package:\ + must be one of [join [array names accel] {, }]" + } + } + set accel($name) $r +} + +# ------------------------------------------------------------------------- + +# Try and load a compiled extension to help. +namespace eval ::uuid { + variable e {} + foreach e {critcl} { + if {[LoadAccelerator $e]} break + } + unset e +} + +package provide uuid 1.0.8 + +# ------------------------------------------------------------------------- +# Local variables: +# mode: tcl +# indent-tabs-mode: nil +# End: diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config index 75a091dc..fc436d8c 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config @@ -6,7 +6,6 @@ set bootsupport_modules [list\ src/vendormodules cksum\ src/vendormodules modpod\ - src/vendormodules natsort\ src/vendormodules overtype\ src/vendormodules oolib\ src/vendormodules http\ @@ -22,6 +21,8 @@ set bootsupport_modules [list\ src/vendormodules uuid\ src/vendormodules md5\ src/vendormodules sha1\ + src/vendormodules tomlish\ + src/vendormodules test::tomlish\ modules punkcheck\ modules natsort\ modules punk::ansi\ @@ -60,6 +61,7 @@ set bootsupport_modules [list\ modules punk::zip\ modules punk::winpath\ modules textblock\ + modules natsort\ modules oolib\ ] diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm index 38ce71c2..492341d6 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.5.tm @@ -233,7 +233,6 @@ tcl::namespace::eval overtype { -width \uFFEF\ -height \uFFEF\ -startcolumn 1\ - -wrap 0\ -ellipsis 0\ -ellipsistext $default_ellipsis_horizontal\ -ellipsiswhitespace 0\ @@ -243,11 +242,13 @@ tcl::namespace::eval overtype { -exposed1 \uFFFD\ -exposed2 \uFFFD\ -experimental 0\ - -cp437 1\ + -cp437 0\ -looplimit \uFFEF\ -crm_mode 0\ -reverse_mode 0\ -insert_mode 0\ + -wrap 0\ + -info 0\ -console {stdin stdout stderr}\ ] #expand_right is perhaps consistent with the idea of the page_size being allowed to grow horizontally.. @@ -263,14 +264,19 @@ tcl::namespace::eval overtype { #-ellipsis args not used if -wrap is true foreach {k v} $argsflags { switch -- $k { - -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace + -looplimit - -width - -height - -startcolumn - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -transparent - -exposed1 - -exposed2 - -experimental - -expand_right - -appendlines - -reverse_mode - -crm_mode - -insert_mode - -cp437 - - -console { + - -info - -console { tcl::dict::set opts $k $v } + -wrap - -autowrap_mode { + #temp alias -autowrap_mode for consistency with renderline + #todo - + tcl::dict::set opts -wrap $v + } default { error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" } @@ -280,10 +286,6 @@ tcl::namespace::eval overtype { # -- --- --- --- --- --- #review - expand_left for RTL text? set opt_expand_right [tcl::dict::get $opts -expand_right] - ##### - # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. - set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) - ##### #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. set opt_width [tcl::dict::get $opts -width] set opt_height [tcl::dict::get $opts -height] @@ -298,50 +300,34 @@ tcl::namespace::eval overtype { set opt_crm_mode [tcl::dict::get $opts -crm_mode] set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] set opt_insert_mode [tcl::dict::get $opts -insert_mode] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_autowrap_mode [tcl::dict::get $opts -wrap] + #??? wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### # -- --- --- --- --- --- set opt_cp437 [tcl::dict::get $opts -cp437] + set opt_info [tcl::dict::get $opts -info] - #initial state for renderspace 'terminal' reset - set initial_state [dict create\ - -width $opt_width\ - -height $opt_height\ - -crm_mode $opt_crm_mode\ - -reverse_mode $opt_reverse_mode\ - -insert_mode $opt_insert_mode\ - -cp437 $opt_cp437\ - ] # ---------------------------- # -experimental dev flag to set flags etc # ---------------------------- set data_mode 0 - set info_mode 0 set edit_mode 0 set opt_experimental [tcl::dict::get $opts -experimental] foreach o $opt_experimental { switch -- $o { - old_mode { - set info_mode 1 - } data_mode { set data_mode 1 } - info_mode { - set info_mode 1 - } edit_mode { set edit_mode 1 } } } # ---------------------------- - - #modes - set insert_mode $opt_insert_mode ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l - set autowrap_mode $opt_wrap - set reverse_mode $opt_reverse_mode - set crm_mode $opt_crm_mode set underblock [tcl::string::map {\r\n \n} $underblock] @@ -367,6 +353,20 @@ tcl::namespace::eval overtype { set renderwidth $opt_width set renderheight $opt_height } + #initial state for renderspace 'terminal' reset + set initial_state [dict create\ + renderwidth $renderwidth\ + renderheight $renderheight\ + crm_mode $opt_crm_mode\ + reverse_mode $opt_reverse_mode\ + insert_mode $opt_insert_mode\ + autowrap_mode $opt_autowrap_mode\ + cp437 $opt_cp437\ + ] + #modes + #e.g insert_mode can be toggled by insert key or ansi IRM sequence CSI 4 h|l + #opt_startcolumn ?? - DECSLRM ? + set vtstate $initial_state # -- --- --- --- #REVIEW - do we need ansi resets in the underblock? @@ -494,50 +494,55 @@ tcl::namespace::eval overtype { } #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set renderargs [list -experimental $opt_experimental\ - -cp437 $opt_cp437\ - -info 1\ - -crm_mode $crm_mode\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ - -reverse_mode $reverse_mode\ - -cursor_restore_attributes $cursor_saved_attributes\ - -transparent $opt_transparent\ - -width $renderwidth\ - -exposed1 $opt_exposed1\ - -exposed2 $opt_exposed2\ - -expand_right $opt_expand_right\ - -cursor_column $col\ - -cursor_row $row\ + set renderargs [list -experimental $opt_experimental\ + -cp437 $opt_cp437\ + -info 1\ + -crm_mode [tcl::dict::get $vtstate crm_mode]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ + -reverse_mode [tcl::dict::get $vtstate reverse_mode]\ + -cursor_restore_attributes $cursor_saved_attributes\ + -transparent $opt_transparent\ + -width [tcl::dict::get $vtstate renderwidth]\ + -exposed1 $opt_exposed1\ + -exposed2 $opt_exposed2\ + -expand_right $opt_expand_right\ + -cursor_column $col\ + -cursor_row $row\ $undertext\ $overtext\ ] set LASTCALL $renderargs set rinfo [renderline {*}$renderargs] - set instruction [tcl::dict::get $rinfo instruction] - set insert_mode [tcl::dict::get $rinfo insert_mode] - set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# - set reverse_mode [tcl::dict::get $rinfo reverse_mode] + set instruction [tcl::dict::get $rinfo instruction] + tcl::dict::set vtstate crm_mode [tcl::dict::get $rinfo crm_mode] + tcl::dict::set vtstate insert_mode [tcl::dict::get $rinfo insert_mode] + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# + tcl::dict::set vtstate reverse_mode [tcl::dict::get $rinfo reverse_mode] #how to support reverse_mode in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? # - review - the answer is probably that we don't need to - it is set/reset only during application of overtext - set crm_mode [tcl::dict::get $rinfo crm_mode] - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] - set unapplied [tcl::dict::get $rinfo unapplied] - set unapplied_list [tcl::dict::get $rinfo unapplied_list] - set post_render_col [tcl::dict::get $rinfo cursor_column] - set post_render_row [tcl::dict::get $rinfo cursor_row] - set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] - set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] - set visualwidth [tcl::dict::get $rinfo visualwidth] - set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] - set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] - tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + + #Note carefully the difference betw overflow_right and unapplied. + #overflow_right may need to be included in next run before the unapplied data + #overflow_right most commonly has data when in insert_mode + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] + set unapplied [tcl::dict::get $rinfo unapplied] + set unapplied_list [tcl::dict::get $rinfo unapplied_list] + set post_render_col [tcl::dict::get $rinfo cursor_column] + set post_render_row [tcl::dict::get $rinfo cursor_row] + set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] + set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] + set visualwidth [tcl::dict::get $rinfo visualwidth] ;#column width of what is 'rendered' for the line + set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] + set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - if {0 && $reverse_mode} { + if {0 && [tcl::dict::get $vtstate reverse_mode]} { #test branch - todo - prune puts stderr "---->[ansistring VIEW $replay_codes_overlay] rendered: $rendered" #review @@ -593,19 +598,29 @@ tcl::namespace::eval overtype { #todo - handle potential insertion mode as above for cursor restore? #keeping separate branches for debugging - review and merge as appropriate when stable - tcl::dict::incr instruction_stats $instruction - switch -- $instruction { + set instruction_type [lindex $instruction 0] ;#some instructions have params + tcl::dict::incr instruction_stats $instruction_type + switch -- $instruction_type { reset { #reset the 'renderspace terminal' (not underlying terminal) set row 1 set col 1 - - + set vtstate [tcl::dict::merge $vtstate $initial_state] + #todo - clear screen } {} { + #end of supplied line input #lf included in data set row $post_render_row set col $post_render_col + if {![llength $unapplied_list]} { + if {$overflow_right ne ""} { + incr row + } + } else { + puts stderr "renderspace end of input line - has unapplied: [ansistring VIEW $unapplied] (review)" + } + set col $opt_startcolumn } up { @@ -708,17 +723,18 @@ tcl::namespace::eval overtype { puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" - set sub_info [overtype::renderline -info 1\ - -width $renderwidth\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ + set sub_info [overtype::renderline\ + -info 1\ + -width [tcl::dict::get $vtstate renderwidth]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ -expand_right [tcl::dict::get $opts -opt_expand_right]\ ""\ $overflow_right\ ] - set foldline [tcl::dict::get $sub_info result] - set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? - set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. + set foldline [tcl::dict::get $sub_info result] + tcl::dict::set vtstate insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. linsert outputlines $renderedrow $foldline #review - row & col set by restore - but not if there was no save.. } @@ -745,6 +761,53 @@ tcl::namespace::eval overtype { set col $post_render_col #overflow + unapplied? } + clear_and_move { + #e.g 2J + if {$post_render_row > [llength $outputlines]} { + set row [llength $outputlines] + } else { + set row $post_render_row + } + set col $post_render_col + set overflow_right "" ;#if we're clearing - any overflow due to insert_mode is irrelevant + set clearedlines [list] + foreach ln $outputlines { + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $renderwidth]\x1b\[0m + if 0 { + + set lineparts [punk::ansi::ta::split_codes $ln] + set numcells 0 + foreach {pt _code} $lineparts { + if {$pt ne ""} { + foreach grapheme [punk::char::grapheme_split $pt] { + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + incr numcells 1 + } + default { + if {$grapheme eq "\u0000"} { + incr numcells 1 + } else { + incr numcells [grapheme_width_cached $grapheme] + } + } + } + + } + } + } + #replays/resets each line + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $numcells]\x1b\[0m + } + } + set outputlines $clearedlines + #todo - determine background/default to be in effect - DECECM ? + puts stderr "replay_codes_overlay: [ansistring VIEW $replay_codes_overlay]" + #lset outputlines 0 $replay_codes_overlay[lindex $outputlines 0] + + } lf_start { #raw newlines # ---------------------- @@ -780,27 +843,48 @@ tcl::namespace::eval overtype { append rendered $overflow_right set overflow_right "" } else { + #review - we should really make renderline do the work...? set overflow_width [punk::ansi::printing_length $overflow_right] if {$visualwidth + $overflow_width <= $renderwidth} { append rendered $overflow_right set overflow_right "" } else { - if {$visualwidth < $renderwidth} { - set graphemes [punk::char::grapheme_split $overflow_width] - set add "" - set addlen $visualwidth - set remaining_overflow $graphemes - foreach g $graphemes { - set w [overtype::grapheme_width_cached] - if {$addlen + $w <= $renderwidth} { - append add $g - incr addlen $w - lpop remaining_overflow - } else { - break - } + if {[tcl::dict::get $vtstate autowrap_mode]} { + set outputlines [linsert $outputlines $renderedrow $overflow_right] + set overflow_right "" + set row [expr {$renderedrow + 2}] + } else { + set overflow_right "" ;#abandon + } + + if {0 && $visualwidth < $renderwidth} { + puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" + error "incomplete - abandon?" + set overflowparts [punk::ansi::ta::split_codes $overflow_right] + set remaining_overflow $overflowparts + set filled 0 + foreach {pt code} $overflowparts { + lpop remaining_overflow 0 + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set add "" + set addlen $visualwidth + foreach g $graphemes { + set w [overtype::grapheme_width_cached $g] + if {$addlen + $w <= $renderwidth} { + append add $g + incr addlen $w + } else { + set filled 1 + break + } + } + append rendered $add + } + if {!$filled} { + lpop remaining_overflow 0 ;#pop code + } } - append rendered $add set overflow_right [join $remaining_overflow ""] } } @@ -829,14 +913,16 @@ tcl::namespace::eval overtype { #we may also have other control sequences that came after col 80 e.g cursor save if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" - #assertion - there should be no overflow.. - puts $lhs + #assertion - there should be no overflow.. + puts $lhs + } + if {![tcl::dict::get $vtstate insert_mode]} { + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode } - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right set row $post_render_row #set row $renderedrow @@ -981,7 +1067,7 @@ tcl::namespace::eval overtype { #normal single-width grapheme overflow #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" set row $post_render_row ;#renderline will not advance row when reporting overflow char - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { incr row set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? } else { @@ -1014,7 +1100,7 @@ tcl::namespace::eval overtype { #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { if {$renderwidth < 2} { #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character set idx 0 @@ -1055,6 +1141,14 @@ tcl::namespace::eval overtype { set row $post_render_row set col $post_render_col } + set_window_title { + set newtitle [lindex $instruction 1] + puts stderr "overtype::renderspace set_window_title [ansistring VIEW $newtitle] instruction '$instruction'" + # + } + reset_colour_palette { + puts stderr "overtype::renderspace instruction '$instruction' unimplemented" + } default { puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" } @@ -1062,7 +1156,7 @@ tcl::namespace::eval overtype { } - if {!$opt_expand_right && !$autowrap_mode} { + if {!$opt_expand_right && ![tcl::dict::get $vtstate autowrap_mode]} { #not allowed to overflow column or wrap therefore we get overflow data to truncate if {[tcl::dict::get $opts -ellipsis]} { set show_ellipsis 1 @@ -1160,11 +1254,22 @@ tcl::namespace::eval overtype { } set result [join $outputlines \n] - if {$info_mode} { + if {!$opt_info} { + return $result + } else { #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? #append result \n$instruction_stats\n + set inforesult [dict create\ + result $result\ + last_instruction $instruction\ + instruction_stats $instruction_stats\ + ] + if {$opt_info == 2} { + return [pdict -channel none inforesult] + } else { + return $inforesult + } } - return $result } #todo - left-right ellipsis ? @@ -2368,14 +2473,22 @@ tcl::namespace::eval overtype { } else { #linefeed occurred in middle or at end of text #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" - incr cursor_row - if {$idx == -1 || $overflow_idx > $idx} { - #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow - set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + if {$insert_mode == 0} { + incr cursor_row + if {$idx == -1 || $overflow_idx > $idx} { + #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + } + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + incr cursor_row + #don't adjust the overflow_idx + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction lf_mid + break ;# could have overdata following the \n - don't keep processing } - set instruction lf_mid - priv::render_unapplied $overlay_grapheme_control_list $gci - break } } @@ -3077,8 +3190,11 @@ tcl::namespace::eval overtype { set idx [expr {$cursor_column -1}] set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] incr idx_over + if {[llength $outcols]} { + priv::render_erasechar 0 [llength $outcols] + } priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction move + set instruction clear_and_move break } 3 { @@ -3749,6 +3865,72 @@ tcl::namespace::eval overtype { } 7OSC - 8OSC { + # OSCs are terminated with ST of either \007 or \x1b\\ - we allow either whether code was 7 or 8 bit + if {[tcl::string::index $codenorm end] eq "\007"} { + set code_content [tcl::string::range $codenorm 4 end-1] ;#ST is \007 + } else { + set code_content [tcl::string::range $codenorm 4 end-2] ;#ST is \x1b\\ + } + set first_colon [tcl::string::first {;} $code_content] + if {$first_colon == -1} { + #there probably should always be a colon - but we'll try to make sense of it without + set osc_code $code_content ;#e.g \x1b\]104\007 vs \x1b\]104\;\007 + } else { + set osc_code [tcl::string::range $code_content 0 $first_colon-1] + } + switch -exact -- $osc_code { + 2 { + set newtitle [tcl::string::range $code_content 2 end] + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list set_window_title $newtitle] + break + } + 4 { + #OSC 4 - set colour palette + #can take multiple params + #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ + set params [tcl::string::range $code_content 1 end] + + puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 { + #OSC 10 through 17 - so called 'dynamic colours' + #can take multiple params - each successive parameter changes the next colour in the list + #- e.g if code started at 11 - next param is for 12. 17 takes only one param because there are no more + #10 change text foreground colour + #11 change text background colour + #12 change text cursor colour + #13 change mouse foreground colour + #14 change mouse background colour + #15 change tektronix foreground colour + #16 change tektronix background colour + #17 change highlight colour + set params [tcl::string::range $code_content 2 end] + + puts stderr "overtype::renderline OSC $osc_code set dynamic colours unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 18 { + #why is this not considered one of the dynamic colours above? + #https://www.xfree86.org/current/ctlseqs.html + #tektronix cursor color + puts stderr "overtype::renderline OSC 18 - set tektronix cursor color unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + 104 { + #reset colour palette + #we want to do it for the current rendering context only (vvt) - not just pass through to underlying vt + puts stderr "overtype::renderline OSC 104 reset colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list reset_colour_palette] + break + } + default { + puts stderr "overtype::renderline OSC - UNIMPLEMENTED. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } } default { @@ -3791,6 +3973,23 @@ tcl::namespace::eval overtype { #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW set in_overflow 1 } + set trailing_nulls 0 + foreach ch [lreverse $outcols] { + if {$ch eq "\u0000"} { + incr trailing_nulls + } else { + break + } + } + if {$trailing_nulls} { + set first_tail_null_posn [expr {[llength $outcols] - $trailing_nulls}] + } else { + set first_tail_null_posn -1 + } + + #puts stderr "first_tail_null_posn: $first_tail_null_posn" + #puts stderr "colview: [ansistring VIEW $outcols]" + foreach ch $outcols { #puts "---- [ansistring VIEW $ch]" @@ -3865,8 +4064,17 @@ tcl::namespace::eval overtype { } append outstring $gxleader append outstring $sgrleader - if {$idx+1 < $cursor_column} { - append outstring [tcl::string::map {\u0000 " "} $ch] + if {$ch eq "\u0000"} { + if {$cp437_glyphs} { + #map all nulls including at tail to space + append outstring " " + } else { + if {$trailing_nulls && $i < $first_tail_null_posn} { + append outstring " " ;#map inner nulls to space + } else { + append outstring \u0000 + } + } } else { append outstring $ch } @@ -3874,12 +4082,20 @@ tcl::namespace::eval overtype { incr i } #flower.ans good test for null handling - reverse line building - if {![ansistring length $overflow_right]} { - set outstring [tcl::string::trimright $outstring "\u0000"] - } - set outstring [tcl::string::map {\u0000 " "} $outstring] - set overflow_right [tcl::string::trimright $overflow_right "\u0000"] - set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] + #review - presence of overflow_right doesn't indicate line's trailing nulls should remain. + #The cells could have been erased? + #if {!$cp437_glyphs} { + # #if {![ansistring length $overflow_right]} { + # # set outstring [tcl::string::trimright $outstring "\u0000"] + # #} + # set outstring [tcl::string::trimright $outstring "\u0000"] + # set outstring [tcl::string::map {\u0000 " "} $outstring] + #} + + + #REVIEW + #set overflow_right [tcl::string::trimright $overflow_right "\u0000"] + #set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] set replay_codes "" if {[llength $understacks] > 0} { @@ -4207,8 +4423,9 @@ tcl::namespace::eval overtype::priv { upvar outcols o upvar understacks ustacks upvar understacks_gx gxstacks + upvar replay_codes_overlay replay #ECH clears character attributes from erased character positions - #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. + #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller of render_erasechar should do that mapping and only supply 1 or greater. if {![tcl::string::is integer -strict $count] || $count < 1} { error "render_erasechar count must be integer >= 1" } @@ -4223,8 +4440,9 @@ tcl::namespace::eval overtype::priv { } set num [expr {$end - $start + 1}] set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? - set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] - set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] + #DECECM ??? + set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list $replay]]] + set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] ;# ??? review return } proc render_setchar {i c } { diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm index 267e680e..1a40c952 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -413,6 +413,7 @@ tcl::namespace::eval punk::ansi { tcl::namespace::export\ {a?} {a+} a \ ansistring\ + ansiwrap\ convert*\ clear*\ cursor_*\ @@ -722,77 +723,11 @@ tcl::namespace::eval punk::ansi { #candidate for zig/c implementation? proc stripansi2 {text} { - - set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters join [::punk::ansi::ta::split_at_codes $text] "" } - proc stripansi1 {text} { - - #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW - - variable escape_terminals ;#dict - variable ::punk::ansi::ta::standalone_code_map ;#map to empty string - - set text [convert_g0 $text] - - - set text [tcl::string::map $standalone_code_map $text] - #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm - #\x1b#3 double-height letters top half - #\x1b#4 double-height letters bottom half - #\x1b#5 single-width line - #\x1b#6 double-width line - #\x1b#8 dec test fill screen - - - #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. - - #Theoretically line endings can occur within an ST payload (review e.g title?) - #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) - - set inputlist [split $text ""] - set outputlist [list] - - set in_escapesequence 0 - #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) - - set i 0 - foreach u $inputlist { - set v [lindex $inputlist $i+1] - set uv ${u}${v} - if {$in_escapesequence eq "2b"} { - #2nd byte - done. - set in_escapesequence 0 - } elseif {$in_escapesequence != 0} { - set endseq [tcl::dict::get $escape_terminals $in_escapesequence] - if {$u in $endseq} { - set in_escapesequence 0 - } elseif {$uv in $endseq} { - set in_escapesequence 2b ;#flag next byte as last in sequence - } - } else { - #handle both 7-bit and 8-bit CSI and OSC - if {[regexp {^(?:\033\[|\u009b)} $uv]} { - set in_escapesequence CSI - } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { - set in_escapesequence OSC - } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { - set in_escapesequence DCS - } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { - #SOS,PM,APC - all terminated with ST - set in_escapesequence MISC - } else { - lappend outputlist $u - } - } - incr i - } - return [join $outputlist ""] - } - #review - what happens when no terminator? #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set @@ -2029,7 +1964,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map - set fcposn [lsearch $args "forcecol*"] + set fcposn [lsearch $args "force*"] set fc "" set opt_forcecolour 0 if {$fcposn >= 0} { @@ -2409,7 +2344,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >= 0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -2767,7 +2702,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >=0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -3199,6 +3134,16 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } return $out } + proc move_emitblock {row col textblock} { + #*** !doctools + #[call [fun move_emitblock] [arg row] [arg col] [arg textblock]] + set commands "" + foreach ln [split $textblock \n] { + append commands [punk::ansi::move_emit $row $col $ln] + incr row + } + return $commands + } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] @@ -3315,18 +3260,90 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) - #Alt screen buffer + #Alt screen buffer - smcup/rmcup ti/te + #Note \x1b\[?47h doesn't work at all in some terminals (e.g alacritty,cmd on windows and reportedly kitty) + #It is also reported to have 'deceptively similar' effects to 1049 -but to be 'subtly broken' on some terminals. + #see: https://xn--rpa.cc/irl/term.html + #1049 (introduced by xterm in 1998?) considered the more modern version? + #1047,1048,1049 xterm private modes are 'composite' control sequences as replacement for original 47 sequence + #1049 - includes save cursor,switch to alt screen, clear screen + #e.g ? (below example not known to be exactly what 1049 does - but it's something similar) + #SMCUP + # \x1b7 (save cursor) + # \x1b\[?47h (switch) + # \x1b\[2J (clear screen) + #RMCUP + # \x1b\[?47l (switch back) + # \x1b8 (restore cursor) + + #1047 - clear screen on the way out (ony if actually on alt screen) proc enable_alt_screen {} { #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? - #\x1b\[?1049h ;#xterm - return \x1b\[?47h + return \x1b\[?1049h } proc disable_alt_screen {} { #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] - #\x1b\[?1049l + return \x1b\[?1049l + } + #47 - less widely supported(?) doesn't restore cursor or clear alt screen + proc enable_alt_screen2 {} { + return \x1b\[?47h + } + proc disable_alt_screen2 {} { return \x1b\[?47l } + proc term_colour_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_color_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_colour_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_color_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_colour_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_color_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_colour_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_color_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_colour_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + proc term_color_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + #15,16 tektronix fg, tektronix bg ??? + proc term_colour_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + proc term_color_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + #18 tektronix cursor colour ??? + proc term_colour_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + proc term_color_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + #22 pointer shape - there are other methods too - not known to work on windows terminal based VTs - review + proc term_colour_reset {} { + return "\x1b\]104\;\x1b\\" + } + proc term_color_reset {} { + return "\x1b\]104\;\x1b\\" + } # -- --- --- proc erase_line {} { @@ -4398,6 +4415,10 @@ tcl::namespace::eval punk::ansi::ta { variable re_ansi_detect_open expr {[regexp $re_ansi_detect_open $text]} } + proc detect_st_open {text} { + variable re_ST_open + expr {[regexp $re_ST_open $text]} + } #not in perl ta proc detect_csi {text} { @@ -4672,7 +4693,8 @@ tcl::namespace::eval punk::ansi::class { } oo::class create base_renderer { variable o_width - variable o_wrap o_overflow o_appendlines o_looplimit + variable o_autowrap_mode + variable o_overflow o_appendlines o_looplimit variable o_cursor_column o_cursor_row #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered @@ -4690,25 +4712,33 @@ tcl::namespace::eval punk::ansi::class { } tcl::namespace::path $nspath #-- -- - if {[llength $args] < 2} { - error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} + if {[llength $args] < 1} { + error {usage: ?-width ? ?-height ? ?-autowrap_mode [1|0]? ?-overflow [1|0]? from_ansistring} } - lassign [lrange $args end-1 end] from_ansistring to_ansistring + #lassign [lrange $args end-1 end] from_ansistring to_ansistring + set from_ansistring [lindex $args end] + set opts [tcl::dict::create\ - -width \uFFEF\ - -wrap 1\ - -overflow 0\ - -appendlines 1\ - -looplimit 15000\ - -experimental {}\ - -cursor_column 1\ - -cursor_row 1\ + -width \uFFEF\ + -height \uFFEF\ + -overflow 0\ + -appendlines 1\ + -looplimit 15000\ + -experimental {}\ + -cursor_column 1\ + -cursor_row 1\ + -insert_mode 0\ + -autowrap_mode 1\ + -initial_ansistring ""\ ] - puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" set argsflags [lrange $args 0 end-2] foreach {k v} $argsflags { switch -- $k { - -width - -wrap - -overflow - -appendlines - -looplimit - -experimental { + -width - -height - + -overflow - -appendlines - -looplimit - -experimental - + -autowrap_mode - + -insert_mode - + -initial_ansistring { tcl::dict::set opts $k $v } default { @@ -4717,8 +4747,19 @@ tcl::namespace::eval punk::ansi::class { } } } + set initial_ansistring [tcl::dict::get $opts -initial_ansistring] + if {$initial_ansistring eq ""} { + set to_ansistring [punk::ansi::class::class_ansistring new ""] + } else { + #todo - verify obj vs raw string + set to_ansistring $initial_ansistring + } + puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" + set o_width [tcl::dict::get $opts -width] - set o_wrap [tcl::dict::get $opts -wrap] + set o_height [tcl::dict::get $opts -height] + set o_autowrap_mode [tcl::dict::get $opts -autowrap_mode] + set o_insert_mode [tcl::dict::get $opts -insert_mode] set o_overflow [tcl::dict::get $opts -overflow] set o_appendlines [tcl::dict::get $opts -appendlines] set o_looplimit [tcl::dict::get $opts -looplimit] @@ -4736,6 +4777,9 @@ tcl::namespace::eval punk::ansi::class { method eval_in {script} { eval $script } + method renderbuf {} { + return $o_to_ansistring + } method cursor_column {{col ""}} { if {$col eq ""} { return $o_cursor_column @@ -4755,6 +4799,12 @@ tcl::namespace::eval punk::ansi::class { set o_cursor_row $row } + #set/query cursor state + method cursor_state {args} { + lassign $args r c + return [dict create row [my cursor_row $r] column [my cursor_column $c]] + } + #consider scroll area #we need to render to something with a concept of viewport, offscreen above,below,left,right? method rendernext {} { @@ -4834,7 +4884,8 @@ tcl::namespace::eval punk::ansi::class { #set rinfo [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -width $o_width -overflow 0 -cursor_column $col -cursor_row $row $undertext $overtext] } } - + #renderspace equivalent? channel based? + #todo $o_to_ansistring append $newtext return [tcl::dict::create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] @@ -4960,11 +5011,10 @@ tcl::namespace::eval punk::ansi::class { if {$o_renderer ne ""} { append result \n " renderer obj: $o_renderer" append result \n " renderer class: [info object class $o_renderer]" - } - if {$o_renderout ne ""} { - append result \n " render target ansistring: $o_renderout" - append result \n " render target has ansi : [$o_renderout has_ansi]" - append result \n " render target count : [$o_renderout count]" + set renderstring [$o_renderer renderbuf] + append result \n " render target ansistring: $renderstring" + append result \n " render target has ansi : [$renderstring has_ansi]" + append result \n " render target count : [$renderstring count]" } if {$verbose} { append result \n "ansisplits listing" @@ -5052,7 +5102,8 @@ tcl::namespace::eval punk::ansi::class { } method strippedlength {} { if {![llength $o_ansisplits]} {my MakeSplit} - + #review + return [string length [join $o_ptlist ""]] } #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already method stripped {} { @@ -5119,10 +5170,9 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ni $rtypes} { error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" } - if {$o_renderout eq ""} { - #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? - set o_renderout [punk::ansi::class::class_ansistring new ""] - } + #if {$o_renderout eq ""} { + # set o_renderout [punk::ansi::class::class_ansistring new ""] + #} if {$o_renderer ne ""} { set oinfo [info object class $o_renderer] set tail [tcl::namespace::tail $oinfo] @@ -5130,13 +5180,15 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ne $currenttype} { puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } else { return $currenttype } } else { puts "creating first renderer" - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } } #--- progressive rendering buffer - another ansistring object @@ -5149,10 +5201,13 @@ tcl::namespace::eval punk::ansi::class { return $o_renderwidth } #re-render if needed? - + puts stderr "renderwidth todo? re-render?" set o_renderwidth $rw } + method renderer {} { + return $o_renderer + } method render_state {} { #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. @@ -5160,10 +5215,36 @@ tcl::namespace::eval punk::ansi::class { } method renderbuf {} { #get the underlying renderobj - if any - return $o_renderout ;#also class_ansistring + #return $o_renderout ;#also class_ansistring + return [$o_renderer renderbuf] } - method render {} { + method render {{maxgraphemes ""}} { #full render - return buffer ansistring + set do_render 1 + set grapheme_count 0 + set other_count 0 + if {$maxgraphemes eq ""} { + while {$do_render} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } else { + while {$do_render && $grapheme_count <= $maxgraphemes} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } + return [dict create graphemes $grapheme_count other $other_count] } method rendernext {} { #render next available pt/code chunk only - not to end of available input @@ -5198,6 +5279,13 @@ tcl::namespace::eval punk::ansi::class { #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column + #like Tcl's append class_ansistring append returns the result directly - which for ANSI - can be inconvenient in the terminal + #class_ansistring append_string is a convenience wrapper to avoid returning the raw result + method append_string {args} { + my append {*}$args + return + } + #analagous to Tcl string append #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient method append {args} { @@ -5369,7 +5457,7 @@ tcl::namespace::eval punk::ansi::class { } - #method append_and_render - append and render up to end of appended data at same time + #method append_and_render? - append and render up to end of appended data at same time method view {args} { if {$o_string eq ""} { diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm index 4dd7bd66..e367ce9e 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm @@ -864,6 +864,7 @@ namespace eval punk::console { #Turning ansi off only stops new ansi being generated - but what about codes stored in configurations of existing elements such as tables/frames? #It will stop underlines/bold/reverse as well as SGR colours #what about ansi movement codes etc? + #we already have colour on|off which disables SGR codes in a+ etc as well as stderr/stdout channel transforms proc ansi {{onoff {}}} { variable ansi_wanted if {[string length $onoff]} { @@ -891,6 +892,7 @@ namespace eval punk::console { } } catch {punk::repl::reset_prompt} + puts stderr "::punk::console::ansi - use 'colour' command to turn SGR codes on/off" return [expr {$ansi_wanted}] } @@ -1295,10 +1297,10 @@ namespace eval punk::console { if {![catch {twapi::set_console_title $windowtitle} result]} { return $windowtitle } else { - error "punk::console::titleset failed to set title - try punk::console::ansi::titleset" + error "punk::console::local::titleset failed to set title - try punk::console::ansi::titleset" } } else { - error "punk::console::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" + error "punk::console::local::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" } } proc titleget {} { @@ -1306,12 +1308,12 @@ namespace eval punk::console { if {![catch {twapi::get_console_title} result]} { return $result } else { - error "punk::console::titleset failed to set title - ensure twapi is available" + error "punk::console::local::titleset failed to set title - ensure twapi is available" } } else { #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title - # won't work on all platforms/terminals - but may be worth implementing - error "punk::console::titleget has no local mechanism to get the window title on this platform." + # won't work on all platforms/terminals - but may be worth implementing (for overtype::renderspace / frames etc) + error "punk::console::local::titleget has no local mechanism to get the window title on this platform." } } } @@ -1327,7 +1329,7 @@ namespace eval punk::console { if { $ansi_wanted <= 0} { punk::console::local::titleset $windowtitle } else { - tailcall ansi::titleset $windowtitle + ansi::titleset $windowtitle } } #no known pure-ansi solution @@ -1486,8 +1488,6 @@ namespace eval punk::console { namespace import ansi::insert_lines namespace import ansi::delete_lines - interp alias {} smcup {} ::punk::console::enable_alt_screen - interp alias {} rmcup {} ::punk::console::disable_alt_screen #experimental proc rhs_prompt {col text} { @@ -1881,12 +1881,6 @@ namespace eval punk::console { -interp alias {} colour {} punk::console::colour -interp alias {} ansi {} punk::console::ansi -interp alias {} color {} punk::console::colour -interp alias {} a+ {} punk::console::code_a+ -interp alias {} a {} punk::console::code_a -interp alias {} a? {} punk::console::code_a? diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm index 70f924d7..cf0bf70c 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -1561,7 +1561,8 @@ tcl::namespace::eval punk::ns { #set name [string trim $name :] #set origin [namespace origin ${upns}::$name] - set origin [nseval $targetns [list ::namespace origin $name]] + set origin [nseval $targetns [list ::namespace origin $name]] + set resolved [nseval $targetns [list ::namespace which $name]] #An renamed alias may exist that is the same name as a proc that was created later.. so we must check for the proc before looking into aliases! if {$origin ni [info procs $origin]} { @@ -1613,7 +1614,8 @@ tcl::namespace::eval punk::ns { } lappend argl $a } - list proc [nsjoin ${targetns} $name] $argl $body + #list proc [nsjoin ${targetns} $name] $argl $body + list proc $resolved $argl $body } diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.1.tm new file mode 100644 index 0000000000000000000000000000000000000000..4ea2ce3d5c130888c6d7d9839df23f375bc5cbbd GIT binary patch literal 24693 zcmch92Rzm7`@dvli;T=;@2zYh%Fc+g=P}PYIOkX?va|Q5vKmT+j1-w=kB~BwJ&J@R z;{Q1)$3xHae0zSc|EW)$b3XTVUH3KL_kCZV@Ih!VmMiwoARBuK802n`v;$qVf`A}S z7S>=m$QcfH2E(ku*8KQr_k&n$!7wn~0tvPTxgzXgwjijJwX>78q$JeB5o`ufGu!!K z-ynM%ke88_1=7k6#zH?x3Qo4Em|-WdaT zazbto+Ifq#u!ro8H`zx3VgYE<76vjxI794_+jspU#%Hpt_H6CE8J>lcE1}ZfKG%T3Ed~bH4qqr03YIFEC+lCr4fX)1;7pA z1ab$1tSn%_{9Iv3kcG1|+{qbk55xk*b02w>vjX2)?f4i1Y=s1Q$%7OQ^J&zr+xmb& zz%W}N0$JrvKr%8QLEvUM80iX!fxNEmH#TYr>a~BE!XFQl1sS0+|DBL1jwrtY;sucL zz;sbxqh6?6&(U9*52mb=%<+IsDUIKR8rp2S=c$ z53#a9fX$FlRFIh=z_w5@3`kB!s7wcBY@`#!$^BQ@?}acBN+u=;B;KP3L?w_-QOOj6 zn!-VPNGph>BoZnq2?3JfA*6mP9%5w%MUerq?g)xb_+Z*cu|f&4Jp^qw+Zkk6`kxdJ zijNw-FH(Sl10??KpxkBuS3R-r=VY=SS$i?^mv8?mU{GlgO=<|-0%i-8J#^`T`J;4? z7KeY~0RlYA1SG}c3M4I5(gDMf5KkZ#A%H53qJO|>s9B?4mOx-Q9{TM;$2s`PKE1K- zo5*kD0A(o~R|o`1nT{5yi8?sh1BJrM2?o@E(0(G@g#&OY6lD{Tq$FyJsIj2!GUQ|f z6z&}&76<@r0CNQ>{rm_Hc5$_bgAbkCwzr^iv>Cv1x2Op0Xnb25FYasm1u;RD9^ihk zH6LIL0M6_yK?ivJwtS#e{#ON{>d@{S4-oo!|2A`UVgEOA@DC8)w%m5^vT}k#fjW-> zZ3ia+nrsjNRzpqJ1Oy-?X6E1dFN+1-))kdFKu&NJ9<;JUIWFLDP9RI53<7Wo2y)cd z+rZHcMHZkgDXFnt9KlGd145x@%L2B51FpLZ3lNeh7>63VGiqBdXTV{QsLxSea5xbF zcmf4kSWsAmPf$VgJP{x#7@!!KwUavnQS#>k+Cx+kdZY2eL;?3KwzVT4)XhK$}AwTV>lGd+6gpf z5X>GG(7*KcJI6xF0pKizvpo_FlGjq%^(-_8{T1Z9ss_2E5&*DHfFoUD+i(j|BVamb z>usIz?LxC1gF3>T+(CcwU6hMDxgtSGxaYRwkcVprCE0f5>@HOfwGQeg)QjbSzJ8%) z&@b)$T^s|+4(x%1gP{O?1R;QujN*;lHN6GM9qt6!*wz({K!7+c;7EHL3&2!BQnWyo zQB+_8xEioLn(qQWgK!0d&K&MmJ7NQZJG%m4fcbC4-?Qo8YT(~c_`RZmBnpRX{I-t# zk`}NT$O!^?xU>D=N`o>S)6YfbLD7Dh+a@OkTl9HvLNZbc#s9z0A&tXP8-840ax6elH_4y(Fg4kLbzI4fx*CP+!)ABD44aI zH@9!y^S8Zl1pS)c{x$7Sy`ulzH?^~<%#KR@z7P{Usr$sXyJV-*uenXy9+4m?&uiE z_J7EH*YJSlc2u=Z4=8;`Xm3saUU~bHjVo+NqC@2Q+X&g+KiS(Tu>lIywwwH1Yy<0L z6vhHVWP2B8yI}3W7Z58NsQbOFhnw=+;aPwM$VEG<-Uqip2vXKslkZ>>H!#c|xD80CsMHE1OjKz^t;SqoJ3|30 zqDt|hdfMGp_(LlnAo;f_k;TRW03doPi1SBcx3h$tu9(Int%+k2$eAWKgG_Q6~MQ+fgefpB)- zPM3h@w{zC7I6|#kAiy3H|NkV}MS#u@Rlth@r~_aElpz6byWS40n@|VfZ~vlR;Pk)g9`iDDD2O*Q0Jgy|&j#)*z#w z9{&^n|CC}|oc;dyTZSll|H6U)Z^{IU*`CmPyNQ_9W+#pAllX51M$!5Ikd9Go4o}Iy z|K{-f|FYu>7-|Rg|0}f~P{n>8``@8XB%FUbJm8=}Zrs}{q|s74igf}51LGJ5#o2cC zJ5L{cKYA1c!-)_BLk0sE<7YWRIzfRgC_6qOe&CM)!WCF)AOuhm1nhvhS^>MN;QjCT zQOmupEr8My%uknw90vqRecV7>bukn7qvgLRcLfvI>L$L0^ zqHtPyEjOtyZ=;-w?mjN8v99TWuVU&1rSaB3SYGxsGb$9kiGRthp*Q)M*+0=(Ft9|P zE6Zsj-I*X5ukUTS2vtfqSKKH^ac#=F$&`#l$jORqGCA^MI*Gz?{}Ys_)_kh(D1CCq zaSvop`g+gYIlu7GIxFsvgVi@v6Ql-D$DvnNLvl{^RMFYsI|Nt)m{~o47AOpU0KSk-Y3_ zh)HM}!pjmAe=|!lnVyqvxHh5~zh!2srKw@%Jm=l11r94tch8*pr!+ok#>Nry?8~3+ zH-5~mvC{eAOsp)oH1y*36Oydt{0 z$Scl?dHAB9Ac>+6L!5zz$))L&1Z`xe`f2GYmppK)byj?`W)U8R>zx-Oq+JWJH}BY0 zi8#mGJzneU{Ir3&7i7l@I1Qp5{3#=`FfizVAUg$w-roip9LNjSz&5k9z^~u7gDY2G z&1vj3sqDb@t-6beAFQ?6lr;m1y^KqOu|wUwIG;Jlsm139F+X8V&>;O*?H)v;;*F)D z%@RTZXL#RwRU~zS3dY+nQ`{*8rKvdj!0?DGOS$j4+I;=z4?A6 zo2MLnvfxg|OrUG%MoNPIvuIxGKKQ+4@EQI|wP(Ca3q6wYQ{S5mNHMDxXj?R1s?I-C z-ZT*8;05Nk(@f)-(7`pBL*KcW6r~?)?~C?EW%K9BX~W zG_O>z`)%q;*xAr$osb|O;xLJkyK_QLjrzup%IYC_WqR3&el61LQ2|(|BOX$Go6Az> zzXwbCU>fsm&H1x{{#~JUX_7bC(LS-DB@+5NthAfmXDByHmWNNyL_d)WZqvNSXRTwA zo?)K+gRt%F_r$RQBr@5c@7Oozv%=WDmZiwPl+82~;SU1Jys}fC_jBr>ouw(kVIjZq zDk}6$9s8{KD{?vpc~R{bCN3Yd9a=+ZpRJy0X_;~M&EROL$hq5Eh2%7es_C#it`{}_ zs^}xj)vRTfD>W0u+IOb^#l9u@EOcxo2MOy?VM@(alm(H5rb#ND6$UKy`K327RJ z?G4T&2uy04Pa6K98a?4V@EA_4Qb_I~$R`qV-0@1fnQy_AG@{g*?tWyja=M;LdRU)0 z-<-`G$7TiWvDD51$@Q5cdtN$-bN9s@cny1OyEuY_a3w=Nt{MBB%<9=${}6iFFcF%# zAS+gzg1)?KpZys#sIsRilLAB1_2jwJ)me!a=FQ`#GVM~`>FQpM%x_+v-ZY*r(0&!c z9(v2iKR%u0#gAISXMtn0f;UGb>jD<(ZdvdLd`NN-PO_@ZhnbvQa-H*&%PLB_E8v1V>GQuaYhn1!F&g zfw2Hwihq#c%urw-Az%TA11nWCA$}D4Fo}_ItP>;+T9w6iefr&czmB?t!@q4xsp~fc` z;Wh^6zI{Jqtj&E}MfU+b^rP2Ce$^GZh?8zkw+7w}gzm^ z>y#oN6W0n7Dz2*jz1HGsXiD$cD_wlp)pRvWN%mfRuWhNnvU z!V7$g29IGf)$a-kv3L*Z@W{BbuDx;hIN9ieaoxH1BoCi6#s%jvuo~>R^XXV_>zi`e zSf}iBNQ@v!bBgE7GLNbgySo)m$8BO?DtRPITisN~<|Im6vv#4~?$Z@$!>g^iO|$3( zjHRmeLH9FziZCg|SGln82`mmMK?k@LF|hw!5pXbSYk%7zx3&Q9e$bg;qNl2^sy{k0 zI+53@SE#3@%IEv}MS4zpJ>jte%=)%hZP-iU;lAk`kQ?DKLdtoe@jQ`Wv+c7bGpGgJ(F_cAMtbha>Q(J^295Dc;w|OU0lorlfT`a0 z_2&-q7sja50W;L(ju~jL(c@_bg06 zeoQ4t?i~{rU=V45*SSAeAs{puwJW=ubhd3A5%U=44`d#9*5MC1geI86Ie1KDXYb~D zCAn4hvtJgL4T$nBD-cpm(6^^(d3~*Vej|gTi2t^4)}vYVkHtk-sCw-$-3<7qPEKLB z78aUd!^B=s^L8j|+*SI`_@qDkvC#=-jEJlGyqM<}bSDj@t!9UX`me zk5D{4E@y8aPt0X4_$c>r)C!&Hh(ZpglS`4Wr@3F#T)$xHjJ8|j49Aw0^G)^#4NF6O z4auKi^}%beD*#~eDIa2qn>W)Xmp6xw{{}O4*M_Id&*{J`Er@%GsXR*fU={3>+CD+0AvhtGO=+7H1rLmck((`5}O++ZkySnFJTLk7f5>ekgNY z{F(HbnlaUo+oF@YagMJQ$DToCpY9&yFmJ>LS0ob6Qxk%i+PN5kB=YKU{J!Ty?MUNrW0cA z@*XGd?%OIaBu8jMRc6_x;Rl8=k?FO!(eXMp7dBcI=KTk|huO(Eyv|k8K=Bs}sbj)81ZtZq(jFfr)D7tjkEy80EPvBWhkYL&yB|D}%((KzX({h1 zI3EiSN8$Ag%&_rSVuqQg?UY^f<}$CoU)L{vC&`f^a1;?orH%KAcq-H>wV?M}K}TX# zEG{8kXzBb%)-f-^^7RF#FVP};+zX2#MZIC zCw1aJsTLXE@kC|coUGY}^~m^aacJ>nriJ0>UaOHOKP8YLJ2SHUT}ynvx%qg;botNN z;OBp}j>sA*`Er8yzV}CKEmkXY)wFbC`IFqXbWx@fMYuWKk`B5{t04MY@O1c199uO5 z%qncMH}4Q9v9MD=mCs1RRow(ulc=IFc=!b{Z^(LT2@wrNe@n5HxN${L5<^<3Doeey z#q69PEaI`q7kKwD1aqd)%Dl?vGnB`~BxlQtVkSLZdpt%_o*7r6(Tu?KNMkdXjao#T zuFvD;$}(z3GUXoe%BnkuaxX9UPWsw-=2at~ZyAMfE5%q=Y z4tUv&$Ak!par0tn`b*Ic>&Hu4XI(C+#|m+`biwa+)9WxQp^cDic>S)?^J9G~Q5x=# z9{X&4frn+st4Y_^@My;}b5EvyNeDbs&6n(c&b`Rt!n#YLi7iLp;=Renw=SO-7L_`R zXeo?yAM;i{vAVR>-eZNGM61(oP?pPRqaF2z%}pC3Qn&Z}9Z%2H6C{dmZ|1 zYyBB+OW1@=a106FppBBy1^15c}TG1+s)SkdzhvJW~LtAAiXcu920fJfQ?2Z~2if zYRFoO{IagPy+{C=dsAmefn}hZ-R#UD0-Tu(9dER-r^U3uy)Yt$1v)l{5gwBzmxZN} zGC;V|^K9t*Rf}x;@1$Po;;Bx^MB@q8nwa_fjqwejl&JgQB1;*K8^g9GvI*4i$OZ;R z%$aHv)};Z888Z4xkr~aU$RKSb)5XtMjT_~MZb65i7$)gg_%G!TP&AXF!FLe|W zY$?a#yb(r$DL@q%vVX1%hz)ve8pA|GPpklJqxn|B#Q|_&11=5V`d{~PcFQ0Xb?9pb z0Ztx(rZnKVV+UU9wyTV(0@zY#bW}%^zgcB$LbXV52WRTEtLQ4LUubRJClNlzmNJ8k z%PO#`gn7wSDNT&YbAgQ`9=MzaD@%n#*zTGDeF0ONMxQu^~UvD3=A z=lZA`!51k+P%B6pxT>&K$3ib@ke7DWMMtZ?LFJh8ysl6}NDArE~{cg{8*f4=B(p3;vcpT=1IDSY&{)xdn&yIU1vmTw&^7*Y*N zn}lO!wG#qlq|TeZV9=k%*z^eR(}g8PGPp+;UFj0IfIH<@Jd-9=$a1p#!fl~w1?-e? zdPHGsOBV0up2tTW+fsg@NSiGOXuy733xDj3KeZsdZFpTyelEc2E&_`itfFv8Ladzn zv5w0%`dlZutt=nhj`xceexj$Ur*plq4U*6ue|wa&RCNYl)wxswN0C5_4b4zG&|@l< zS}{1OfOwohR4V2mof^coAX4m+=OOON%UGqG0@A?*=0aMQv^+Vj%bWP> z*&%9GRKtR#cPU|=oT*gm3=oCYW%WC6VK;P>L^?E|TYhQO6u~)PX3v4%C2b zaq)&WHxQ;F+B%%=mpGd)aq=r`v;vD$^oW(>`Et3HsD(TfkbxYq#l`;~G^flgfh8cS zfz-_cXuC)5iiL_HXt)q+pFn9feUztF3G*~7tfrrh55^Th9b1x{&Tw9y@Q6U^=BgJD zT}MD<{)DHqjKXpeW;lnaT27voS?fol!aF>E{NB^TpOEy@6opgt$^abO_0h#L@ zUvB6aEg3qGdgtramg2oOOt8+6S7>@emRUIc@h$kmX7t+1wb1J3?(Z6Ln?!q-V9UT) z$>}2`2m>_02Wa3w>`@_6=h{EBhDl@vajhJo;=1x$a17U=0i?W|EvuhL?-;h|hD@cK z96`cV+qZYp*OEmjeOgEclh%6Ovkm9pxeH2MTw{BCiZgIE@QkkCQT)ilCw?rVe2GaQ z?btgxiy_>}vSQyA?4AzxbslR!l`)F*edvP982*VF#<=IrScQcap>Q+Bi(JNB2S~m-ZW7)krSw(20q> zLcKg7Uxc2cyrdPXr`6sfprXgwq^u{%ud2?}}NPKZ0Y3_r{Q@=<(A}3qPpih(;^0+SK?Rdpl`tHiX;SKo5;x-7)ZCR?AXEkrb(sn4#m< z4;Peg4=KEnUBWpQX#6S%A@h{Ak?JnT^fK%Xvr?$5pj`~9nv8PqfW8XC^l3kT;^+GM z7ebUTRJ6#}(r&)_RH`IKyo49;X5E|k5LsVafO8+B5O*njJVwU3wRttO*RGUO(f1B( z_fiZTiKnMT?me=y%&PVp`i2LJH$ru(m#zJb&`?pti{6gu&M%WVU#?1;K6s?t{kEB~ zEK`sn9{laeOS6qk2}_^%BI=J%-ov;*9^7z7=Ez1TO`aQR{jB^wqIjLm4pYHapSLA6 z_>AP}W0cA!h+56d%HxOTIHG@PbCCOytUm4l6g zB3|R+%fTebH<5>ya{RaASl5>8zx6qvt^fKWaM7}k$X9u@_-9+K^k9#hXZ)y~&SEmp;g(oJvT=I86o_k7 znBvt;92L?G=#xE}@Mb+3^zM@GWkIQ8Bk_3J_l#0}cNv{b5K?w28Z5VXO6=Qwm1V5s z*Sl=s1EU^l6{DR-_GVWUonH4zSgxI{3XrT$$at{ERjux7mL)41PRmF$-|=m3vG6Wm zV8`J3&zdk2yP5^V-oaK{4GK6hQ#L=xbbUnrG2g54Td!F%i2W(M`u)>iB@YOck5a{& zgw8&_vEdgV(U5rl-Lf{;N`-b4)1@fS9^=L*jp5_|NDiK_L45Oc7p{7aTg=czPZbdk zCwg9t^AgT~OHd#8=2H~2U{km6;M>Hkg1|h+`>gcEvJ8GVE8h})l=fye+c4pO3hx@^ z^L|8GtrvD?$yIu0(HySy>Wt5~xM1*2d!*!(iBLcKpYC985c9X_T1U9C zFZybmS0t-yc}0dlEJ}WhH9Gsf$5(6##+mi?b`O^jv0I{kWkT%XZ)S6%YH1k@#^c{| zdQIM~ZuZNn!Hw*igKi2*EzXDQusJpnE0GahZkvJzrvvq?=@cKVKg<0zOQ#>!uNHUfUvv%z{!cIetuv-f!J zQW9ZKZ}H{FOqfLdB65|iDl=dlR$Mgi&c7gv6Qp|@0phI8u&d_lHnyKT;kW4Qp}J0c z2ir)sXfwtN5<_wuICHZ~xWm14IYwTMKUv}3?Gs(qaLw|-I;=U!U`m+!OuWisoWY0I zACH!9rq_oLYUAAwF^HtQ+$fJ9mby`G_BzW=+`m#GM$PM@V7aPL@x1YLoRktV$CKb6 zJ3SkW?MB8#x6{A&x3+&Q5+j+BT!7+?%Z6mzz4dBY9Tg$F6nsoSS10+D`dou^vRwlI zAZU<7uJJQ&q9hYWX&e?Ym8;R{;*nj1<(o+PzOd$kYZwaXx`E-rwlR za{ChKS`$i~d0ca&$k@2ck`K2!pW~Y7g<-$=&r6+>CC~45d1-&sEe$pH?sssy-0;eD>~SAs{zQVJHO{Zf-@FO)<(;3fUWiOO!G&|{8!PKY z3IVK|Ie3gl3-pcS*kvZjqr3|xo2=?QrLk^jRxe9IU`!oTUku$d^oR9rx*ASk=kjI` z#B5H#bPQk3q&o4CUYtVrOGitgF^0*r;3Wn%M0L^?g52)$;v_#|?bG?b%=L=yI?zv< zd8M_?`GY6o>bgVpYwtSty>&J{)}2r$(CY5eq!x|sJKje6yJnA}{kj2~5@i~xO zeVR2-CX%P#p9++Wi9LB;Ofz@_&+MVE zt0>4&2VdLz0UGD1|kVH0i z=?b<42#sRvxR)^4DaLAOysk*hF35evSiwrq#M2!_;=RHW@4{sEL-)~Bc<)t{ABOqf z1^PY{X+04JZWCeAKHjSdFQG^5;gV$&VG2tfZ(so^NU`x!=`LABZdqL-R)NpW*ckS> zFHtdaA!q21Bj0py8t#izaX|=80nFJq|P7 zRmLOD_o$yP(yDcYO$Ux`+{)Oe5Ybgb0Ic- zB?1S0WAg}@Jx%N!uilTj0Xp&r7ghq=ykyR{eohQA9X6}6(!E35*GPo5bfmbpuDjK& z;!@z&h!Kn-@ig;n>t>(Kbp8?U4MnSLTS+14?al|J1$?mrIJH4ZlO1C*o0C`X(Tq_p z9k;|V%W#HcHDx{&TE~>qSb3?G{pPbt%7s1MXAp52*e`#BDF7+{IkaP9#Yp#~l-e594nVJ*tJ+pbVE{4=?7fQ3DK z&uQ)Dv8e9I4`cpFD_sfmGdfXn(_G`VcyrC%2)&To;jivbOw}FTY-fz0AsH>Q}9}-IKW`LE?9z|L!e|?}CZ^ZjwgoZ*^A3^SfA)#59cYJ$7$N z=;GG%u4ng;Exs6k-C3|eeg4*mM*X9+6PIcAnMo2msH~m{sv)y{z3CXIZHEg!QL$8n za6D&t$-eSff_X|_HzZ3A*p-;nzW5dT<=$frrA{e?q)G7_PdOQ68*@fs|_E( zSnxwK*wXa%UzBvuHaYJSaQ9damXSQ+vmxAf3>5d z{fmg&;6~uF6btSk;+7K}foXkoo`H{@tJ-gOXCd7an7D>B?^ zh6v@xe>KjaD$bkg(9d1)KmMa^?#yRT3UjlyPsG<;?aPFk8o&0wTX^X#tCZ_AXrE~+ z&%sh5=ydygB(l4Q3%Y-gYZw0U9b0Z#;^{|}!x3kXA{p!-GTO1x|Wwd4rA2;bnhU(Qu zdXkUNHA&A0mKR01@=_%uKeQNRW|juMqp{4Sozpt5`dWHE`Syu!cxK*_nHm!X)5S_L zIefiF?EKVPMZ=c76yeQjdWhFjXWMZFdX}~#7@4X26R^sv%5$2xY3k*WK`dBhr9*ph zWns2A?b40uSLMjXD*lz0&#c2;%t@YkP0aq86t&pn5~ z`LVCnu$LG2+m+mKjA^91?Y1Rh#Td)awj^U!S>NwuCcv*LP=LqFn3mraDr2$~Tv_Mu zx&FX35}*Eb^&CCJRkXe`CtPlWZ)2Av9D!5$_gpB%`48<>yNO5uJt1x9|4*qvRuTuY zQd#GgV0oaIFaO7yn&Z>K6P)f7@8%!34Z%fgI7_>4NESIZK@m(_KWtdv#?XstyqkxK zrx&>h(?ks4>U=2c|0*o$#O%R4FEp2ugHr=oLs8)F%Xzs?9)b@3Snq`#-YRCA5j(&LVW zG*cuZMpk)!O*@v`j!ojOd$m{!wRIcE`Q;DJ{kSo)%1Z@_H~;i9{Cw)m1*T(HJ>xa+ ziZd5Ixm(`Ie~-CK-=1WP?;iQGFC_JN5D9svN%j)_6@J=zN7LhRLdu)b$C@bK`dW#a zSv!v#N;Bjxww5~5Oo}ahFr;7s9(9dAiudi#jIUFeMXvs7k5978S4yxl4g6S2EKZRp zg6X72&LPI*H4MNG0mbugyY3f{vi5ab=!5XB-)^buwYdugNl;)`7k3BJ#%8pZL_cm< zqD*}KG=4;ulE6T{X%Lbx#zF;>=GVsjFz}#8^H@z4qDMbS5U$H23b^Lb|QdhEvkM zD6S8Ba04uTLbx89;x@xBsedHkBJ-qqf+-={ana72EEc<9yi}59OSK!_F0UF>r|8&5GFf_5p;@5o_~f4TCk_ZSY6Xs+dS2 zO|^z>g)pzk1bqCVK4xY-!nr4EZ?iX=Vj5}c#x3LXoXY*I)6)ZMl@}F}|13$XfBSjZ|2U-g#MV}J5iafNMCw;EsdfD`LpbgE< z`hngXN_46Z!8QWO)hjDcU2hGg4)@9$*qP#lx6CAV#`|qah@VRNXl@vI|HnIPdiBl4 zBz>PiTBb|p$MM;d-?MRsbW6XCWG&{CyM1at@=>Mh`N1pI!XA@Ym6sS899nxXvyhed zm}L~z$zT8QEE`hBxxO;i&@cmL>=J%ZeBAiD03$yevlrpLPiE{nDGIko0&p?NNh7jq z@^YGKuF^1nHl@%=mdBBoBf_5S%fDZ8#Kb#+Lue^M-^i0ex;lu7zfgCyBk814nnP+B zt?%8Hn=#sDM!k8x3b^wI9<*0JIiDSSA~fxt%eFX>YSe93T;^@@j#!WiWP`W09n96~nT>j(B3mFEnlw4Bwfr+K+E)s~z)*&I*Y^c zy2qjf%bxnZv8nKgYjrT7?`N9p%$#>9W{u8aFh4BOuJhUP@6W}(;46Rxc{Hut1B9cJfT{isGq-+4X7uKN!OGQ9HDaNRjMJvs8C&ar&U^uIA` z&laC4INaV3wKI8^kJL``kXl~%9P>P9SaI5jhsvFyUHrQ3)yZ6e$8t$^^X&KP);r#; z6JF1GfS(X-68nNfn@ODT)O*&8&XX(Y4`RO8N|_V#XhbW=%2<)#!jZ6eF?{Mp)hri2 ziSl$n%Y5Kv7bal_(%TKy4)!j|2BjtSp9}|T<;gK+XBR)5XL;E}6z zD}FMR@!NT^_mtT=63o?>F-l#TZ_`Q^PpqB#!RGnudV+AW8-Ar`Z1eQ-sv4TE*3B}A zPMhuB)n#0Um!XDr$6cf~)=i0=*_yKro`jXFjoxf+v{U;eOwsfPQsUF#R1vEmzgZY$ zctpE9z0a{kLOTqzS@rVC)C9^F?O~jP0Vp{!{rCBb6oyW7`UU~Il?S$~GdCAskmWv( zfi}QzIAj5jqFS8KG_S$?rnLKUH2R->dH)ElsZ%C ze`(QB=Y*7<)k9U#*#^UwSVfx~#fk z@&!)829BMGv>v2*+v@3Gi)@b#EP)W;(P=v+S|`xbZlcjP(eB?IE!J>VzI46!^NZZ} z_w2MM!s*iQsfSP%ico<*)_$sl|IiN<4QZ`7eK&rQ`1<#Zo|?~8(0k?YXKnDb&h#YT znHjj2xN=-!ak&he=v`#1e0x?wzMsf*X~zp(?OpD(tKO-EeL7^6+6Cjp5#AT&Rr=l5 z$6F3uJT_hdscu$Koi{|IXsD$63%h9(+_| zj~@m`81o-<{L6zI=sXWT6@kt(p5;IAM0-X8o#Vj=9nd+7u>Lv6zdU`g_t*nE`-6`= zptFDYf5(3R!w={p9DGgzT?99vjqrbU-d)f7%To*JJP)?(qw^eN|6`u$9o6Uz4>ql% zGt}h#V}|=$*wHy2>=j4nr~<4}|4^+5I>*tu9&EWq=jy@x$6V1GaMAf4Y->g57XX|| z{oyQkn_SU(9oF2s$8Gk{c^%&Biq86A7waBl42-M%e>}~<^t7V$JlMjD&U5L{c^+(N zMQ3}kc@>?lGqBJ5hZgjKCj9 NB4FDF^%OJ4{{tdaZ=nDH literal 0 HcmV?d00001 diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.1.tm index 88fdc3fd..96fb263d 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.1.tm @@ -4041,6 +4041,9 @@ tcl::namespace::eval textblock { (default 0 if no existing -table supplied)" -table -default "" -type string -help "existing table object to use" -headers -default "" -help "list of header values. Must match number of columns" + -show_header -default "" -help "Whether to show a header row. + Leave as empty string for unspecified/automatic, + in which case it will display only if -headers list was supplied." -action -default "append" -choices {append replace} -help "row insertion method if existing -table is supplied if append is chosen the new values will always start at the first column" -columns -default "" -type integer -help "Number of table columns @@ -4101,11 +4104,34 @@ tcl::namespace::eval textblock { } } else { set is_new_table 1 + set headers {} + if {[tcl::dict::get $opts -headers] ne ""} { + set headers [dict get $opts -headers] + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 1 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } else { + if {[tcl::dict::get $opts -show_header] eq ""} { + set show_header 0 + } else { + set show_header [tcl::dict::get $opts -show_header] + } + } + if {[tcl::string::is integer -strict $opt_columns]} { set cols $opt_columns + if {[llength $headers] && $cols != [llength $headers]} { + error "list_as_table number of columns ($cols) doesn't match supplied number of headers ([llength $headers])" + } } else { #review - set cols 2 ;#seems a reasonable default + if {[llength $headers]} { + set cols [llength $headers] + } else { + set cols 2 ;#seems a reasonable default + } } #defaults for new table only if {[tcl::dict::get $opts -frametype] eq ""} { @@ -4123,15 +4149,6 @@ tcl::namespace::eval textblock { if {[tcl::dict::get $opts -show_hseps] eq ""} { tcl::dict::set opts -show_hseps 0 } - set headers {} - set show_header 0 - if {[tcl::dict::get $opts -headers] ne ""} { - set headers [dict get $opts -headers] - if {[llength $headers] ne $cols} { - error "list_as_table number of headers ([llength $headers]) must match number of columns ($cols)" - } - set show_header 1 - } set t [textblock::class::table new\ -show_header $show_header\ diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm new file mode 100644 index 00000000..d85d4416 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.1.tm @@ -0,0 +1,3357 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application tomlish 1.1.1 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin tomlish_module_tomlish 0 1.1.1] +#[copyright "2024"] +#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] +#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[require tomlish] +#[keywords module] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of tomlish +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by tomlish +#[list_begin itemized] + +package require Tcl 8.6- +package require struct::stack +package require logger + +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {struct::stack}] + +#limit ourselves to clear, destroy, peek, pop, push, rotate, or size (e.g v 1.3 does not implement 'get') + + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish { + namespace export {[a-z]*}; # Convention: export all lowercase + variable types + #ARRAY is analogous to a Tcl list + #TABLE is analogous to a Tcl dict + #WS = inline whitespace + #KEYVAL = bare key and value + #QKEYVAL = quoted key and value + #ITABLE = inline table (*can* be anonymous table) + # inline table values immediately create a table with the opening brace + # inline tables are fully defined between their braces, as are dotted-key subtables defined within + # No additional subtables or arrays of tables may be defined within an inline table after the ending brace - they must be entirely self-contained + + set tags [list TOMLISH ARRAY TABLE ITABLE ANONTABLE WS NEWLINE COMMENT KEYVAL QKEYVAL STRING MULTISTRING LITSTRING MULTILITSTRING INT FLOAT BOOL DATETIME] + #tomlish v1.0 should accept arbitrary 64-bit signed ints (from -2^63 to 2^63-1) + #we will restrict to this range for compatibility for now - although Tcl can handle larger (arbitrarily so?) + set min_int -9223372036854775808 ;#-2^63 + set max_int +9223372036854775807 ;#2^63-1 + + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] tomlish '$txt'" + puts stderr $msg + } + logger::initNamespace ::tomlish + foreach lvl [logger::levels] { + interp alias {} tomlish_log_$lvl {} ::tomlish::Dolog $lvl + log::logproc $lvl tomlish_log_$lvl + } + + #*** !doctools + #[subsection {Namespace tomlish}] + #[para] Core API functions for tomlish + #[list_begin definitions] + + proc tags {} { + return $::tomlish::tags + } + + #helper function for get_dict + proc _get_keyval_value {keyval_element} { + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" + set found_value 0 + #find the value + # 3 is the earliest index at which the value could occur (depending on whitespace) + set found_sub [list] + foreach sub [lrange $keyval_element 2 end] { + #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey + switch -exact -- [lindex $sub 0] { + STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + set type [lindex $sub 0] + set value [lindex $sub 1] + set found_sub $sub + incr found_value 1 + } + default {} + } + } + if {!$found_value} { + error "Failed to find value element in KEYVAL. '$keyval_element'" + } + if {$found_value > 1} { + error "Found multiple value elements in KEYVAL, expected exactly one. '$keyval_element'" + } + + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + #simple (non-container, no-substitution) datatype + set result [list type $type value $value] + } + STRING - STRINGPART { + set result [list type $type value [::tomlish::utils::unescape_string $value]] + } + LITSTRING { + #REVIEW + set result [list type $type value $value] + } + TABLE - ITABLE - ARRAY - MULTISTRING { + #jmn2024 - added ITABLE - review + #we need to recurse to get the corresponding dict for the contained item(s) + #pass in the whole $found_sub - not just the $value! + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } + default { + error "Unexpected value type '$type' found in keyval '$keyval_element'" + } + } + return $result + } + + #get_dict is a *basic* programmatic datastructure for accessing the data. + # produce a dictionary of keys and values from a tomlish tagged list. + # get_dict is primarily for reading toml data. + #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, + # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. + # creating/changing toml values can be done directly on a tomlish list if preserving (or adding) formatting/comments is desired. + #A separate package 'tomlish::object' may be needed to allow easier programmatic creating/updating/deleting of data elements whilst preserving (or adding or selectively deleting/editing) such formatting. + proc get_dict {tomlish} { + + #keep track of which tablenames have already been directly defined, + # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' + #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. + #we don't error out just because a previous tablename segment has already appeared. + variable tablenames_seen [list] + + + log::info ">>> processing '$tomlish'<<<" + set items $tomlish + + foreach lst $items { + if {[lindex $lst 0] ni $::tomlish::tags} { + error "supplied string does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + } + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + #ignore TOMLISH tag at beginning + set items [lrange $tomlish 1 end] + } + + set datastructure [dict create] + foreach item $items { + set tag [lindex $item 0] + #puts "...> item:'$item' tag:'$tag'" + switch -exact -- $tag { + KEYVAL - QKEYVAL { + log::debug "--> processing $tag: $item" + set key [lindex $item 1] + #!todo - normalize key. (may be quoted/doublequoted) + + if {[dict exists $datastructure $key]} { + error "Duplicate key '$key'. The key already exists at this level in the toml data. The toml data is not valid." + } + + #lassign [_get_keyval_value $item] type val + set keyval_dict [_get_keyval_value $item] + dict set datastructure $key $keyval_dict + } + TABLE { + set tablename [lindex $item 1] + set tablename [::tomlish::utils::tablename_trim $tablename] + + if {$tablename in $tablenames_seen} { + error "Table name '$tablename' has already been directly defined in the toml data. Invalid." + } + + log::debug "--> processing $tag (name: $tablename): $item" + set name_segments [::tomlish::utils::tablename_split $tablename] + set last_seg "" + #toml spec rule - all segments mst be non-empty + #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. + + set key_hierarchy [list] + set key_hierarchy_raw [list] + + foreach rawseg $name_segments { + + set seg [::tomlish::utils::normalize_key $rawseg] ;#strips one level of enclosing quotes, and substitutes only toml-specified escapes + set c1 [::string index $rawseg 0] + set c2 [::string index $rawseg end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes are processed within it. + set seg [::string range $rawseg 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + set seg [::tomlish::utils::unescape_string [::string range $rawseg 1 end-1]] + #set seg [subst -nocommands -novariables [::string range $rawseg 1 end-1]] + } else { + set seg $rawseg + } + + #no need to check for empty segments here - we've already called tablename_split which would have raised an error for empty segments. + #if {$rawseg eq ""} { + # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" + #} + lappend key_hierarchy $seg + lappend key_hierarchy_raw $rawseg + + if {[dict exists $datastructure {*}$key_hierarchy]} { + #It's ok for this key to already exist *if* it was defined by a previous tablename, + # but not if it was defined as a keyval/qkeyval + + set testkey [join $key_hierarchy_raw .] + set testkey_length [llength $key_hierarchy_raw] + set found_testkey 0 + if {$testkey in $tablenames_seen} { + set found_testkey 1 + } else { + #see if it was defined by a longer entry + foreach seen $tablenames_seen { + set seen_segments [::tomlish::utils::tablename_split $seen] + #these are raw unnormalized tablename segments. Need to normalize the double-quoted ones, + # and strip the quotes from both single-quoted and double-quoted entries. + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #dots within table segments might seem like an 'edge case' + # - but perhaps there is legitimate need to put for example version numbers as tablenames or parts of tablenames. + + #VVV the test below is wrong VVV! + set seen_match [join [lrange $seen_segments 0 [expr {$testkey_length -1}]] .] + if {$testkey eq $seen_match} { + set found_testkey 1 + } + } + } + + if {$found_testkey == 0} { + #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset + error "key [join $key_hierarchy_raw .] already exists, but wasn't defined by a supertable." + } + } + + } + + + #We must do this after the key-collision test above! + lappend tablenames_seen $tablename + + + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + + #now add the contained elements + foreach element [lrange $item 2 end] { + set type [lindex $element 0] + switch -exact -- $type { + KEYVAL - QKEYVAL { + set keyval_key [lindex $element 1] + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + } + } + } + #now make sure we add an empty value if there were no contained elements! + #!todo. + } + ITABLE { + #SEP??? + set datastructure [list] + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + KEYVAL - QKEYVAL { + set keyval_key [lindex $element 1] + set keyval_dict [_get_keyval_value $element] + dict set datastructure $keyval_key $keyval_dict + } + NEWLINE - COMMENT - WS { + #ignore + } + default { + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + } + } + } + } + ARRAY { + #arrays in toml are allowed to contain mixtures of types + set datastructure [list] + log::debug "--> processing array: $item" + + foreach element [lrange $item 1 end] { + set type [lindex $element 0] + switch -exact -- $type { + INT - FLOAT - BOOL - DATETIME { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + TABLE - ARRAY - MULTISTRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] + } + WS - SEP { + #ignore whitespace and commas + } + default { + error "Unexpected value type '$type' found in array" + } + } + } + } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } + WS - COMMENT - NEWLINE { + #ignore + } + default { + error "Unexpected tag '$tag' in Tomlish list '$tomlish'" + } + } + } + return $datastructure + } + + proc json_to_toml {json} { + #*** !doctools + #[call [fun json_to_toml] [arg json]] + #[para] + + set tomlish [::tomlish::from_json $json] + set toml [::tomlish::to_toml $tomlish] + } + + proc from_json {json} { + set jstruct [::tomlish::json_struct $json] + return [::tomlish::from_json_struct $jstruct] + } + + proc from_json_struct {jstruct} { + package require fish::json_toml + return [fish::json_toml::jsonstruct2tomlish $jstruct] + } + + proc toml_to_json {toml} { + set tomlish [::tomlish::from_toml $toml] + return [::tomlish::get_json $tomlish] + } + + proc get_json {tomlish} { + package require fish::json + set d [::tomlish::get_dict $tomlish] + #return [::tomlish::dict_to_json $d] + return [fish::json::from "struct" $d] + } + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +namespace eval tomlish::encode { + #*** !doctools + #[subsection {Namespace tomlish::encode}] + #[para] + #[list_begin definitions] + + #STRING,INT,FLOAT,BOOL, DATETIME - simple wrappers for completeness + # take a value of the appropriate type and wrap as a tomlish tagged item + proc string {s} { + return [list STRING $s] + } + + proc int {i} { + #whole numbers, may be prefixed with a + or - + #Leading zeros are not allowed + #Hex,octal binary forms are allowed (toml 1.0) + #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) + #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. + # - We should probably raise an error for number larger than this and suggest the user supply it as a string? + if {[tcl::string::last , $i] > -1} { + error "Unable to interpret '$i' as an integer. Use underscores if you need a thousands separator [::tomlish::parse::report_line]" + } + if {![::tomlish::utils::int_validchars $i]} { + error "Unable to interpret '$i' as an integer. Only 0-9 + 1 _ characters are acceptable. [::tomlish::parse::report_line]" + } + + if {[::tomlish::utils::is_int $i]} { + return [list INT $i] + } else { + error "'$i' is not a valid integer as per the Toml spec. [::tomlish::parse::report_line]" + } + + } + + proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [string tolower $f]] + } + if {[::tomlish::utils::is_float $f]} { + return [list FLOAT $f] + } else { + error "Unable to interpret '$f' as Toml float. Check your input, or check that tomlish is able to handle all Toml floats properly [::tomlish::parse::report_line]" + } + } + + proc datetime {str} { + if {[::tomlish::utils::is_datetime $str]} { + return [list DATETIME $str] + } else { + error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" + } + } + + proc boolean {b} { + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![string is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" + } else { + if {[expr {$b && 1}]} { + return [list BOOL true] + } else { + return [list BOOL false] + } + } + } + + #Take tablename followed by + # a) *tomlish* name-value pairs e.g table mydata [list KEYVAL item11 [list STRING "test"]] {KEYVAL item2 [list INT 1]} + # b) simple 2-element tcl lists being name & *simple* value pairs for which basic heuristics will be used to determine types + proc table {name args} { + set pairs [list] + foreach t $args { + if {[llength $t] == 3} { + if {[lindex $t 0] ne "KEYVAL"} { + error "Only items tagged as KEYVAL currently accepted as name-value pairs for table command" + } + lappend pairs $t + } elseif {[llength $t] == 2} { + #!todo - type heuristics + lassign $t n v + lappend pairs [list KEYVAL $n [list STRING $v]] + } else { + error "erlish 'KEYVAL' or simple name-value pairs expected. Unable to handle [llength $t] element list as argument" + } + } + return [list TABLE $name $pairs] + } + + + #the tomlish root is basically a nameless table representing the root of the document + proc root {args} { + set table [::tomlish::encode::table TOMLISH {*}$args] + set result [lindex $table 2] ;#Take only the key-value pair list + } + + #WS = whitepace, US = underscore + proc tomlish {list {context ""}} { + if {![tcl::string::is list $list]} { + error "Supplied 'tomlish' is not a valid Tcl list. Expected a tagged list (parsed Toml)" + } + set toml "" ;#result string + + foreach item $list { + set tag [lindex $item 0] + #puts "tomlish::encode::tomlish processing item '$item', tag '$tag'" + #during recursion, some tags require different error checking in different contexts. + set nextcontext $tag ; + + + #Handle invalid tag nestings + switch -- $context { + QKEYVAL - + KEYVAL { + if {$tag in {KEYVAL QKEYVAL}} { + error "Invalid tag '$tag' encountered within '$context'" + } + } + MULTISTRING { + #explicitly list the valid child tags + if {$tag ni {STRING STRINGPART WS NEWLINE CONT}} { + error "Invalid tag '$tag' encountered within a MULTISTRING" + } + } + default { + #no context, or no defined nesting error for this context + } + } + + switch -- $tag { + TOMLISH { + #optional root tag. Ignore. + } + QKEYVAL - + KEYVAL { + if {$tag eq "KEYVAL"} { + append toml [lindex $item 1] ;#Key + } else { + append toml \"[lindex $item 1]\" ;#Quoted Key + } + foreach part [lrange $item 2 end] { + if {$part eq "="} { + append toml "=" + } else { + append toml [::tomlish::encode::tomlish [list $part] $nextcontext] + } + } + } + TABLE { + append toml "\[[lindex $item 1]\]" ;#table name + foreach part [lrange $item 2 end] { + append toml [::tomlish::encode::tomlish [list $part] $nextcontext] + } + + } + ITABLE { + #inline table - e.g within array or on RHS of keyval/qkeyval + set data "" + foreach part [lrange $item 1 end] { + append data [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\{$data\}" + } + ARRAY { + + set arraystr "" + foreach part [lrange $item 1 end] { + append arraystr [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\[$arraystr\]" + } + WS { + append toml [lindex $item 1] + } + SEP { + append toml "," + } + NEWLINE { + set chartype [lindex $item 1] + if {$chartype eq "lf"} { + append toml \n + } elseif {$chartype eq "crlf"} { + append toml \r\n + } else { + error "Unrecognized newline type '$chartype'" + } + } + CONT { + #line continuation character "\" + append toml "\\" + } + STRING { + #simple double quoted strings only + # + return \"[lindex $item 1]\" + } + STRINGPART { + return [lindex $item 1] + } + MULTISTRING { + #Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts + set multistring "" ;#variable to build up the string + foreach part [lrange $item 1 end] { + append multistring [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml "\"\"\"$multistring\"\"\"" + } + LITSTRING { + #Single Quoted string(literal string) + append toml '[lindex $item 1]' + } + MULTILITSTRING { + #review - multilitstring can be handled as a single string? + set litstring "" + foreach part [lrange $item 1 end] { + append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] + } + append toml '''$litstring''' + } + INT - + BOOL - + FLOAT - + DATETIME { + append toml [lindex $item 1] + } + INCOMPLETE { + error "cannot process tomlish term tagged as INCOMPLETE" + } + COMMENT { + append toml "#[lindex $item 1]" + } + BOM { + #Byte Order Mark may appear at beginning of a file. Needs to be preserved. + append toml "\uFEFF" + } + default { + error "Not a properly formed 'tomlish' taggedlist.\n '$list'\n Unknown tag '[lindex $item 0]'. See output of \[tomlish::tags\] command." + } + } + + } + return $toml + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::encode ---}] +} +#fish toml from tomlish + +#(encode tomlish as toml) +interp alias {} tomlish::to_toml {} tomlish::encode::tomlish + +# + + +namespace eval tomlish::decode { + #*** !doctools + #[subsection {Namespace tomlish::decode}] + #[para] + #[list_begin definitions] + + #return a Tcl list of tomlish tokens + #i.e get a standard list of all the toml terms in string $s + #where each element of the list is a *tomlish* term.. i.e a specially 'tagged' Tcl list. + #(simliar to a tcl 'Huddle' - but also supporting whitespace preservation) + #Note that we deliberately don't check certain things such as duplicate table declarations here. + #Part of the justification for this is that as long as the syntax is toml shaped - we can load files which violate certain rules and allow programmatic manipulation. + # (e.g perhaps a toml editor to highlight violations for fixing) + # A further stage is then necessary to load the tomlish tagged list into a data structure more suitable for efficient query/reading. + # e.g dicts or an object oriented structure + #Note also - *no* escapes in quoted strings are processed. This is up to the datastructure stage + #e.g to_dict will substitute \r \n \uHHHH \UHHHHHHH etc + #This is important for tomlish to maintain the ability to perform competely lossless round-trips from toml to tomlish and back to toml. + # (which is handy for testing as well as editing some part of the structure with absolutely no effect on other parts of the document) + #If we were to unescape a tab character for example + # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. + # For this reason, we also do absolutely no line-ending transformations based on platform. + # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { + #*** !doctools + #[call [fun toml] [arg s]] + #[para] return a Tcl list of tomlish tokens + + namespace upvar ::tomlish::parse is_parsing is_parsing + set is_parsing 1 + + + if {[info command ::tomlish::parse::spacestack] eq "::tomlish::parse::spacestack"} { + tomlish::parse::spacestack destroy + } + struct::stack ::tomlish::parse::spacestack + + namespace upvar ::tomlish::parse last_space_action last_space_action + namespace upvar ::tomlish::parse last_space_type last_space_type + + + namespace upvar ::tomlish::parse tok tok + set tok "" + + namespace upvar ::tomlish::parse type type + namespace upvar ::tomlish::parse tokenType tokenType + ::tomlish::parse::set_tokenType "" + namespace upvar ::tomlish::parse tokenType_list tokenType_list + set tokenType [list] ;#Flat (un-nested) list of tokentypes found + + namespace upvar ::tomlish::parse lastChar lastChar + set lastChar "" + + + set result "" + namespace upvar ::tomlish::parse nest nest + set nest 0 + + namespace upvar ::tomlish::parse v v ;#array keyed on nest level + + + set v(0) {TOMLISH} + array set s0 [list] ;#whitespace data to go in {SPACE {}} element. + set parentlevel 0 + + namespace upvar ::tomlish::parse i i + set i 0 + + namespace upvar ::tomlish::parse state state + + namespace upvar ::tomlish::parse braceCount braceCount + set barceCount 0 + namespace upvar ::tomlish::parse bracketCount bracketCount + set bracketCount 0 + + set sep 0 + set r 1 + namespace upvar ::tomlish::parse token_waiting token_waiting + set token_waiting [dict create] ;#if ::tok finds a *complete* second token during a run, it will put the 2nd one here to be returned by the next call. + + + set state "key-space" + ::tomlish::parse::spacestack push {space key-space} + namespace upvar ::tomlish::parse linenum linenum;#'line number' of input data. (incremented for each literal linefeed - but not escaped ones in data) + set linenum 1 + + try { + while {$r} { + set r [::tomlish::parse::tok $s] + #puts stdout "got tok: '$tok' while parsing string '$s' " + set next_tokenType_known 0 ;#whether we begin a new token here based on what terminated the token result of 'tok' + + + + #puts "got token: '$tok' tokenType='$tokenType'. while v($nest) = [set v($nest)]" + #puts "-->tok: $tok tokenType='$tokenType'" + set prevstate $state + ##### + set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] + ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + + set state $nextstate + if {$state eq "err"} { + error "State error - aborting parse. [tomlish::parse::report_line]" + } + + if {$last_space_action eq "pop"} { + #pop_trigger_tokens: newline tablename endarray endinlinetable + #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. + switch -exact -- $tokenType { + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + tablename { + #note: a tablename only 'pops' if we are greater than zero + error "tablename pop should already have been handled as special case zeropoppushspace in getNextState" + } + tablearrayname { + #!review - tablearrayname different to tablename regarding push/pop? + #note: a tablename only 'pops' if we are greater than zero + error "tablearrayname pop should already have been handled as special case zeropoppushspace in getNextState" + } + endarray { + #nothing to do here. + } + comma { + #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" + } + endinlinetable { + puts stderr "endinlinetable" + } + endmultiquote { + puts stderr "endmultiquote for last_space_action 'pop'" + } + default { + error "unexpected tokenType '$tokenType' for last_space_action 'pop'" + } + } + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + + } elseif {$last_space_action eq "push"} { + incr nest 1 + set v($nest) [list] + # push_trigger_tokens: barekey quotedkey startinlinetable startarray tablename tablearrayname + switch -exact -- $tokenType { + barekey { + set v($nest) [list KEYVAL $tok] ;#$tok is the keyname + } + quotedkey - itablequotedkey { + set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname + } + tablename { + #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! + #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish + # back to toml file will be identical. + #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. + # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, + # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. + + #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, + # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the + # tomlish list? + + set test_only [::tomlish::utils::tablename_trim $tok] + ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" + set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name + #note also that equivalent tablenames may have different toml representations even after being trimmed! + #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) + #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. + } + tablearrayname { + set test_only [::tomlish::utils::tablename_trim $tok] + puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name + } + startarray { + set v($nest) [list ARRAY] ;#$tok is just the opening bracket - don't output. + } + startinlinetable { + set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. + } + startmultiquote { + puts stderr "push trigger tokenType startmultiquote (todo)" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE + #JMN ??? + #set next_tokenType_known 1 + #::tomlish::parse::set_tokenType "multistring" + #set tok "" + } + default { + error "push trigger tokenType '$tokenType' not yet implemented" + } + } + + } else { + #no space level change + switch -exact -- $tokenType { + starttablename { + #$tok is triggered by the opening bracket and sends nothing to output + } + starttablearrayname { + #$tok is triggered by the double opening brackets and sends nothing to output + } + tablename - tablenamearray { + error "did not expect 'tablename/tablearrayname' without space level change" + #set v($nest) [list TABLE $tok] + } + endtablename - endtablearrayname { + #no output into the tomlish list for this token + } + startinlinetable { + puts stderr "decode::toml error. did not expect startlinetable without space level change" + } + startquote { + switch -exact -- $nextstate { + string { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itablequotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "startquote switch case not implemented for nextstate: $nextstate" + } + } + } + startmultiquote { + #review + puts stderr "no space level change - got startmultiquote" + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringpart" + set tok "" + } + endquote { + #nothing to do? + set tok "" + } + endmultiquote { + #JMN!! + set tok "" + } + string { + lappend v($nest) [list STRING $tok] + } + stringpart { + lappend v($nest) [list STRINGPART $tok] + } + multistring { + #review + lappend v($nest) [list MULTISTRING $tok] + } + quotedkey { + #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + + } + untyped-value { + #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. + if {$tok in {true false}} { + set tag BOOL + } elseif {[::tomlish::utils::is_int $tok]} { + set tag INT + } elseif {[::tomlish::utils::is_float $tok]} { + set tag FLOAT + } elseif {[::tomlish::utils::is_datetime $tok]} { + set tag DATETIME + } else { + error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" + } + lappend v($nest) [list $tag $tok] + } + comment { + #puts stdout "----- comment token returned '$tok'------" + lappend v($nest) [list COMMENT "$tok"] + } + equal { + #we append '=' to the nest so that any surrounding whitespace is retained. + lappend v($nest) = + } + comma { + lappend v($nest) SEP + } + newline { + incr linenum + lappend v($nest) [list NEWLINE $tok] + } + whitespace { + lappend v($nest) [list WS $tok] + } + continuation { + lappend v($nest) CONT + } + bom { + lappend v($nest) BOM + } + eof { + #ok - nothing more to add to the tomlish list. + #!todo - check previous tokens are complete/valid? + } + default { + error "unknown tokenType '$tokenType' [::tomlish::parse::report_line]" + } + } + } + + if {!$next_tokenType_known} { + ::tomlish::parse::set_tokenType "" + set tok "" + } + + if {$state eq "end"} { + break + } + + + } + + #while {$nest > 0} { + # lappend v([expr {$nest -1}]) [set v($nest)] + # incr nest -1 + #} + while {[::tomlish::parse::spacestack size] > 1} { + ::tomlish::parse::spacestack pop + lappend v([expr {$nest -1}]) [set v($nest)] + incr nest -1 + + #set parent [spacestack peek] ;#the level being appended to + #lassign $parent type state + #if {$type eq "space"} { + # + #} elseif {$type eq "buffer"} { + # lappend v([expr {$nest -1}]) {*}[set v($nest)] + #} else { + # error "invalid spacestack item: $parent" + #} + } + + } finally { + set is_parsing 0 + } + + + return $v(0) + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::decode ---}] +} +#decode toml to tomlish +interp alias {} tomlish::from_toml {} tomlish::decode::toml + +namespace eval tomlish::utils { + #*** !doctools + #[subsection {Namespace tomlish::utils}] + #[para] + #[list_begin definitions] + + + #tablenames (& tablearraynames) may contain irrelevant leading, trailing and interspersed whitespace + # tablenames can be made up of segments delimited by dots. .eg [ a.b . c ] + #trimmed, the tablename becomes {a.b.c} + # A segment may contain whitespace if it is quoted e.g [a . b . "c etc " ] + #ie whitespace is only irrelevant if it's outside a quoted segment + #trimmed, the tablename becomes {a.b."c etc "} + proc tablename_trim {tablename} { + set segments [tablename_split $tablename false] + set trimmed_segments [list] + foreach seg $segments { + lappend trimmed_segments [::string trim $seg [list " " \t]] + } + return [join $trimmed_segments .] + } + + #utils::tablename_split + proc tablename_split {tablename {normalize false}} { + #we can't just split on . because we have to handle quoted segments which may contain a dot. + #eg {dog."tater.man"} + set i 0 + set sLen [::string length $tablename] + set segments [list] + set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + #quoted is for double-quotes, litquoted is for single-quotes (string literal) + set seg "" + for {} {$i < $sLen} {} { + + if {$i > 0} { + set lastChar [::string index $tablename [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [::string index $tablename $i] + incr i + + if {$c eq "."} { + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } + } + } elseif {($c eq "\"") && ($lastChar ne "\\")} { + if {$mode eq "unknown"} { + if {[::string trim $seg] ne ""} { + #we don't allow a quote in the middle of a bare key + error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" + } + set mode "quoted" + set seg "\"" + } elseif {$mode eq "unquoted"} { + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" ;#make sure we only accept a dot or end-of-data now. + } elseif {$mode eq "litquoted"} { + append seg $c + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } + } elseif {($c eq "\'")} { + if {$mode eq "unknown"} { + append seg $c + set mode "litquoted" + } elseif {$mode eq "unquoted"} { + #single quote inside e.g o'neill + append seg $c + } elseif {$mode eq "quoted"} { + append seg $c + + } elseif {$mode eq "litquoted"} { + append seg $c + lappend segments $seg + set seg "" + set mode "syntax" + } elseif {$mode eq "syntax"} { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + + } elseif {$c in [list " " \t]} { + if {$mode eq "syntax"} { + #ignore + } else { + append seg $c + } + } else { + if {$mode eq "syntax"} { + error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + } + if {$mode eq "unknown"} { + set mode "unquoted" + } + append seg $c + } + if {$i == $sLen} { + #end of data + ::tomlish::log::debug "End of data: mode='$mode'" + switch -exact -- $mode { + quoted { + if {$c ne "\""} { + error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" + } + if {$normalize} { + lappend segments $seg + } else { + lappend segments [::tomlish::utils::unescape_string [::string range $seg 1 end-1]] + #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong + } + } + litquoted { + set trimmed_seg [::string trim $seg] + if {[::string index $trimmed_seg end] ne "\'"} { + error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" + } + lappend segments $seg + } + unquoted - unknown { + lappend segments $seg + } + syntax { + #ok - segment already lappended + } + default { + lappend segments $seg + } + } + } + } + foreach seg $segments { + set trimmed [::string trim $seg [list " " \t]] + #note - we explicitly allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + #if {$trimmed in [list "''" "\"\""]} { + # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" + #} + if {$trimmed eq "" } { + error "tablename_split. Empty segment found. tablename: '$tablename'" + } + } + return $segments + } + + proc unicode_escape_info {slashu} { + #!todo + # validate that slashu is either a \uxxxx or \Uxxxxxxxx value of the correct length and + # is a valid 'unicode scalar value' + # ie integers in the range 0 to D7FF16 and E00016 to 10FFFF16 inclusive + #expr {(($x >= 0) && ($x <= 0xD7FF16)) || (($x >= 0xE00016) && ($x <= 0x10FFFF16))} + if {[::string match {\\u*} $slashu]} { + set exp {^\\u([0-9a-fA-F]{4}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %4x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } + } else { + return [list err [list reason "Supplied string not of the form \\uHHHH where H in \[0-9a-fA-F\]"]] + } + } elseif {[::string match {\\U*} $slashu]} { + set exp {^\\U([0-9a-fA-F]{8}$)} + if {[regexp $exp $slashu match hex]} { + if {[scan $hex %8x dec] != 1} { + #why would a scan ever fail after matching the regexp? !todo - review. unreachable branch? + return [list err [list reason "Failed to convert '$hex' to decimal"]] + } else { + if {(($dec >= 0) && ($dec <= 0xD7FF16)) || (($dec >= 0xE00016) && ($dec <= 0x10FFFF16))} { + return [list ok [list char [subst -nocommand -novariable $slashu]]] + } else { + return [list err [list reason "$slashu is not within the 'unicode scalar value' ranges 0 to 0xD7FF16 or 0xE00016 to 0x10FFFF16"]] + } + } + } else { + return [list err [list reason "Supplied string not of the form \\UHHHHHHHH where H in \[0-9a-fA-F\]"]] + } + } else { + return [list err [list reason "Supplied string did not start with \\u or \\U" ]] + } + + } + + proc unescape_string {str} { + #note we can't just use Tcl subst because: + # it also transforms \a (audible bell) and \v (vertical tab) which are not in the toml spec. + # it would strip out backslashes inappropriately: e.g "\j" becomes just j + # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn + # it replaces\ with a single whitespace + #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + + set buffer "" + set buffer4 "" ;#buffer for 4 hex characters following a \u + set buffer8 "" ;#buffer for 8 hex characters following a \u + + set sLen [::string length $str] + + #we need to handle arbitrarily long sequences of backslashes. \\\\\ etc + set slash_active 0 + set unicode4_active 0 + set unicode8_active 0 + + + #!todo - check for invalid data in the form of a raw carriage return (decimal 13) without following linefeed? + set i 0 + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [::string index $str [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [::string index $str $i] + ::tomlish::log::debug "unescape_string. got char $c" + scan $c %c n + if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { + #we don't expect unescaped unicode characters from 0000 to 001F - + #*except* for raw tab (which is whitespace) and newlines + error "unescape_string. Invalid data for a toml string. Unescaped control character (decimal $n) [::tomlish::utils::string_to_slashu $c]" + } + incr i ;#must incr here because we do'returns'inside the loop + if {$c eq "\\"} { + if {$slash_active} { + append buffer "\\" + set slash_active 0 + } elseif {$unicode4_active} { + error "unescape_string. unexpected case slash during unicode4 not yet handled" + } elseif {$unicode8_active} { + error "unescape_string. unexpected case slash during unicode8 not yet handled" + } else { + # don't output anything (yet) + set slash_active 1 + } + } else { + if {$unicode4_active} { + if {[::string length $buffer4] < 4} { + append buffer4 $c + } + if {[::string length $buffer4] == 4} { + #we have a \uHHHH to test + set unicode4_active 0 + set result [tomlish::utils::unicode_escape_info "\\u$buffer4"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$unicode8_active} { + if {[::string length $buffer8] < 8} { + append buffer8 $c + } + if {[::string length $buffer8] == 8} { + #we have a \UHHHHHHHH to test + set unicode8_active 0 + set result [tomlish::utils::unicode_escape_info "\\U$buffer8"] + if {[lindex $result 0] eq "ok"} { + append buffer [dict get $result ok char] + } else { + error "unescape_string error: [lindex $result 1]" + } + } + } elseif {$slash_active} { + set slash_active 0 + set ctest [string map {{"} dq} $c] + switch -exact -- $ctest { + dq { + set e "\\\"" + append buffer [subst -nocommand -novariable $e] + } + b - t - n - f - r { + set e "\\$c" + append buffer [subst -nocommand -novariable $e] + } + u { + set unicode4_active 1 + set buffer4 "" + } + U { + set unicode8_active 1 + set buffer8 "" + } + default { + set slash_active 0 + + append buffer "\\" + append buffer $c + } + } + } else { + append buffer $c + } + } + } + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + if {$unicode4_active} { + error "End of string reached before complete unicode escape sequence \uHHHH" + } + if {$unicode8_active} { + error "End of string reached before complete unicode escape sequence \UHHHHHHHH" + } + if {$slash_active} { + append buffer "\\" + } + return $buffer + } + + proc normalize_key {rawkey} { + set c1 [::string index $rawkey 0] + set c2 [::string index $rawkey end] + if {($c1 eq "'") && ($c2 eq "'")} { + #single quoted segment. No escapes allowed within it. + set key [::string range $rawkey 1 end-1] + } elseif {($c1 eq "\"") && ($c2 eq "\"")} { + #double quoted segment. Apply escapes. + # + set keydata [::string range $rawkey 1 end-1] ;#strip outer quotes only + set key [::tomlish::utils::unescape_string $keydata] + #set key [subst -nocommands -novariables $keydata] ;#wrong. Todo - create a string escape substitution function. + } else { + set key $rawkey + } + return $key + } + + proc string_to_slashu {string} { + set rv {} + foreach c [split $string {}] { + scan $c %c c + append rv {\u} + append rv [format %.4X $c] + } + return $rv + } + + #'nonprintable' is conservative here because some systems (e.g windows console) are very limited in what they can display. + proc nonprintable_to_slashu {s} { + set res "" + foreach i [split $s ""] { + scan $i %c c + + set printable 0 + if {($c>31) && ($c<127)} { + set printable 1 + } + if {$printable} {append res $i} else {append res \\u[format %.4X $c]} + } + set res + } ;#RS + + #check if str is valid for use as a toml bare key + proc is_barekey {str} { + if {[::string length $str] == 0} { + return 0 + } else { + set matches [regexp -all {[a-zA-Z0-9\_\-]} $str] + if {[::string length $str] == $matches} { + #all characters match the regexp + return 1 + } else { + return 0 + } + } + } + + #test only that the characters in str are valid for the toml specified type 'integer'. + proc int_validchars1 {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_int {str} { + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] + + if {[tcl::string::length $str] == $matches} { + #all characters in legal range + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o + #first strip any +, - or _ (just for this test) + set check [::string map {+ "" - "" _ ""} $str] + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { + return 0 + } + # --------------------------------------- + + #check +,- only occur in the first position. + if {[::string last - $str] > 0} { + return 0 + } + if {[::string last + $str] > 0} { + return 0 + } + set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![string is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. + #presumably very large numbers would have to be supplied in a toml file as strings. + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max + if {$numeric_value > $::tomlish::max_int} { + return 0 + } + if {$numeric_value < $::tomlish::min_int} { + return 0 + } + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'float'. + proc float_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { + return 1 + } else { + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } + } + } + + proc is_float {str} { + set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } + + if {[::string length $str] == $matches} { + #all characters in legal range + #A leading zero is ok, but we should disallow multiple leading zeroes (same rules as toml ints) + #Toml spec also disallows leading zeros in the exponent part + #... but this seems less interoperable (some libraries generate leading zeroes in exponents) + #for now we will allow leading zeros in exponents + #!todo - configure 'strict' option to disallow? + #first strip any +, - or _ (just for this test) + set check [::string map {+ "" - "" _ ""} $str] + set r {([0-9])*} + regexp $r $check intpart ;#intpart holds all numerals before the first .,e or E + set z {([0])*} + regexp $z $intpart leadingzeros + if {[::string length $leadingzeros] > 1} { + return 0 + } + #for floats, +,- may occur in multiple places + #e.g -2E-22 +3e34 + #!todo - check bounds ? + + #strip underscores for tcl double check + set check [::string map {_ ""} $str] + #string is double accepts inf nan +NaN etc. + if {![::string is double $check]} { + return 0 + } + + } else { + return 0 + } + #Got this far - didn't find anything wrong with it. + return 1 + } + + #test only that the characters in str are valid for the toml specified type 'datetime'. + proc datetime_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[zZtT0-9\-\+\.:]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } + + proc is_datetime {str} { + #e.g 1979-05-27T00:32:00-07:00 + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] + if {[::string length $str] == $matches} { + #all characters in legal range + #!todo - use full RFC 3339 parser? + lassign [split $str T] datepart timepart + #!todo - what if the value is 'time only'? + + if {[catch {clock scan $datepart} err]} { + puts stderr "tcl clock scan failed err:'$err'" + return 0 + } + #!todo - verify time part is reasonable + } else { + return 0 + } + return 1 + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::utils ---}] +} + +namespace eval tomlish::parse { + #*** !doctools + #[subsection {Namespace tomlish::parse}] + #[para] + #[list_begin definitions] + + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text + + variable state + # states: + # key-space, curly-space, array-space + # value-expected, keyval-syntax, doublequoted, singlequoted, quotedkey, string, multistring + # + # notes: + # key-space i + # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack + # value-expected - we also allow for leading whitespace in this state, but once a value is returned we jump to a state based on the containing space. e.g keytail or array-syntax + # + #stateMatrix defines for each state, actions to take for each possible token. + #single-element actions are the name of the next state into which to transition, or a 'popspace' command to pop a level off the spacestack and add the data to the parent container. + #dual-element actions are a push command and the name of the space to push on the stack. + # - pushspace is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root key-space) + + #test + variable stateMatrix + set stateMatrix [dict create] + + dict set stateMatrix\ + key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + + + dict set stateMatrix\ + curly-space {\ + whitespace "curly-space"\ + newline "curly-space"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + comma "curly-space"\ + eof "err"\ + comment "err"\ + } + + #REVIEW + #toml spec looks like heading towards allowing newlines within inline tables + #https://github.com/toml-lang/toml/issues/781 + dict set stateMatrix\ + curly-syntax {\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + comma "curly-space"\ + eof "err"\ + comment "err"\ + } + + + dict set stateMatrix\ + value-expected {\ + whitespace "value-expected"\ + newline "err"\ + eof "err"\ + untyped-value "samespace"\ + startquote "string"\ + startmultiquote {pushspace "multistring-space"}\ + startinlinetable {pushspace curly-space}\ + comment "err"\ + comma "err"\ + startarray {pushspace array-space}\ + } + dict set stateMatrix\ + array-space {\ + whitespace "array-space"\ + newline "array-space"\ + eof "err"\ + untyped-value "samespace"\ + startarray {pushspace "array-space"}\ + endarray "popspace"\ + startquote "string"\ + startmultiquote "multistring"\ + comma "array-space"\ + comment "array-space"\ + } + dict set stateMatrix\ + array-syntax {\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped-value "samespace"\ + startarray {pushspace array-space}\ + endarray "popspace"\ + startquote "string"\ + startmultiquote "multistring"\ + comma "array-space"\ + comment "err"\ + } + + + dict set stateMatrix\ + itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + #dict set stateMatrix\ + # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + dict set stateMatrix\ + itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + dict set stateMatrix\ + itablekeyval-space {} + dict set stateMatrix\ + itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} + + + dict set stateMatrix\ + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} + dict set stateMatrix\ + keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + dict set stateMatrix\ + keyval-space {} + + + + dict set stateMatrix\ + quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + dict set stateMatrix\ + string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + dict set stateMatrix\ + stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + dict set stateMatrix\ + multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + dict set stateMatrix\ + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} + dict set stateMatrix\ + tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + dict set stateMatrix\ + baretablename {whitespace "NA" newline "err" equal "value-expected"} + dict set stateMatrix\ + tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + dict set stateMatrix\ + tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + dict set stateMatrix\ + tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + dict set stateMatrix\ + end {} + + #see also spacePopTransitions and spacePushTransitions below for state redirections on pop/push + variable stateMatrix_orig { + key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} + curly-space {whitespace "curly-space" newline "curly-space" barekey {pushspace "keyval-space"} quotedkey {pushspace "keyval-space"} startcurly {pushspace curly-space} endcurly "popspace" eof "err"} + value-expected {whitespace "value-expected" newline "err" eof "err" untyped-value "samespace" startquote "string" startmultiquote {pushspace "multistring-space"} comment "err" comma "err" startarray {pushspace array-space}} + array-space {whitespace "array-space" newline "array-space" eof "err" untyped-value "samespace" startarray {pushspace "array-space"} endarray "popspace" startquote "string" startmultiquote "multistring" comma "array-space" comment "array-space"} + array-syntax {whitespace "array-syntax" newline "array-syntax" comma "array-space" untyped-value "samespace" startquote "string" startmultiquote "multistring" startarray {pushspace array-space} comment "err" endarray "popspace"} + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} + keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} + keyval-space {} + quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} + tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} + tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} + tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} + end {} + } + #build a list of 'push triggers' from the stateMatrix + # ie tokens which can push a new space onto spacestack + set push_trigger_tokens [list] + tcl::dict::for {s transitions} $stateMatrix { + tcl::dict::for {token transition_to} $transitions { + set action [lindex $transition_to 0] + switch -exact -- $action { + pushspace - zeropoppushspace { + if {$token ni $push_trigger_tokens} { + lappend push_trigger_tokens $token + } + } + } + } + } + puts stdout "push_trigger_tokens: $push_trigger_tokens" + #!todo - hard code once stateMatrix finalised? + + + #mainly for the -space states: + #redirect to another state $c based on a state transition from $whatever to $b + # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. + #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' + variable spacePopTransitions { + array-space array-syntax + curly-space curly-syntax + keyval-space keytail + itablekeyval-space itablevaltail + } + variable spacePushTransitions { + keyval-space keyval-syntax + itablekeyval-space itablekeyval-syntax + array-space array-space + curly-space curly-space + key-space tablename + } + + + variable state_list + + namespace export tomlish toml + namespace ensemble create + + proc getNextState {tokentype currentstate} { + variable nest + variable v + + variable spacePopTransitions + variable spacePushTransitions + variable last_space_action "none" + variable last_space_type "none" + variable state_list + + set result "" + + if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { + set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] + ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" + switch -exact -- [lindex $transition_to 0] { + popspace { + spacestack pop + set parent [spacestack peek] + lassign $parent type target + set last_space_action "pop" + set last_space_type $type + + if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { + set next [dict get $::tomlish::parse::spacePopTransitions $target] + ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + samespace { + #note the same data as popspace (spacePopTransitions) is used here. + set parent [spacestack peek] + ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" + lassign $parent type target + if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { + set next [dict get $::tomlish::parse::spacePopTransitions $target] + ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + zeropoppushspace { + if {$nest > 0} { + #pop back down to the root level (key-space) + spacestack pop + set parent [spacestack peek] + lassign $parent type target + set last_space_action "pop" + set last_space_type $type + + #----- + #standard pop + set parentlevel [expr {$nest -1}] + lappend v($parentlevel) [set v($nest)] + incr nest -1 + #----- + } + #re-entrancy + + #set next [list pushspace [lindex $transition_to 1]] + set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 + ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" + set result [::tomlish::parse::getNextState $nexttokentype $tokentype] + } + pushspace { + set target [lindex $transition_to 1] + spacestack push [list space $target] + set last_space_action "push" + set last_space_type "space" + + #puts $::tomlish::parse::spacePushTransitions + if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { + set next [dict get $::tomlish::parse::spacePushTransitions $target] + ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" + } else { + set next $target + } + set result $next + } + default { + set result $transition_to + } + } + } else { + set result "nostate-err" + + } + lappend state_list $result + return $result + } + + proc report_line {{line ""}} { + variable linenum + variable is_parsing + if {$is_parsing} { + if {$line eq ""} { + set line $linenum + } + return "Line Number: $line" + } else { + #not in the middle of parsing tomlish text - return nothing. + return "" + } + } + + #produce a *slightly* more readable string rep of the nest for puts etc. + proc nest_pretty1 {list} { + set prettier "{" + + foreach el $list { + if { [lindex $el 0] eq "NEWLINE"} { + append prettier "[list $el]\n" + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { + append prettier [nest_pretty1 $el] + } else { + append prettier "[list $el] " + } + } + append prettier "}" + return $prettier + } + + proc set_tokenType {t} { + variable tokenType + variable tokenType_list + if {![info exists tokenType]} { + set tokenType "" + } + lappend tokenType_list $t + set tokenType $t + } + + proc switch_tokenType {t} { + variable tokenType + variable tokenType_list + lset tokenType_list end $t + set tokenType $t + } + + proc get_tokenType {} { + variable tokenType + return $tokenType + } + + proc _shortcircuit_startquotesequence {} { + variable tok + variable i + set toklen [::string length $tok] + if {$toklen == 1} { + set_tokenType "startquote" + incr i -1 + return -level 2 1 + } elseif {$toklen == 2} { + set_tokenType "startquote" + set tok "\"" + incr i -2 + return -level 2 1 + } + } + + #return a list of 0 1 or 2 tokens + #tomlish::parse::tok + proc tok {s} { + variable nest + variable v + variable i + variable tok + variable type ;#character type + variable state ;#FSM + + set resultlist [list] + + variable tokenType + variable tokenType_list + + + variable endToken + set sLen [::string length $s] + + variable lastChar + + variable braceCount + variable bracketCount + + + #------------------------------ + #Previous run found another (presumably single-char) token + variable token_waiting + if {[dict size $token_waiting]} { + set tokenType [dict get $token_waiting type] + set tok [dict get $token_waiting tok] + dict unset token_waiting type + dict unset token_waiting tok + return 1 + } + #------------------------------ + + set slash_active 0 + set quote 0 + set c "" + set multi_dquote "" + for {} {$i < $sLen} {} { + if {$i > 0} { + set lastChar [string index $s [expr {$i - 1}]] + } else { + set lastChar "" + } + + set c [string index $s $i] + #puts "got char $c during tokenType '$tokenType'" + incr i ;#must incr here because we do'returns'inside the loop + + set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + switch -exact -- $ctest { + # { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" + } + whitespace { + # hash marks end of whitespace token + #do a return for the whitespace, set token_waiting + #dict set token_waiting type comment + #dict set token_waiting tok "" + incr i -1 ;#leave comment for next run + return 1 + } + untyped-value { + #REVIEW! the spec isn't clear.. is whitespace after an int,bool etc required before comment? + #we will accept a comment marker as an immediate terminator of the untyped-value. + incr i -1 + return 1 + } + default { + #quotedkey, string, multistring + append tok $c + } + } + } else { + #$slash_active not relevant when no tokenType + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } + lc { + set multi_dquote "" ;#!! + #test jmn2024 + #left curly brace + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename { + error "unexpected tablename problem" + #$slash_active not relevant to this tokentype + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + comment { + if {$slash_active} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + switch -exact -- $state { + value-expected { + #switch last key to tablename?? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + multistring-space { + set_tokenType "stringpart" + if {$slash_active} { + set tok "\\\{" + } else { + set tok "\{" + } + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + array-space - array-syntax { + #nested anonymous inline table + set_tokenType "startinlinetable" + set tok "\{" + return 1 + } + default { + error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + + } + rc { + set multi_dquote "" ;#!! + #right curly brace + try { + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endinlinetable + dict set token_waiting tok "" + return 1 + } + tablearrayname { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + itablevaltail { + + } + default { + #end any other token + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + tablename { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + error "unexpected tablename problem" + + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname { + error "unexpected tablearrayname problem" + set_tokenType "endinlinetable" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + curly-syntax - curly-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + array-syntax - array-space { + #invalid + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } + itablevaltail { + set_tokenType "endinlinetable" + set tok "" + #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 + incr i -1 + return 1 + } + itablekeyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } + default { + #JMN2024b keytail? + error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + + } + lb { + set multi_dquote "" ;#!! + #left square bracket + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename { + #$slash_active not relevant to this tokentype + #change the tokenType + switch_tokenType "starttablearrayname" + set tok "" ;#no output into the tomlish list for this token + #any following whitespace is part of the tablearrayname, so return now + return 1 + } + comment { + if {$slash_active} {append tok "\\"} + append tok "\[" + } + default { + #end any other token. + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + set_tokenType "startarray" + set tok "\[" + return 1 + } + key-space { + #table name + #assume it's a single bracket - but we need to wait for non-bracket to confirm it's not a tablearray + #note that a starttablearrayname token may contain whitespace between the brackets + # e.g \[ \[ + set_tokenType "starttablename" + set tok "" ;#there is no output into the tomlish list for this token + } + array-space - array-syntax { + #nested array? + set_tokenType "startarray" + set tok "\[" + return 1 + #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" + } + default { + error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + rb { + set multi_dquote "" ;#!! + #right square bracket + try { + + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablename + dict set token_waiting tok "" + return 1 + } + tablearraynames { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + error "unexpected tablename problem" + + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname { + error "unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + default { + error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + #backslash + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - litstring - multilitstring - comment - tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" + } else { + set slash_active 1 + } + } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + barekey { + error "Unexpected backslash during barekey. [tomlish::parse::report_line]" + } + default { + error "Backslash unexpected during tokentype: '$tokenType'. [tomlish::parse::report_line]" + } + } + } else { + if {$state eq "multistring-space"} { + set slash_active 1 + } else { + error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } + } + } + dq { + #double quote + try { + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length in 'startquotesequence'" + } + } + endquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "endmultiquote" + return 1 + } else { + error "unexpected token length in 'endquotesequence'" + } + } + string { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #unescaped quote always terminates a string? + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } + } + stringpart { + #sub element of multistring + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + value-expected { + if {$multi_dquote eq "\"\""} { + dict set token_waiting type startmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + #end whitespace token and reprocess + incr i -1 + return 1 + #append multi_dquote "\"" + } + } + default { + dict set token_waiting type startquote + dict set token_waiting tok "\"" + return 1 + } + } + } + comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } + } + tablename - tablearrayname { + if {$slash_active} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + key-space { + set tokenType startquote + set tok $c + return 1 + } + curly-space { + set tokenType startquote + set tok $c + return 1 + } + tablename - tablearrayname { + set_tokenType $state + set tok $c + } + default { + error "Unexpected quote during state '$state' [tomlish::parse::report_line]" + } + } + } + } on error {em} { + error $em + } finally { + set slash_active 0 + } + } + = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - comment - quotedkey { + #for these tokenTypes an = is just data. + append tok $c + } + stringpart { + append tok $dquotes$c + } + whitespace { + dict set token_waiting type equal + dict set token_waiting tok = + return 1 + } + barekey { + dict set token_waiting type equal + dict set token_waiting tok = + return 1 + } + default { + error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok ${dquotes}= + } + default { + set_tokenType equal + set tok = + return 1 + } + } + } + } + cr { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \r carriage return + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + stringpart { + append tok $dquotes$c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } + } + } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) + set_tokenType newline + set tok cr + } + } + lf { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + # \n newline + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + newline { + #this lf is the trailing part of a crlf + append tok lf + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + } else { + set had_slash $slash_active + set slash_active 0 + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + set_tokenType newline + set tok lf + return 1 + } + } + } + , { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - comment - quotedkey - tablename - tablearrayname { + append tok $c + } + stringpart { + append tok $dquotes$c + } + default { + dict set token_waiting type comma + dict set token_waiting tok "," + return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "," + } + multiliteral-space { + set_tokenType literalpart + set tok "," + } + default { + set_tokenType comma + set tok "," + return 1 + } + } + } + } + . { + set multi_dquote "" ;#!! + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment - quotedkey - untyped-value { + append tok $c + } + baretablename - tablename - tablearrayname { + #subtable - split later - review + append tok $c + } + barekey { + #we need to transition the barekey to become a structured table name ??? review + switch_tokenType tablename + incr i -1 + + #error "barekey period unimplemented" + } + default { + error "Received period during tokenType '$tokenType' [tomlish::parse::report_line]" + #dict set token_waiting type period + #dict set token_waiting tok "." + #return 1 + } + } + } else { + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "." + } + multiliteral-space { + set_tokenType literalpart + set tok "." + } + default { + set_tokenType untyped-value + set tok "." + } + } + } + + } + " " { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + if {[::string length $tokenType]} { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + untyped-value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + comment { + if {$had_slash} { + append tok "\\" + } + append tok $c + } + quotedkey - string { + if {$had_slash} { + append tok "\\" + } + #if {$dquotes eq "\""} { + #} + append tok $c + } + whitespace { + append tok $c + } + stringpart { + if {$had_slash} { + #REVIEW + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + #keeping WS separate allows easier processing of CONT stripping + append tok $dquotes + incr i -1 + return 1 + } + } + starttablename { + incr i -1 + return 1 + } + starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearrayname { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace space during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + if {$had_slash} { + set tok "\\$c" + } else { + set tok $c + } + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return + } + set_tokenType "whitespace" + append tok $c + } + } + default { + if {$had_slash} { + error "unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } + } + } + } + tab { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set token_waiting type whitespace + #set token_waiting tok $c + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + quotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + append tok $dquotes$c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + default { + set_tokenType "whitespace" + append tok $c + } + } + } + } + bom { + #BOM (Byte Order Mark) - ignored by token consumer + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + default { + set dquotes $multi_dquote + set multi_dquote "" ;#!! + + if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + endquotesequence { + puts stderr "endquotesequence: $tok" + } + whitespace { + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } + barekey { + if {[tomlish::utils::is_barekey $c]} { + append tok $c + } else { + error "Unexpected character '$c' during bare key. Only \[a-zA-Z0-9_-\] allowed. [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + incr i -1 + #allow statemachine to set context for subsequent chars + return 1 + } + stringpart { + append tok $dquotes$c + } + default { + #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + append tok $c + } + } + } else { + set had_slash $slash_active + set slash_active 0 + switch -exact -- $state { + key-space - curly-space - curly-syntax { + #if no currently active token - assume another key value pair + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } + tablename { + set_tokenType "tablename" + set tok $c + } + tablearrayname { + set_tokenType "tablearrayname" + set tok $c + } + default { + set_tokenType "untyped-value" + set tok $c + } + } + } + } + } + + } + + #run out of characters (eof) + if {[::string length $tokenType]} { + #check for invalid ending tokens + #if {$state eq "err"} { + # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" + #} + if {$tokenType eq "startquotesequence"} { + set toklen [::string length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + eror "eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #dict set token_waiting type "string" + #dict set token_waiting tok "" + return 1 + } + } + dict set token_waiting type "eof" + dict set token_waiting tok "eof" + return 1 + } else { + ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + set tokenType "eof" + set tok "eof" + } + return 0 + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::parse ---}] +} + +tcl::namespace::eval tomlish::app { + variable applist [list encoder decoder test] + + #*** !doctools + #[subsection {Namespace tomlish::app}] + #[para] + #[list_begin definitions] + + proc decoder {args} { + #*** !doctools + #[call app::[fun decoder] [arg args]] + #[para] read toml on stdin until EOF + #[para] on error - returns non-zero exit code and writes error on stderr + #[para] on success - returns zero exit code and writes JSON encoding of the data on stdout + #[para] This decoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + #fconfigure stdin -encoding utf-8 + fconfigure stdin -translation binary + #Just slurp it all - presumably we are not handling massive amounts of data on stdin. + # - even if the input is large, we probably don't gain much (aside from possible memory savings?) by attempting to process input as it arrives. + if {[catch { + set toml [read stdin] + }]} { + exit 2 ;#read error + } + try { + set j [::tomlish::toml_to_json $toml] + } on error {em} { + puts stderr "decoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $j + exit 0 + } + + proc encoder {args} { + #*** !doctools + #[call app::[fun encoder] [arg args]] + #[para] read JSON on stdin until EOF + #[para] return non-zero exitcode if JSON data cannot be converted to a valid TOML representation + #[para] return zero exitcode and TOML data on stdout if JSON data can be converted. + #[para] This encoder is intended to be compatible with toml-test + + set opts [dict merge [dict create] $args] + fconfigure stdin -translation binary + if {[catch { + set json [read stdin] + }]} { + exit 2 ;#read error + } + try { + set toml [::tomlish::json_to_toml $json] + } on error {em} { + puts stderr "encoding failed: '$em'" + exit 1 + } + puts -nonewline stdout $toml + exit 0 + } + + proc test {args} { + set opts [dict merge [dict create] $args] + + package require test::tomlish + if {[dict exists $opts -suite]} { + test::tomlish::suite [dict get $opts -suite] + } + test::tomlish::run + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::app ---}] +} + +proc ::tomlish::appnames {} { + set applist [list] + foreach cmd [info commands ::tomlish::app::*] { + lappend applist [namespace tail $cmd] + } + return $applist +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval tomlish::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace tomlish::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace tomlish::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +if {$argc > 0} { + puts stderr "argc: $argc args: $argv" + + if {($argc == 1)} { + if {[::string tolower $argv] in {help -help h -h}} { + puts stdout "Usage: -app where appname one of:[tomlish::appnames]" + exit 0 + } else { + puts stderr "Argument '$argv' not understood. Try -help" + exit 1 + } + } + set opts [dict create] + set opts [dict merge $opts $argv] + + set opts_understood [list -app ] + if {"-app" in [dict keys $opts]} { + #Don't vet the remaining opts - as they are interpreted by each app + } else { + foreach key [dict keys $opts] { + if {$key ni $opts_understood} { + puts stderr "Option '$key' not understood" + exit 1 + } + } + } + if {[dict exists $opts -app]} { + set app [dict get $opts -app] + if {$app ni [tomlish::appnames]} { + puts stderr "app '[dict get $opts -app]' not found" + exit 1 + } + tomlish::app::$app {*}$opts + } +} + +## Ready +package provide tomlish [namespace eval tomlish { + variable pkg tomlish + variable version + set version 1.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/uuid-1.0.8.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/uuid-1.0.8.tm new file mode 100644 index 00000000..c5cffa67 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/uuid-1.0.8.tm @@ -0,0 +1,246 @@ +# uuid.tcl - Copyright (C) 2004 Pat Thoyts +# +# UUIDs are 128 bit values that attempt to be unique in time and space. +# +# Reference: +# http://www.opengroup.org/dce/info/draft-leach-uuids-guids-01.txt +# +# uuid: scheme: +# http://www.globecom.net/ietf/draft/draft-kindel-uuid-uri-00.html +# +# Usage: uuid::uuid generate +# uuid::uuid equal $idA $idB + +package require Tcl 8.5 9 + +namespace eval uuid { + variable accel + array set accel {critcl 0} + + namespace export uuid + + variable uid + if {![info exists uid]} { + set uid 1 + } + + proc K {a b} {set a} +} + +### +# Optimization +# Caches machine info after the first pass +### + +proc ::uuid::generate_tcl_machinfo {} { + variable machinfo + if {[info exists machinfo]} { + return $machinfo + } + lappend machinfo [clock seconds]; # timestamp + lappend machinfo [clock clicks]; # system incrementing counter + lappend machinfo [info hostname]; # spatial unique id (poor) + lappend machinfo [pid]; # additional entropy + lappend machinfo [array get ::tcl_platform] + + ### + # If we have /dev/urandom just stream 128 bits from that + ### + if {[file exists /dev/urandom]} { + set fin [open /dev/urandom r] + fconfigure $fin -encoding binary + binary scan [read $fin 128] H* machinfo + close $fin + } elseif {[catch {package require nettool}]} { + # More spatial information -- better than hostname. + # bug 1150714: opening a server socket may raise a warning messagebox + # with WinXP firewall, using ipconfig will return all IP addresses + # including ipv6 ones if available. ipconfig is OK on win98+ + if {[string equal $::tcl_platform(platform) "windows"]} { + catch {exec ipconfig} config + lappend machinfo $config + } else { + catch { + set s [socket -server void -myaddr [info hostname] 0] + K [fconfigure $s -sockname] [close $s] + } r + lappend machinfo $r + } + + if {[package provide Tk] != {}} { + lappend machinfo [winfo pointerxy .] + lappend machinfo [winfo id .] + } + } else { + ### + # If the nettool package works on this platform + # use the stream of hardware ids from it + ### + lappend machinfo {*}[::nettool::hwid_list] + } + return $machinfo +} + +# Generates a binary UUID as per the draft spec. We generate a pseudo-random +# type uuid (type 4). See section 3.4 +# +proc ::uuid::generate_tcl {} { + package require md5 2 + variable uid + + set tok [md5::MD5Init] + md5::MD5Update $tok [incr uid]; # package incrementing counter + foreach string [generate_tcl_machinfo] { + md5::MD5Update $tok $string + } + set r [md5::MD5Final $tok] + binary scan $r c* r + + # 3.4: set uuid versioning fields + lset r 8 [expr {([lindex $r 8] & 0x3F) | 0x80}] + lset r 6 [expr {([lindex $r 6] & 0x0F) | 0x40}] + + return [binary format c* $r] +} + +if {[string equal $tcl_platform(platform) "windows"] + && [package provide critcl] != {}} { + namespace eval uuid { + critcl::ccode { + #define WIN32_LEAN_AND_MEAN + #define STRICT + #include + #include + typedef long (__stdcall *LPFNUUIDCREATE)(UUID *); + typedef const unsigned char cu_char; + } + critcl::cproc generate_c {Tcl_Interp* interp} ok { + HRESULT hr = S_OK; + int r = TCL_OK; + UUID uuid = {0}; + HMODULE hLib; + LPFNUUIDCREATE lpfnUuidCreate = NULL; + hLib = LoadLibraryA(("rpcrt4.dll")); + if (hLib) + lpfnUuidCreate = (LPFNUUIDCREATE) + GetProcAddress(hLib, "UuidCreate"); + if (lpfnUuidCreate) { + Tcl_Obj *obj; + lpfnUuidCreate(&uuid); + obj = Tcl_NewByteArrayObj((cu_char *)&uuid, sizeof(uuid)); + Tcl_SetObjResult(interp, obj); + } else { + Tcl_SetResult(interp, "error: failed to create a guid", + TCL_STATIC); + r = TCL_ERROR; + } + return r; + } + } +} + +# Convert a binary uuid into its string representation. +# +proc ::uuid::tostring {uuid} { + binary scan $uuid H* s + foreach {a b} {0 7 8 11 12 15 16 19 20 end} { + append r [string range $s $a $b] - + } + return [string tolower [string trimright $r -]] +} + +# Convert a string representation of a uuid into its binary format. +# +proc ::uuid::fromstring {uuid} { + return [binary format H* [string map {- {}} $uuid]] +} + +# Compare two uuids for equality. +# +proc ::uuid::equal {left right} { + set l [fromstring $left] + set r [fromstring $right] + return [string equal $l $r] +} + +# Call our generate uuid implementation +proc ::uuid::generate {} { + variable accel + if {$accel(critcl)} { + return [generate_c] + } else { + return [generate_tcl] + } +} + +# uuid generate -> string rep of a new uuid +# uuid equal uuid1 uuid2 +# +proc uuid::uuid {cmd args} { + switch -exact -- $cmd { + generate { + if {[llength $args] != 0} { + return -code error "wrong # args:\ + should be \"uuid generate\"" + } + return [tostring [generate]] + } + equal { + if {[llength $args] != 2} { + return -code error "wrong \# args:\ + should be \"uuid equal uuid1 uuid2\"" + } + return [eval [linsert $args 0 equal]] + } + default { + return -code error "bad option \"$cmd\":\ + must be generate or equal" + } + } +} + +# ------------------------------------------------------------------------- + +# LoadAccelerator -- +# +# This package can make use of a number of compiled extensions to +# accelerate the digest computation. This procedure manages the +# use of these extensions within the package. During normal usage +# this should not be called, but the test package manipulates the +# list of enabled accelerators. +# +proc ::uuid::LoadAccelerator {name} { + variable accel + set r 0 + switch -exact -- $name { + critcl { + if {![catch {package require tcllibc}]} { + set r [expr {[info commands ::uuid::generate_c] != {}}] + } + } + default { + return -code error "invalid accelerator package:\ + must be one of [join [array names accel] {, }]" + } + } + set accel($name) $r +} + +# ------------------------------------------------------------------------- + +# Try and load a compiled extension to help. +namespace eval ::uuid { + variable e {} + foreach e {critcl} { + if {[LoadAccelerator $e]} break + } + unset e +} + +package provide uuid 1.0.8 + +# ------------------------------------------------------------------------- +# Local variables: +# mode: tcl +# indent-tabs-mode: nil +# End: diff --git a/src/testansi/palettes/AppleII.ans b/src/testansi/palettes/AppleII.ans new file mode 100644 index 00000000..b3a72d82 --- /dev/null +++ b/src/testansi/palettes/AppleII.ans @@ -0,0 +1,9 @@ +]4;0;rgb:00/00/00]4;1;rgb:e3/1e/60]4;2;rgb:00/a3/60]4;3;rgb:60/72/03]4;4;rgb:60/4e/bd]4;5;rgb:ff/44/fd]4;6;rgb:d0/c3/ff]4;7;rgb:9c/9c/9c]4;8;rgb:9c/9c/9c]4;9;rgb:ff/6a/3c]4;10;rgb:14/f5/3c]4;11;rgb:d0/dd/8d]4;12;rgb:14/cf/fd]4;13;rgb:ff/a0/d0]4;14;rgb:72/ff/d0]4;15;rgb:ff/ff/ff + +Apple II colors palette +Change the console palette to colors from the Apple II colors palette, +while trying to keep colors indexes similar to CGA. +This is designed to bring back memories when seeing ANSI-art in Apple II +colors, but isn't a compatible palette and doesn't match Apple II indexes. + +by Philippe Majerus (www.phm.lu) diff --git a/src/testansi/palettes/Solarized.ans b/src/testansi/palettes/Solarized.ans new file mode 100644 index 00000000..c5224fb8 --- /dev/null +++ b/src/testansi/palettes/Solarized.ans @@ -0,0 +1,12 @@ +]4;0;rgb:07/36/42]4;1;rgb:dc/32/2f]4;2;rgb:85/99/00]4;3;rgb:b5/89/00]4;4;rgb:26/8b/d2]4;5;rgb:d3/36/82]4;6;rgb:2a/a1/98]4;7;rgb:ee/e8/d5]4;8;rgb:00/2b/36]4;9;rgb:cb/4b/16]4;10;rgb:58/6e/75]4;11;rgb:65/7b/83]4;12;rgb:83/94/96]4;13;rgb:6c/71/c4]4;14;rgb:93/a1/a1]4;15;rgb:fd/f6/e3 + +Solarized colors palette +Change the console palette to the Solarized palette designed by Ethan Schoonover +Solarized is a palette that does not respect the console and ANSI +palettes, but provides replacement monotones and accent colors +that reduces contrasting brightness while preserving contrasting +hues, making text more comfortable to read. + +Credits: Ethan Schoonover (http://ethanschoonover.com/solarized) + +ANSI/VT file by Philippe Majerus (www.phm.lu) diff --git a/src/testansi/palettes/Solarized_light.ans b/src/testansi/palettes/Solarized_light.ans new file mode 100644 index 00000000..31647332 --- /dev/null +++ b/src/testansi/palettes/Solarized_light.ans @@ -0,0 +1,13 @@ +]4;0;rgb:ee/e8/d5]4;1;rgb:dc/32/2f]4;2;rgb:85/99/00]4;3;rgb:b5/89/00]4;4;rgb:26/8b/d2]4;5;rgb:d3/36/82]4;6;rgb:2a/a1/98]4;7;rgb:07/36/42]4;8;rgb:fd/f6/e3]4;9;rgb:cb/4b/16]4;10;rgb:93/a1/a1]4;11;rgb:83/94/96]4;12;rgb:65/7b/83]4;13;rgb:6c/71/c4]4;14;rgb:58/6e/75]4;15;rgb:00/2b/36 + +Solarized light colors palette +Change the console palette to the light background version of +the Solarized palette designed by Ethan Schoonover. +This does not respect the console and ANSI palette, but instead +reverses background tones and content tones so existing scripts +designed for light content on dark background automatically +show as light background. Accent colors are left intact. + +Credits: Ethan Schoonover (http://ethanschoonover.com/solarized) + +ANSI/VT file by Philippe Majerus (www.phm.lu) diff --git a/src/testansi/palettes/VSCode.ans b/src/testansi/palettes/VSCode.ans new file mode 100644 index 00000000..66968e0c --- /dev/null +++ b/src/testansi/palettes/VSCode.ans @@ -0,0 +1,6 @@ +]4;0;rgb:1e/1e/1e]4;1;rgb:cd/31/31]4;2;rgb:0d/bc/79]4;3;rgb:e5/e5/10]4;4;rgb:24/72/c8]4;5;rgb:bc/3f/bc]4;6;rgb:11/a8/cd]4;7;rgb:cc/cc/cc]4;8;rgb:66/66/66]4;9;rgb:f1/4c/4c]4;10;rgb:23/d1/8b]4;11;rgb:f5/f5/43]4;12;rgb:3b/8e/ea]4;13;rgb:d6/70/d6]4;14;rgb:29/b8/db]4;15;rgb:e5/e5/e5 + +VSCode colors palette +Change the console palette to the standard Dark+ palette used in Visual Studio Code. + +by Philippe Majerus (www.phm.lu) diff --git a/src/testansi/palettes/Windows.ans b/src/testansi/palettes/Windows.ans new file mode 100644 index 00000000..446b97cf --- /dev/null +++ b/src/testansi/palettes/Windows.ans @@ -0,0 +1,6 @@ +]4;0;rgb:0c/0c/0c]4;1;rgb:c5/0f/1f]4;2;rgb:13/a1/0e]4;3;rgb:c1/9c/00]4;4;rgb:00/37/da]4;5;rgb:88/17/98]4;6;rgb:3a/96/dd]4;7;rgb:cc/cc/cc]4;8;rgb:76/76/76]4;9;rgb:e7/48/56]4;10;rgb:16/c6/0c]4;11;rgb:f9/f1/a5]4;12;rgb:3b/78/ff]4;13;rgb:b4/00/9e]4;14;rgb:61/d6/d6]4;15;rgb:f2/f2/f2 + +Windows colors palette +The new Windows console palette from Windows 10 Ver. 1709 and later. + +by Philippe Majerus (www.phm.lu) diff --git a/src/testansi/palettes/windows_legacy.ans b/src/testansi/palettes/windows_legacy.ans new file mode 100644 index 00000000..de760bc6 --- /dev/null +++ b/src/testansi/palettes/windows_legacy.ans @@ -0,0 +1,7 @@ +]4;0;rgb:00/00/00]4;1;rgb:80/00/00]4;2;rgb:00/80/00]4;3;rgb:80/80/00]4;4;rgb:00/00/80]4;5;rgb:80/00/80]4;6;rgb:00/80/80]4;7;rgb:c0/c0/c0]4;8;rgb:80/80/80]4;9;rgb:ff/00/00]4;10;rgb:00/ff/00]4;11;rgb:ff/ff/00]4;12;rgb:00/00/ff]4;13;rgb:ff/00/ff]4;14;rgb:00/ff/ff]4;15;rgb:ff/ff/ff + +Windows legacy colors palette +Change the console palette to the original Windows conhost palette. +(The standard Windows console palette finally changed in Windows 10 Ver. 1709) + +by Philippe Majerus (www.phm.lu) diff --git a/src/vendormodules/overtype-1.6.5.tm b/src/vendormodules/overtype-1.6.5.tm index 38ce71c2..492341d6 100644 --- a/src/vendormodules/overtype-1.6.5.tm +++ b/src/vendormodules/overtype-1.6.5.tm @@ -233,7 +233,6 @@ tcl::namespace::eval overtype { -width \uFFEF\ -height \uFFEF\ -startcolumn 1\ - -wrap 0\ -ellipsis 0\ -ellipsistext $default_ellipsis_horizontal\ -ellipsiswhitespace 0\ @@ -243,11 +242,13 @@ tcl::namespace::eval overtype { -exposed1 \uFFFD\ -exposed2 \uFFFD\ -experimental 0\ - -cp437 1\ + -cp437 0\ -looplimit \uFFEF\ -crm_mode 0\ -reverse_mode 0\ -insert_mode 0\ + -wrap 0\ + -info 0\ -console {stdin stdout stderr}\ ] #expand_right is perhaps consistent with the idea of the page_size being allowed to grow horizontally.. @@ -263,14 +264,19 @@ tcl::namespace::eval overtype { #-ellipsis args not used if -wrap is true foreach {k v} $argsflags { switch -- $k { - -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace + -looplimit - -width - -height - -startcolumn - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -transparent - -exposed1 - -exposed2 - -experimental - -expand_right - -appendlines - -reverse_mode - -crm_mode - -insert_mode - -cp437 - - -console { + - -info - -console { tcl::dict::set opts $k $v } + -wrap - -autowrap_mode { + #temp alias -autowrap_mode for consistency with renderline + #todo - + tcl::dict::set opts -wrap $v + } default { error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" } @@ -280,10 +286,6 @@ tcl::namespace::eval overtype { # -- --- --- --- --- --- #review - expand_left for RTL text? set opt_expand_right [tcl::dict::get $opts -expand_right] - ##### - # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. - set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) - ##### #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. set opt_width [tcl::dict::get $opts -width] set opt_height [tcl::dict::get $opts -height] @@ -298,50 +300,34 @@ tcl::namespace::eval overtype { set opt_crm_mode [tcl::dict::get $opts -crm_mode] set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] set opt_insert_mode [tcl::dict::get $opts -insert_mode] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_autowrap_mode [tcl::dict::get $opts -wrap] + #??? wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### # -- --- --- --- --- --- set opt_cp437 [tcl::dict::get $opts -cp437] + set opt_info [tcl::dict::get $opts -info] - #initial state for renderspace 'terminal' reset - set initial_state [dict create\ - -width $opt_width\ - -height $opt_height\ - -crm_mode $opt_crm_mode\ - -reverse_mode $opt_reverse_mode\ - -insert_mode $opt_insert_mode\ - -cp437 $opt_cp437\ - ] # ---------------------------- # -experimental dev flag to set flags etc # ---------------------------- set data_mode 0 - set info_mode 0 set edit_mode 0 set opt_experimental [tcl::dict::get $opts -experimental] foreach o $opt_experimental { switch -- $o { - old_mode { - set info_mode 1 - } data_mode { set data_mode 1 } - info_mode { - set info_mode 1 - } edit_mode { set edit_mode 1 } } } # ---------------------------- - - #modes - set insert_mode $opt_insert_mode ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l - set autowrap_mode $opt_wrap - set reverse_mode $opt_reverse_mode - set crm_mode $opt_crm_mode set underblock [tcl::string::map {\r\n \n} $underblock] @@ -367,6 +353,20 @@ tcl::namespace::eval overtype { set renderwidth $opt_width set renderheight $opt_height } + #initial state for renderspace 'terminal' reset + set initial_state [dict create\ + renderwidth $renderwidth\ + renderheight $renderheight\ + crm_mode $opt_crm_mode\ + reverse_mode $opt_reverse_mode\ + insert_mode $opt_insert_mode\ + autowrap_mode $opt_autowrap_mode\ + cp437 $opt_cp437\ + ] + #modes + #e.g insert_mode can be toggled by insert key or ansi IRM sequence CSI 4 h|l + #opt_startcolumn ?? - DECSLRM ? + set vtstate $initial_state # -- --- --- --- #REVIEW - do we need ansi resets in the underblock? @@ -494,50 +494,55 @@ tcl::namespace::eval overtype { } #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set renderargs [list -experimental $opt_experimental\ - -cp437 $opt_cp437\ - -info 1\ - -crm_mode $crm_mode\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ - -reverse_mode $reverse_mode\ - -cursor_restore_attributes $cursor_saved_attributes\ - -transparent $opt_transparent\ - -width $renderwidth\ - -exposed1 $opt_exposed1\ - -exposed2 $opt_exposed2\ - -expand_right $opt_expand_right\ - -cursor_column $col\ - -cursor_row $row\ + set renderargs [list -experimental $opt_experimental\ + -cp437 $opt_cp437\ + -info 1\ + -crm_mode [tcl::dict::get $vtstate crm_mode]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ + -reverse_mode [tcl::dict::get $vtstate reverse_mode]\ + -cursor_restore_attributes $cursor_saved_attributes\ + -transparent $opt_transparent\ + -width [tcl::dict::get $vtstate renderwidth]\ + -exposed1 $opt_exposed1\ + -exposed2 $opt_exposed2\ + -expand_right $opt_expand_right\ + -cursor_column $col\ + -cursor_row $row\ $undertext\ $overtext\ ] set LASTCALL $renderargs set rinfo [renderline {*}$renderargs] - set instruction [tcl::dict::get $rinfo instruction] - set insert_mode [tcl::dict::get $rinfo insert_mode] - set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# - set reverse_mode [tcl::dict::get $rinfo reverse_mode] + set instruction [tcl::dict::get $rinfo instruction] + tcl::dict::set vtstate crm_mode [tcl::dict::get $rinfo crm_mode] + tcl::dict::set vtstate insert_mode [tcl::dict::get $rinfo insert_mode] + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# + tcl::dict::set vtstate reverse_mode [tcl::dict::get $rinfo reverse_mode] #how to support reverse_mode in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? # - review - the answer is probably that we don't need to - it is set/reset only during application of overtext - set crm_mode [tcl::dict::get $rinfo crm_mode] - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] - set unapplied [tcl::dict::get $rinfo unapplied] - set unapplied_list [tcl::dict::get $rinfo unapplied_list] - set post_render_col [tcl::dict::get $rinfo cursor_column] - set post_render_row [tcl::dict::get $rinfo cursor_row] - set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] - set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] - set visualwidth [tcl::dict::get $rinfo visualwidth] - set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] - set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] - tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + + #Note carefully the difference betw overflow_right and unapplied. + #overflow_right may need to be included in next run before the unapplied data + #overflow_right most commonly has data when in insert_mode + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] + set unapplied [tcl::dict::get $rinfo unapplied] + set unapplied_list [tcl::dict::get $rinfo unapplied_list] + set post_render_col [tcl::dict::get $rinfo cursor_column] + set post_render_row [tcl::dict::get $rinfo cursor_row] + set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] + set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] + set visualwidth [tcl::dict::get $rinfo visualwidth] ;#column width of what is 'rendered' for the line + set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] + set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - if {0 && $reverse_mode} { + if {0 && [tcl::dict::get $vtstate reverse_mode]} { #test branch - todo - prune puts stderr "---->[ansistring VIEW $replay_codes_overlay] rendered: $rendered" #review @@ -593,19 +598,29 @@ tcl::namespace::eval overtype { #todo - handle potential insertion mode as above for cursor restore? #keeping separate branches for debugging - review and merge as appropriate when stable - tcl::dict::incr instruction_stats $instruction - switch -- $instruction { + set instruction_type [lindex $instruction 0] ;#some instructions have params + tcl::dict::incr instruction_stats $instruction_type + switch -- $instruction_type { reset { #reset the 'renderspace terminal' (not underlying terminal) set row 1 set col 1 - - + set vtstate [tcl::dict::merge $vtstate $initial_state] + #todo - clear screen } {} { + #end of supplied line input #lf included in data set row $post_render_row set col $post_render_col + if {![llength $unapplied_list]} { + if {$overflow_right ne ""} { + incr row + } + } else { + puts stderr "renderspace end of input line - has unapplied: [ansistring VIEW $unapplied] (review)" + } + set col $opt_startcolumn } up { @@ -708,17 +723,18 @@ tcl::namespace::eval overtype { puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" - set sub_info [overtype::renderline -info 1\ - -width $renderwidth\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ + set sub_info [overtype::renderline\ + -info 1\ + -width [tcl::dict::get $vtstate renderwidth]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ -expand_right [tcl::dict::get $opts -opt_expand_right]\ ""\ $overflow_right\ ] - set foldline [tcl::dict::get $sub_info result] - set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? - set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. + set foldline [tcl::dict::get $sub_info result] + tcl::dict::set vtstate insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. linsert outputlines $renderedrow $foldline #review - row & col set by restore - but not if there was no save.. } @@ -745,6 +761,53 @@ tcl::namespace::eval overtype { set col $post_render_col #overflow + unapplied? } + clear_and_move { + #e.g 2J + if {$post_render_row > [llength $outputlines]} { + set row [llength $outputlines] + } else { + set row $post_render_row + } + set col $post_render_col + set overflow_right "" ;#if we're clearing - any overflow due to insert_mode is irrelevant + set clearedlines [list] + foreach ln $outputlines { + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $renderwidth]\x1b\[0m + if 0 { + + set lineparts [punk::ansi::ta::split_codes $ln] + set numcells 0 + foreach {pt _code} $lineparts { + if {$pt ne ""} { + foreach grapheme [punk::char::grapheme_split $pt] { + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + incr numcells 1 + } + default { + if {$grapheme eq "\u0000"} { + incr numcells 1 + } else { + incr numcells [grapheme_width_cached $grapheme] + } + } + } + + } + } + } + #replays/resets each line + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $numcells]\x1b\[0m + } + } + set outputlines $clearedlines + #todo - determine background/default to be in effect - DECECM ? + puts stderr "replay_codes_overlay: [ansistring VIEW $replay_codes_overlay]" + #lset outputlines 0 $replay_codes_overlay[lindex $outputlines 0] + + } lf_start { #raw newlines # ---------------------- @@ -780,27 +843,48 @@ tcl::namespace::eval overtype { append rendered $overflow_right set overflow_right "" } else { + #review - we should really make renderline do the work...? set overflow_width [punk::ansi::printing_length $overflow_right] if {$visualwidth + $overflow_width <= $renderwidth} { append rendered $overflow_right set overflow_right "" } else { - if {$visualwidth < $renderwidth} { - set graphemes [punk::char::grapheme_split $overflow_width] - set add "" - set addlen $visualwidth - set remaining_overflow $graphemes - foreach g $graphemes { - set w [overtype::grapheme_width_cached] - if {$addlen + $w <= $renderwidth} { - append add $g - incr addlen $w - lpop remaining_overflow - } else { - break - } + if {[tcl::dict::get $vtstate autowrap_mode]} { + set outputlines [linsert $outputlines $renderedrow $overflow_right] + set overflow_right "" + set row [expr {$renderedrow + 2}] + } else { + set overflow_right "" ;#abandon + } + + if {0 && $visualwidth < $renderwidth} { + puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" + error "incomplete - abandon?" + set overflowparts [punk::ansi::ta::split_codes $overflow_right] + set remaining_overflow $overflowparts + set filled 0 + foreach {pt code} $overflowparts { + lpop remaining_overflow 0 + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set add "" + set addlen $visualwidth + foreach g $graphemes { + set w [overtype::grapheme_width_cached $g] + if {$addlen + $w <= $renderwidth} { + append add $g + incr addlen $w + } else { + set filled 1 + break + } + } + append rendered $add + } + if {!$filled} { + lpop remaining_overflow 0 ;#pop code + } } - append rendered $add set overflow_right [join $remaining_overflow ""] } } @@ -829,14 +913,16 @@ tcl::namespace::eval overtype { #we may also have other control sequences that came after col 80 e.g cursor save if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" - #assertion - there should be no overflow.. - puts $lhs + #assertion - there should be no overflow.. + puts $lhs + } + if {![tcl::dict::get $vtstate insert_mode]} { + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode } - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right set row $post_render_row #set row $renderedrow @@ -981,7 +1067,7 @@ tcl::namespace::eval overtype { #normal single-width grapheme overflow #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" set row $post_render_row ;#renderline will not advance row when reporting overflow char - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { incr row set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? } else { @@ -1014,7 +1100,7 @@ tcl::namespace::eval overtype { #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { if {$renderwidth < 2} { #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character set idx 0 @@ -1055,6 +1141,14 @@ tcl::namespace::eval overtype { set row $post_render_row set col $post_render_col } + set_window_title { + set newtitle [lindex $instruction 1] + puts stderr "overtype::renderspace set_window_title [ansistring VIEW $newtitle] instruction '$instruction'" + # + } + reset_colour_palette { + puts stderr "overtype::renderspace instruction '$instruction' unimplemented" + } default { puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" } @@ -1062,7 +1156,7 @@ tcl::namespace::eval overtype { } - if {!$opt_expand_right && !$autowrap_mode} { + if {!$opt_expand_right && ![tcl::dict::get $vtstate autowrap_mode]} { #not allowed to overflow column or wrap therefore we get overflow data to truncate if {[tcl::dict::get $opts -ellipsis]} { set show_ellipsis 1 @@ -1160,11 +1254,22 @@ tcl::namespace::eval overtype { } set result [join $outputlines \n] - if {$info_mode} { + if {!$opt_info} { + return $result + } else { #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? #append result \n$instruction_stats\n + set inforesult [dict create\ + result $result\ + last_instruction $instruction\ + instruction_stats $instruction_stats\ + ] + if {$opt_info == 2} { + return [pdict -channel none inforesult] + } else { + return $inforesult + } } - return $result } #todo - left-right ellipsis ? @@ -2368,14 +2473,22 @@ tcl::namespace::eval overtype { } else { #linefeed occurred in middle or at end of text #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" - incr cursor_row - if {$idx == -1 || $overflow_idx > $idx} { - #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow - set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + if {$insert_mode == 0} { + incr cursor_row + if {$idx == -1 || $overflow_idx > $idx} { + #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + } + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + incr cursor_row + #don't adjust the overflow_idx + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction lf_mid + break ;# could have overdata following the \n - don't keep processing } - set instruction lf_mid - priv::render_unapplied $overlay_grapheme_control_list $gci - break } } @@ -3077,8 +3190,11 @@ tcl::namespace::eval overtype { set idx [expr {$cursor_column -1}] set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] incr idx_over + if {[llength $outcols]} { + priv::render_erasechar 0 [llength $outcols] + } priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction move + set instruction clear_and_move break } 3 { @@ -3749,6 +3865,72 @@ tcl::namespace::eval overtype { } 7OSC - 8OSC { + # OSCs are terminated with ST of either \007 or \x1b\\ - we allow either whether code was 7 or 8 bit + if {[tcl::string::index $codenorm end] eq "\007"} { + set code_content [tcl::string::range $codenorm 4 end-1] ;#ST is \007 + } else { + set code_content [tcl::string::range $codenorm 4 end-2] ;#ST is \x1b\\ + } + set first_colon [tcl::string::first {;} $code_content] + if {$first_colon == -1} { + #there probably should always be a colon - but we'll try to make sense of it without + set osc_code $code_content ;#e.g \x1b\]104\007 vs \x1b\]104\;\007 + } else { + set osc_code [tcl::string::range $code_content 0 $first_colon-1] + } + switch -exact -- $osc_code { + 2 { + set newtitle [tcl::string::range $code_content 2 end] + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list set_window_title $newtitle] + break + } + 4 { + #OSC 4 - set colour palette + #can take multiple params + #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ + set params [tcl::string::range $code_content 1 end] + + puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 { + #OSC 10 through 17 - so called 'dynamic colours' + #can take multiple params - each successive parameter changes the next colour in the list + #- e.g if code started at 11 - next param is for 12. 17 takes only one param because there are no more + #10 change text foreground colour + #11 change text background colour + #12 change text cursor colour + #13 change mouse foreground colour + #14 change mouse background colour + #15 change tektronix foreground colour + #16 change tektronix background colour + #17 change highlight colour + set params [tcl::string::range $code_content 2 end] + + puts stderr "overtype::renderline OSC $osc_code set dynamic colours unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 18 { + #why is this not considered one of the dynamic colours above? + #https://www.xfree86.org/current/ctlseqs.html + #tektronix cursor color + puts stderr "overtype::renderline OSC 18 - set tektronix cursor color unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + 104 { + #reset colour palette + #we want to do it for the current rendering context only (vvt) - not just pass through to underlying vt + puts stderr "overtype::renderline OSC 104 reset colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list reset_colour_palette] + break + } + default { + puts stderr "overtype::renderline OSC - UNIMPLEMENTED. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } } default { @@ -3791,6 +3973,23 @@ tcl::namespace::eval overtype { #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW set in_overflow 1 } + set trailing_nulls 0 + foreach ch [lreverse $outcols] { + if {$ch eq "\u0000"} { + incr trailing_nulls + } else { + break + } + } + if {$trailing_nulls} { + set first_tail_null_posn [expr {[llength $outcols] - $trailing_nulls}] + } else { + set first_tail_null_posn -1 + } + + #puts stderr "first_tail_null_posn: $first_tail_null_posn" + #puts stderr "colview: [ansistring VIEW $outcols]" + foreach ch $outcols { #puts "---- [ansistring VIEW $ch]" @@ -3865,8 +4064,17 @@ tcl::namespace::eval overtype { } append outstring $gxleader append outstring $sgrleader - if {$idx+1 < $cursor_column} { - append outstring [tcl::string::map {\u0000 " "} $ch] + if {$ch eq "\u0000"} { + if {$cp437_glyphs} { + #map all nulls including at tail to space + append outstring " " + } else { + if {$trailing_nulls && $i < $first_tail_null_posn} { + append outstring " " ;#map inner nulls to space + } else { + append outstring \u0000 + } + } } else { append outstring $ch } @@ -3874,12 +4082,20 @@ tcl::namespace::eval overtype { incr i } #flower.ans good test for null handling - reverse line building - if {![ansistring length $overflow_right]} { - set outstring [tcl::string::trimright $outstring "\u0000"] - } - set outstring [tcl::string::map {\u0000 " "} $outstring] - set overflow_right [tcl::string::trimright $overflow_right "\u0000"] - set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] + #review - presence of overflow_right doesn't indicate line's trailing nulls should remain. + #The cells could have been erased? + #if {!$cp437_glyphs} { + # #if {![ansistring length $overflow_right]} { + # # set outstring [tcl::string::trimright $outstring "\u0000"] + # #} + # set outstring [tcl::string::trimright $outstring "\u0000"] + # set outstring [tcl::string::map {\u0000 " "} $outstring] + #} + + + #REVIEW + #set overflow_right [tcl::string::trimright $overflow_right "\u0000"] + #set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] set replay_codes "" if {[llength $understacks] > 0} { @@ -4207,8 +4423,9 @@ tcl::namespace::eval overtype::priv { upvar outcols o upvar understacks ustacks upvar understacks_gx gxstacks + upvar replay_codes_overlay replay #ECH clears character attributes from erased character positions - #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. + #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller of render_erasechar should do that mapping and only supply 1 or greater. if {![tcl::string::is integer -strict $count] || $count < 1} { error "render_erasechar count must be integer >= 1" } @@ -4223,8 +4440,9 @@ tcl::namespace::eval overtype::priv { } set num [expr {$end - $start + 1}] set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? - set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] - set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] + #DECECM ??? + set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list $replay]]] + set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] ;# ??? review return } proc render_setchar {i c } { diff --git a/src/vendormodules/test/tomlish-1.1.1.tm b/src/vendormodules/test/tomlish-1.1.1.tm index 885c56a1047c8fb58c59a7777af8743df10eb957..4ea2ce3d5c130888c6d7d9839df23f375bc5cbbd 100644 GIT binary patch delta 2720 zcmV;R3SafDz5(^X0kA9_0|>7uvo9PkC=s0vDOqXs_-;Z3004j-001wO@jWJ!SY8Z& z?U`+B<33izz0=l4fUSU*?f!R{G%3ItxatDGW^sZj9i}Eo%9XaMEb7PK@vqNQ7Z&Ce$`j z^y81xj^X2jbYoLFU_VliK00x!2IKX^zXlZJqH0N zFuS_`^X&2q4CY%fwWJAcCJIcYPU#`e&-7KIf&xfwWsVS(UdD*;Hw_y-iMVLp@OGu(8Qgj*3;nj(HR0>&ZOs zg}Abfbu1T{bm7Zcp`o^h+8Sb^8|zRJO4B^A_t>Uh%s(KnhjH1+ALG zL|V(LnKJ(99c+7l^U%!^LLDQ`vThPyceQ0n_es4P*_%wbY}^EHySr-6v%9dPejD?M z=*YNq@xCtg%kU)L(IS_5s>#Z)UK(E`wr$&Y?2GALA5(zlb-={Ja0$&q@IMEqCDVJD z)?wQG-sL%%nmcHY{u(z@ufZZMj45#{l4PbVX&`F)0uVT5#{1pxF1>l)_<|<+!6vw~ z*l4tfYmvqrpmzT`V;_<(CG$OFyTsUUUoS3BFRuwKADgYwialNPhr7drdHbtgjtU(% zzv~?yd9XCzd z?ru1ovT-sRjozNojI=q_TxkWVOlB}nbI=nGI=$sm{L8dRB`D2q_&X3B#(6r$vT1^E z<|)(!zJCyUk`@}sj(?Chz~l^KrkT=t5$Pi5kfbcek{=HC=`Ul^En0+}-xUH&b(Bz5 zDw1r<6_#`0_bh>4&-Bd*<_G(G%VIIj(p-~*t=#MVh~@Vz7i^SZ?Z=Eqn#b=U2z+^z zPNz5`vbCqU#xAHL(N<{>|1GE!^nq-e5rz*FpxpsM}KQx5tA`U{*VaC71@Df zG+$(J|>v|%+9 zmSr3};6`)glucP>YJ{`y+0m)9;w0A6VOAgtWZC~pRn2!XhB*}izq8VX8^7HRs6)hnB=9 zkCwe#J|hQd5^2RgfF)#P(ZNW?!?i@RlVB8d7`__AH-97WkJEqt{%{TqPGEl3|8#cw z8;lkkz_Y}LtB7Sh6f(qZnRe+WI#vOMR3heJm0k=I5j4=jq9bUB37QY|VoQk9amdD+ z=OLeFdRCES6ediY+yT>tT;CNbO8Fg^7#xu-DM>Ro)*@gdG-IeU772g%DndIxcbm_R=xhm*HVTHAlda3%TnIG; zsvlilMKf0C_`zfOF}6- z#4jcnWfH2|P}K&PrMe`N*}O7#2Oh|oP?Tj#;J6j%5EpbA1{{|vqHHvo!Bi+ks!@`f zr++c9sh6O8*rj$WO|oEM@1N?jLGYwrh3s|2bTsjS+sv;PG`b6QGpu9o5WPutSYDuo zW*VNv+tr9g97?QZW-oO2diA<>pWT|?)jnBhUIvWI>6Xxxg8w-rvco}u;k=`4w^r6uv(np@Iev$uX>+72))jg~%P``2&_ z2V2uKN(1e?@MG;Leeu_M|E%1T?eotMpZaDY-3DEnYw@JAXt;qd)vp`3aw~KHg9C3| zLB7=6pmVp*-M)>xRrgwN=6CdPy?^Y}??F~Q(f2{?XLt}4Q5ti~;GHjKc^Ywh1VguU z?&#DgqcT2-d6Z}DW$s6@ZczNhe)+j`zs~)>g8Qura&rsf;tM*Q=V|VLhgZU2-SvE* zGZk6_AH0<#Lg$H{Cw?@6bJwS00;o> z2r9E@O}qpH2(Ku!CQ|JS4V?`sS!wk6ZbAeA0DzMqUQi%!2P#?k<5xLr1polwDF6U5 z0000000000000000OLrLL0lh`uU;S^?FcGazJF@S2?PKD^a=m~CIA2c0000000000 a005#+lR;b$qV9n>fg@H z&3^1A&|q&p;lIHProCcC1@XK;1Nttssaa+?^$SUFoaXjz(pRsG>n80u_J6POZtciT zbLY&JIVvKY6#03Z{$~&C-Tx9UMlkOTGd}T2=nc!kXWv&mwLf$!rg>vh&wK}^lvF#_ zyCE0bcY7H(_WNJ*`*e!w_{1t6r}GM;VG_~xVV!HeSf+MQF|n7I{}``0sk)0p)h=Yx z7PG)Nr?hmJWq9;A^gB83fB)+l%jd_7?*-{Dca4iO`*7~>Km8^3B1=V-UGJS|RTH~> zUcfcCa+3dY6Q#bZ#u+QQmjzvu{`#W+;M@oLGM9S|>SlRJOU=FRY<9)QHG5@?pX1}n zlcac$H`WJp#fugQMcK$ceR*d4YpG)&({~tO?TDJm|HXm1He%m}<4HGWT;;vjs{hw+ zxk8zy#X^aDL7j^nUO7md^)x8krFNwwZLjF@XNq1Eet5XLsuVo9CTqPbdgj47r49UX zyEvFX16jk~^mFdc!nF zjjMIpku!5ypK7-3TCOu={c(Gd%WLgko5iA0(mq<+@)PH1#cio|S>RT2>_olVWG?%? z-RGI-{`PqumXjAD^Y@o{uuzTi&l789qa7Jh|y)*Zbs*6ek!u%E4KwET*?=LJ@m zvqHzBE-$@wVBLAH9Cx^^htK>k&#jWGx=fvMiDL@Y4(pyPaA~q{ExJczexF?g^~A9ALv0 z@4qZ@HuKU6ZL7U`J=Q-Iy42ObpFuN}GkjqdTW}`NY&EU&aPfrO=`C`6^IuKKoaV}0 z|ET`TRNh76Z>3V+3nfnWl-}`n-YVAqcc*&VW6v-zdVJ^D*NKsDmOXUc)9kKwIb!nk z*?TSKJSnZ+?f>T7mv!GyI)64xa5CNAvP|sH`GYS{d|_NxI>StU+8(yx>f5Jp#y@s@ zWg4-hk@IR#J9PeoZ@2wDYFa+6$L-|Cg23Y+Ao=8sN>$ zBErDHz`=lCz!(Ax7{$r*p2DLA%xD2KTEL9l0>+C<1>> _get_keyval_value from '$keyval_element'<<<" + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" set found_value 0 #find the value # 3 is the earliest index at which the value could occur (depending on whitespace) @@ -135,7 +117,7 @@ namespace eval tomlish { foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { - STRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { set type [lindex $sub 0] set value [lindex $sub 1] set found_sub $sub @@ -156,10 +138,14 @@ namespace eval tomlish { #simple (non-container, no-substitution) datatype set result [list type $type value $value] } - STRING { + STRING - STRINGPART { set result [list type $type value [::tomlish::utils::unescape_string $value]] } - TABLE - ITABLE - ARRAY { + LITSTRING { + #REVIEW + set result [list type $type value $value] + } + TABLE - ITABLE - ARRAY - MULTISTRING { #jmn2024 - added ITABLE - review #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! @@ -188,7 +174,7 @@ namespace eval tomlish { variable tablenames_seen [list] - puts ">>> processing '$tomlish'<<<" + log::info ">>> processing '$tomlish'<<<" set items $tomlish foreach lst $items { @@ -208,7 +194,7 @@ namespace eval tomlish { #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { KEYVAL - QKEYVAL { - puts "--> processing $tag: $item" + log::debug "--> processing $tag: $item" set key [lindex $item 1] #!todo - normalize key. (may be quoted/doublequoted) @@ -228,7 +214,7 @@ namespace eval tomlish { error "Table name '$tablename' has already been directly defined in the toml data. Invalid." } - puts "--> processing $tag (name: $tablename): $item" + log::debug "--> processing $tag (name: $tablename): $item" set name_segments [::tomlish::utils::tablename_split $tablename] set last_seg "" #toml spec rule - all segments mst be non-empty @@ -305,8 +291,8 @@ namespace eval tomlish { lappend tablenames_seen $tablename - puts ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" - puts ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" #now add the contained elements foreach element [lrange $item 2 end] { @@ -329,6 +315,7 @@ namespace eval tomlish { #!todo. } ITABLE { + #SEP??? set datastructure [list] foreach element [lrange $item 1 end] { set type [lindex $element 0] @@ -350,16 +337,20 @@ namespace eval tomlish { ARRAY { #arrays in toml are allowed to contain mixtures of types set datastructure [list] - puts "--> processing array: $item" + log::debug "--> processing array: $item" foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - STRING - INT - FLOAT - BOOL - DATETIME { + INT - FLOAT - BOOL - DATETIME { set value [lindex $element 1] lappend datastructure [list type $type value $value] } - TABLE - ARRAY { + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + TABLE - ARRAY - MULTISTRING { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } @@ -372,6 +363,89 @@ namespace eval tomlish { } } } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } WS - COMMENT - NEWLINE { #ignore } @@ -415,14 +489,6 @@ namespace eval tomlish { } - #proc sample1 {p1 args} { - # #*** !doctools - # #[call [fun sample1] [arg p1] [opt {?option value...?}]] - # #[para]Description of sample1 - # return "ok" - #} - - #*** !doctools @@ -430,6 +496,7 @@ namespace eval tomlish { } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + namespace eval tomlish::encode { #*** !doctools #[subsection {Namespace tomlish::encode}] @@ -445,7 +512,7 @@ namespace eval tomlish::encode { proc int {i} { #whole numbers, may be prefixed with a + or - #Leading zeros are not allowed - #Hex,octal binary forms not allowed + #Hex,octal binary forms are allowed (toml 1.0) #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. # - We should probably raise an error for number larger than this and suggest the user supply it as a string? @@ -465,6 +532,10 @@ namespace eval tomlish::encode { } proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [string tolower $f]] + } if {[::tomlish::utils::is_float $f]} { return [list FLOAT $f] } else { @@ -481,29 +552,15 @@ namespace eval tomlish::encode { } proc boolean {b} { - if {$b eq "0"} { - set b "false" - } - if {$b eq 1} { - set b "true" - } - set b [tcl::string::tolower $b] - if {$b in {yes y}} { - set b "true" - } - if {$b in {no n}} { - set b "false" - } - if {$b eq "t"} { - set b "true" - } - if {$b eq "f"} { - set b "false" - } - if {$b in {true false}} { - return [list BOOL $b] + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![string is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" } else { - error "Unable to interpret '$b' as Toml boolean. [::tomlish::parse::report_line]" + if {[expr {$b && 1}]} { + return [list BOOL true] + } else { + return [list BOOL false] + } } } @@ -560,7 +617,7 @@ namespace eval tomlish::encode { } MULTISTRING { #explicitly list the valid child tags - if {$tag ni {STRING WS NEWLINE CONT}} { + if {$tag ni {STRING STRINGPART WS NEWLINE CONT}} { error "Invalid tag '$tag' encountered within a MULTISTRING" } } @@ -636,8 +693,11 @@ namespace eval tomlish::encode { # return \"[lindex $item 1]\" } + STRINGPART { + return [lindex $item 1] + } MULTISTRING { - #Double quoted string which is a container for newlines,whitespace and multiple strings + #Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts set multistring "" ;#variable to build up the string foreach part [lrange $item 1 end] { append multistring [::tomlish::encode::tomlish [list $part] $nextcontext] @@ -691,7 +751,7 @@ namespace eval tomlish::encode { #(encode tomlish as toml) interp alias {} tomlish::to_toml {} tomlish::encode::tomlish - +# namespace eval tomlish::decode { @@ -793,7 +853,7 @@ namespace eval tomlish::decode { set prevstate $state ##### set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - puts "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" set state $nextstate if {$state eq "err"} { @@ -822,10 +882,14 @@ namespace eval tomlish::decode { } comma { #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" } endinlinetable { puts stderr "endinlinetable" } + endmultiquote { + puts stderr "endmultiquote for last_space_action 'pop'" + } default { error "unexpected tokenType '$tokenType' for last_space_action 'pop'" } @@ -842,7 +906,7 @@ namespace eval tomlish::decode { barekey { set v($nest) [list KEYVAL $tok] ;#$tok is the keyname } - quotedkey { + quotedkey - itablequotedkey { set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname } tablename { @@ -858,7 +922,7 @@ namespace eval tomlish::decode { # tomlish list? set test_only [::tomlish::utils::tablename_trim $tok] - puts stdout "trimmed (but not normalized) tablename: '$test_only'" + ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) @@ -875,6 +939,14 @@ namespace eval tomlish::decode { startinlinetable { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } + startmultiquote { + puts stderr "push trigger tokenType startmultiquote (todo)" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE + #JMN ??? + #set next_tokenType_known 1 + #::tomlish::parse::set_tokenType "multistring" + #set tok "" + } default { error "push trigger tokenType '$tokenType' not yet implemented" } @@ -900,46 +972,68 @@ namespace eval tomlish::decode { puts stderr "decode::toml error. did not expect startlinetable without space level change" } startquote { - if {$nextstate eq "string"} { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "string" - set tok "" - } elseif {$nextstate eq "quotedkey"} { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "quotedkey" - set tok "" - } else { - error "not implemented. startquote. nextstate: $nextstate" + switch -exact -- $nextstate { + string { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itablequotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "startquote switch case not implemented for nextstate: $nextstate" + } } } startmultiquote { + #review + puts stderr "no space level change - got startmultiquote" set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "multistring" + ::tomlish::parse::set_tokenType "stringpart" set tok "" } endquote { #nothing to do? set tok "" } + endmultiquote { + #JMN!! + set tok "" + } string { lappend v($nest) [list STRING $tok] } + stringpart { + lappend v($nest) [list STRINGPART $tok] + } multistring { + #review lappend v($nest) [list MULTISTRING $tok] } quotedkey { #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + } untyped-value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. if {$tok in {true false}} { - set tag "BOOL" + set tag BOOL } elseif {[::tomlish::utils::is_int $tok]} { - set tag "INT" + set tag INT } elseif {[::tomlish::utils::is_float $tok]} { - set tag "FLOAT" + set tag FLOAT } elseif {[::tomlish::utils::is_datetime $tok]} { - set tag "DATETIME" + set tag DATETIME } else { error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" } @@ -954,7 +1048,7 @@ namespace eval tomlish::decode { lappend v($nest) = } comma { - lappend v($nest) "SEP" + lappend v($nest) SEP } newline { incr linenum @@ -963,8 +1057,11 @@ namespace eval tomlish::decode { whitespace { lappend v($nest) [list WS $tok] } + continuation { + lappend v($nest) CONT + } bom { - lappend v($nest) "BOM" + lappend v($nest) BOM } eof { #ok - nothing more to add to the tomlish list. @@ -1066,22 +1163,28 @@ namespace eval tomlish::utils { incr i if {$c eq "."} { - if {$mode eq "unquoted"} { - #dot marks end of segment. - lappend segments $seg - set seg "" - set mode "unknown" - } elseif {$mode eq "quoted"} { - append seg $c - } elseif {$mode eq "unknown"} { - lappend segments $seg - set seg "" - } elseif {$mode eq "litquoted"} { - append seg $c - } else { - #mode: syntax - #we got our dot. - the syntax mode is now satisfied. - set mode "unknown" + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } } } elseif {($c eq "\"") && ($lastChar ne "\\")} { if {$mode eq "unknown"} { @@ -1139,7 +1242,7 @@ namespace eval tomlish::utils { } if {$i == $sLen} { #end of data - puts "End of data: mode='$mode'" + ::tomlish::log::debug "End of data: mode='$mode'" switch -exact -- $mode { quoted { if {$c ne "\""} { @@ -1255,7 +1358,7 @@ namespace eval tomlish::utils { } set c [::string index $str $i] - puts "unescape_string. got char $c" + ::tomlish::log::debug "unescape_string. got char $c" scan $c %c n if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { #we don't expect unescaped unicode characters from 0000 to 001F - @@ -1336,7 +1439,7 @@ namespace eval tomlish::utils { } } } - puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" if {$unicode4_active} { error "End of string reached before complete unicode escape sequence \uHHHH" } @@ -1408,7 +1511,7 @@ namespace eval tomlish::utils { } #test only that the characters in str are valid for the toml specified type 'integer'. - proc int_validchars {str} { + proc int_validchars1 {str} { set numchars [::string length $str] if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { return 1 @@ -1416,29 +1519,47 @@ namespace eval tomlish::utils { return 0 } } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } proc is_int {str} { - set matches [regexp -all {[0-9\_\-\+]} $str] + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range - #check for leading zeroes + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o #first strip any +, - or _ (just for this test) set check [::string map {+ "" - "" _ ""} $str] - if {([::string length $check] > 1) && ([::string range $check 0 0] eq "0")} { + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { return 0 } + # --------------------------------------- + #check +,- only occur in the first position. if {[::string last - $str] > 0} { - return false + return 0 } if {[::string last + $str] > 0} { - return false + return 0 } - #!todo - check bounds - #even though Tcl can handle bignums, we won't accept anything outside of toml spec. + set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![string is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. #presumably very large numbers would have to be supplied in a toml file as strings. - set numeric_value [::string map {_ ""} $str] + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max if {$numeric_value > $::tomlish::max_int} { return 0 } @@ -1458,12 +1579,21 @@ namespace eval tomlish::utils { if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { return 1 } else { - return 0 + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } } } proc is_float {str} { set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } if {[::string length $str] == $matches} { #all characters in legal range @@ -1487,6 +1617,7 @@ namespace eval tomlish::utils { #strip underscores for tcl double check set check [::string map {_ ""} $str] + #string is double accepts inf nan +NaN etc. if {![::string is double $check]} { return 0 } @@ -1568,10 +1699,10 @@ namespace eval tomlish::parse { curly-space {\ whitespace "curly-space"\ newline "curly-space"\ - barekey {pushspace "commakeyval-space"}\ - quotedkey "commakeyval-space"\ - xuntyped-value {pushspace "commakeyval-space"}\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ endinlinetable "popspace"\ + startquote "itablequotedkey"\ comma "curly-space"\ eof "err"\ comment "err"\ @@ -1584,10 +1715,10 @@ namespace eval tomlish::parse { curly-syntax {\ whitespace "curly-syntax"\ newline "curly-syntax"\ - barekey {pushspace "commakeyval-space"}\ - quotedkey "commakeyval-space"\ - xuntyped-value {pushspace "commakeyval-space"}\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ endinlinetable "popspace"\ + startquote "itablequotedkey"\ comma "curly-space"\ eof "err"\ comment "err"\ @@ -1635,15 +1766,19 @@ namespace eval tomlish::parse { dict set stateMatrix\ - commakeyval-syntax {whitespace "commakeyval-syntax" endquote "commakeyval-syntax" newline "err" equal "value-expected" eof "err"} + itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + #dict set stateMatrix\ + # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} dict set stateMatrix\ - commakeytail {whitespace "commakeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} dict set stateMatrix\ - commakeyval-space {} + itablekeyval-space {} + dict set stateMatrix\ + itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} dict set stateMatrix\ - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} dict set stateMatrix\ keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} dict set stateMatrix\ @@ -1660,7 +1795,7 @@ namespace eval tomlish::parse { dict set stateMatrix\ multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} dict set stateMatrix\ - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} dict set stateMatrix\ tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} dict set stateMatrix\ @@ -1722,11 +1857,11 @@ namespace eval tomlish::parse { array-space array-syntax curly-space curly-syntax keyval-space keytail - commakeyval-space commakeytail + itablekeyval-space itablevaltail } variable spacePushTransitions { keyval-space keyval-syntax - commakeyval-space commakeyval-syntax + itablekeyval-space itablekeyval-syntax array-space array-space curly-space curly-space key-space tablename @@ -1752,7 +1887,7 @@ namespace eval tomlish::parse { if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] - puts "getNextState tokentype:$tokentype , currentstate:$currentstate : transition_to = $transition_to" + ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { popspace { spacestack pop @@ -1763,7 +1898,7 @@ namespace eval tomlish::parse { if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { set next [dict get $::tomlish::parse::spacePopTransitions $target] - puts "--->> pop transition to space $target redirected state to $next <<---" + ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" } else { set next $target } @@ -1772,11 +1907,11 @@ namespace eval tomlish::parse { samespace { #note the same data as popspace (spacePopTransitions) is used here. set parent [spacestack peek] - puts ">>>>>>>>> got parent $parent <<<<<" + ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" lassign $parent type target if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { set next [dict get $::tomlish::parse::spacePopTransitions $target] - puts "--->> samespace transition to space $target redirected state to $next <<---" + ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" } else { set next $target } @@ -1802,7 +1937,7 @@ namespace eval tomlish::parse { #set next [list pushspace [lindex $transition_to 1]] set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 - puts "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" + ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" set result [::tomlish::parse::getNextState $nexttokentype $tokentype] } pushspace { @@ -1814,7 +1949,7 @@ namespace eval tomlish::parse { #puts $::tomlish::parse::spacePushTransitions if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { set next [dict get $::tomlish::parse::spacePushTransitions $target] - puts "--->> push transition to space $target redirected state to $next <<---" + ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" } else { set next $target } @@ -1941,6 +2076,7 @@ namespace eval tomlish::parse { set slash_active 0 set quote 0 set c "" + set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { set lastChar [string index $s [expr {$i - 1}]] @@ -1955,6 +2091,8 @@ namespace eval tomlish::parse { set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 @@ -1993,6 +2131,7 @@ namespace eval tomlish::parse { } } lc { + set multi_dquote "" ;#!! #test jmn2024 #left curly brace try { @@ -2001,7 +2140,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring { + string - stringpart { if {$slash_active} {append tok "\\"} append tok $c } @@ -2025,7 +2164,6 @@ namespace eval tomlish::parse { } } } else { - #$slash_active not relevant when no tokenType switch -exact -- $state { value-expected { #switch last key to tablename?? @@ -2033,6 +2171,14 @@ namespace eval tomlish::parse { set tok "\{" return 1 } + multistring-space { + set_tokenType "stringpart" + if {$slash_active} { + set tok "\\\{" + } else { + set tok "\{" + } + } key-space { #invalid - but allow parser statemachine to report it. ? set_tokenType "startinlinetable" @@ -2058,6 +2204,7 @@ namespace eval tomlish::parse { } rc { + set multi_dquote "" ;#!! #right curly brace try { if {[string length $tokenType]} { @@ -2065,7 +2212,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring - comment { + string - stringpart - comment { if {$slash_active} {append tok "\\"} append tok $c } @@ -2083,7 +2230,7 @@ namespace eval tomlish::parse { dict set token_waiting tok "" return 1 } - commakeytail { + itablevaltail { } default { @@ -2133,13 +2280,16 @@ namespace eval tomlish::parse { set tok "\}" return 1 } - commakeytail { + itablevaltail { set_tokenType "endinlinetable" set tok "" #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 incr i -1 return 1 } + itablekeyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } default { #JMN2024b keytail? error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" @@ -2154,6 +2304,7 @@ namespace eval tomlish::parse { } lb { + set multi_dquote "" ;#!! #left square bracket try { if {[::string length $tokenType]} { @@ -2161,7 +2312,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring { + string - stringpart { if {$slash_active} {append tok "\\"} append tok $c } @@ -2218,30 +2369,37 @@ namespace eval tomlish::parse { } } rb { + set multi_dquote "" ;#!! #right square bracket try { if {[string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence - } elseif {$tokenType in {"string" "multistring" "comment"}} { - if {$slash_active} {append tok "\\"} - append tok $c - } elseif {$tokenType eq "tablename"} { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablename - dict set token_waiting tok "" - return 1 - } elseif {$tokenType eq "tablearrayname"} { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" - return 1 - } else { - incr i -1 - return 1 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablename + dict set token_waiting tok "" + return 1 + } + tablearraynames { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + default { + incr i -1 + return 1 + } } } else { #$slash_active not relevant when no tokenType @@ -2290,13 +2448,15 @@ namespace eval tomlish::parse { } } bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! #backslash if {[::string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } - string - multistring - litstring - multilitstring - comment - tablename - tablearrayname { + string - litstring - multilitstring - comment - tablename - tablearrayname { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -2304,6 +2464,25 @@ namespace eval tomlish::parse { set slash_active 1 } } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } barekey { error "Unexpected backslash during barekey. [tomlish::parse::report_line]" } @@ -2312,85 +2491,129 @@ namespace eval tomlish::parse { } } } else { - error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + if {$state eq "multistring-space"} { + set slash_active 1 + } else { + error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } } } dq { #double quote try { if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - set_tokenType "startmultiquote" - return 1 - } else { - error "unexpected token length in 'startquotesequence'" + switch -exact -- $tokenType { + startquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length in 'startquotesequence'" + } } - } elseif {$tokenType eq "endquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - set_tokenType "endmultiquote" - return 1 - } else { - error "unexpected token length in 'endquotesequence'" + endquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "endmultiquote" + return 1 + } else { + error "unexpected token length in 'endquotesequence'" + } } - } elseif {$tokenType eq "string"} { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" - return 1 + string { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #unescaped quote always terminates a string? + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } } - } elseif {$tokenType eq "multistring"} { - if {$slash_active} { - append tok "\\" + stringpart { + #sub element of multistring + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + value-expected { + if {$multi_dquote eq "\"\""} { + dict set token_waiting type startmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + #end whitespace token and reprocess + incr i -1 + return 1 + #append multi_dquote "\"" + } + } + default { + dict set token_waiting type startquote + dict set token_waiting tok "\"" + return 1 + } + } + } + comment { + if {$slash_active} {append tok "\\"} append tok $c - } else { - incr i -1 - - if {$multi_endquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - return 1 + } + quotedkey - itablequotedkey { + if {$slash_active} { + append tok "\\" + append tok $c } else { - append multi_endquote "\"" + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 } } - } elseif {$tokenType eq "whitespace"} { - - dict set token_waiting type startquote - dict set token_waiting tok "\"" - return 1 - } elseif {$tokenType eq "comment"} { - if {$slash_active} {append tok "\\"} - append tok $c - } elseif {$tokenType eq "quotedkey"} { - if {$slash_active} { - append tok "\\" + tablename - tablearrayname { + if {$slash_active} {append tok "\\"} append tok $c - } else { - dict set token_waiting type endquote - dict set token_waiting tok "\"" + } + starttablename - starttablearrayname { + incr i -1 ;## return 1 } - } elseif {$tokenType in {"tablename" "tablearrayname"}} { - if {$slash_active} {append tok "\\"} - append tok $c - } elseif {$tokenType in {"starttablename" "starttablearrayname"}} { - incr i -1 ;## - return 1 - } else { - error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } } } else { #$slash_active not relevant when no tokenType @@ -2404,6 +2627,17 @@ namespace eval tomlish::parse { set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote set tok $c } + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } key-space { set tokenType startquote set tok $c @@ -2430,6 +2664,8 @@ namespace eval tomlish::parse { } } = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 @@ -2438,10 +2674,13 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring - comment - quotedkey { + string - comment - quotedkey { #for these tokenTypes an = is just data. append tok $c } + stringpart { + append tok $dquotes$c + } whitespace { dict set token_waiting type equal dict set token_waiting tok = @@ -2457,56 +2696,103 @@ namespace eval tomlish::parse { } } } else { - set_tokenType equal - set tok = - return 1 + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok ${dquotes}= + } + default { + set_tokenType equal + set tok = + return 1 + } + } } } cr { + set dquotes $multi_dquote + set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + stringpart { + append tok $dquotes$c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } } - #!todo - error out if cr inappropriate for tokenType - append tok $c } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) set_tokenType newline set tok cr } } lf { + set dquotes $multi_dquote + set multi_dquote "" ;#!! # \n newline - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence - } elseif {$tokenType eq "newline"} { - #this lf is the trailing part of a crlf - append tok lf + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + newline { + #this lf is the trailing part of a crlf + append tok lf + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + } else { + set had_slash $slash_active + set slash_active 0 + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 return 1 } else { - #newline ends all other tokens. - #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) - #note for whitespace: - # we will use the convention that \n terminates the current whitespace even if whitespace follows - # ie whitespace is split into separate whitespace tokens at each newline - - #puts "-------------- newline lf during tokenType $tokenType" - dict set token_waiting type newline - dict set token_waiting tok lf + set_tokenType newline + set tok lf return 1 } - } else { - set_tokenType newline - set tok lf - return 1 } } , { + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 if {[::string length $tokenType]} { @@ -2517,6 +2803,9 @@ namespace eval tomlish::parse { string - comment - quotedkey - tablename - tablearrayname { append tok $c } + stringpart { + append tok $dquotes$c + } default { dict set token_waiting type comma dict set token_waiting tok "," @@ -2524,12 +2813,25 @@ namespace eval tomlish::parse { } } } else { - set_tokenType comma - set tok "," - return 1 + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "," + } + multiliteral-space { + set_tokenType literalpart + set tok "," + } + default { + set_tokenType comma + set tok "," + return 1 + } + } } } . { + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 if {[::string length $tokenType]} { @@ -2537,7 +2839,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey - untyped-value { + string - stringpart - comment - quotedkey - untyped-value { append tok $c } baretablename - tablename - tablearrayname { @@ -2559,18 +2861,29 @@ namespace eval tomlish::parse { } } } else { - set_tokenType "untyped-value" - set tok $c - #set_tokenType period - #set tok "." - #return 1 + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "." + } + multiliteral-space { + set_tokenType literalpart + set tok "." + } + default { + set_tokenType untyped-value + set tok "." + } + } } } " " { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {[::string length $tokenType]} { + set had_slash $slash_active + set slash_active 0 switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence @@ -2589,9 +2902,36 @@ namespace eval tomlish::parse { incr i -1 return 1 } - quotedkey - string - comment - whitespace { + comment { + if {$had_slash} { + append tok "\\" + } + append tok $c + } + quotedkey - string { + if {$had_slash} { + append tok "\\" + } + #if {$dquotes eq "\""} { + #} append tok $c } + whitespace { + append tok $c + } + stringpart { + if {$had_slash} { + #REVIEW + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + #keeping WS separate allows easier processing of CONT stripping + append tok $dquotes + incr i -1 + return 1 + } + } starttablename { incr i -1 return 1 @@ -2610,66 +2950,129 @@ namespace eval tomlish::parse { } } } else { - if {$state in {"tablename" "tablearrayname"}} { - #tablename can have leading,trailing and interspersed whitespace! - #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - set tok $c - } else { - set_tokenType "whitespace" - append tok $c + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + if {$had_slash} { + set tok "\\$c" + } else { + set tok $c + } + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return + } + set_tokenType "whitespace" + append tok $c + } + } + default { + if {$had_slash} { + error "unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } } } } tab { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence - } elseif {($tokenType eq "barekey")} { - #whitespace is a terminator for bare keys - incr i -1 - #set token_waiting type whitespace - #set token_waiting tok $c - return 1 - } elseif {$tokenType eq "untyped-value"} { - #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c - incr i -1 - return 1 - } elseif {($tokenType eq "quotedkey")} { - append tok $c - } elseif {$tokenType eq "string"} { - append tok $c - } elseif {$tokenType eq "comment"} { - append tok $c - } elseif {$tokenType eq "whitespace"} { - append tok $c - } elseif {$tokenType eq "starttablename"} { - incr i -1 - return 1 - } elseif {$tokenType eq "starttablearrayname"} { - incr i -1 - return 1 - } elseif {$tokenType in {"tablename" "tablearrayname"}} { - #include whitespace in the tablename/tablearrayname - #Will need to be normalized upon interpreting the tomlish as a datastructure - append tok $c - } else { - error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set token_waiting type whitespace + #set token_waiting tok $c + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + quotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + append tok $dquotes$c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } } } else { - if {$state in {"tablename" "tablearrayname"}} { - #tablename can have leading,trailing and interspersed whitespace! - #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - set tok $c - } else { - set_tokenType "whitespace" - append tok $c + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + default { + set_tokenType "whitespace" + append tok $c + } } } } @@ -2680,14 +3083,19 @@ namespace eval tomlish::parse { return 1 } default { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } + endquotesequence { + puts stderr "endquotesequence: $tok" + } whitespace { incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. return 1 @@ -2704,12 +3112,17 @@ namespace eval tomlish::parse { #allow statemachine to set context for subsequent chars return 1 } + stringpart { + append tok $dquotes$c + } default { #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } } else { + set had_slash $slash_active + set slash_active 0 switch -exact -- $state { key-space - curly-space - curly-syntax { #if no currently active token - assume another key value pair @@ -2720,6 +3133,15 @@ namespace eval tomlish::parse { error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } tablename { set_tokenType "tablename" set tok $c @@ -2765,7 +3187,7 @@ namespace eval tomlish::parse { dict set token_waiting tok "eof" return 1 } else { - puts "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" set tokenType "eof" set tok "eof" } diff --git a/src/vfs/_vfscommon/modules/overtype-1.6.5.tm b/src/vfs/_vfscommon/modules/overtype-1.6.5.tm index 38ce71c2..492341d6 100644 --- a/src/vfs/_vfscommon/modules/overtype-1.6.5.tm +++ b/src/vfs/_vfscommon/modules/overtype-1.6.5.tm @@ -233,7 +233,6 @@ tcl::namespace::eval overtype { -width \uFFEF\ -height \uFFEF\ -startcolumn 1\ - -wrap 0\ -ellipsis 0\ -ellipsistext $default_ellipsis_horizontal\ -ellipsiswhitespace 0\ @@ -243,11 +242,13 @@ tcl::namespace::eval overtype { -exposed1 \uFFFD\ -exposed2 \uFFFD\ -experimental 0\ - -cp437 1\ + -cp437 0\ -looplimit \uFFEF\ -crm_mode 0\ -reverse_mode 0\ -insert_mode 0\ + -wrap 0\ + -info 0\ -console {stdin stdout stderr}\ ] #expand_right is perhaps consistent with the idea of the page_size being allowed to grow horizontally.. @@ -263,14 +264,19 @@ tcl::namespace::eval overtype { #-ellipsis args not used if -wrap is true foreach {k v} $argsflags { switch -- $k { - -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace + -looplimit - -width - -height - -startcolumn - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -transparent - -exposed1 - -exposed2 - -experimental - -expand_right - -appendlines - -reverse_mode - -crm_mode - -insert_mode - -cp437 - - -console { + - -info - -console { tcl::dict::set opts $k $v } + -wrap - -autowrap_mode { + #temp alias -autowrap_mode for consistency with renderline + #todo - + tcl::dict::set opts -wrap $v + } default { error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" } @@ -280,10 +286,6 @@ tcl::namespace::eval overtype { # -- --- --- --- --- --- #review - expand_left for RTL text? set opt_expand_right [tcl::dict::get $opts -expand_right] - ##### - # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. - set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) - ##### #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. set opt_width [tcl::dict::get $opts -width] set opt_height [tcl::dict::get $opts -height] @@ -298,50 +300,34 @@ tcl::namespace::eval overtype { set opt_crm_mode [tcl::dict::get $opts -crm_mode] set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] set opt_insert_mode [tcl::dict::get $opts -insert_mode] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_autowrap_mode [tcl::dict::get $opts -wrap] + #??? wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### # -- --- --- --- --- --- set opt_cp437 [tcl::dict::get $opts -cp437] + set opt_info [tcl::dict::get $opts -info] - #initial state for renderspace 'terminal' reset - set initial_state [dict create\ - -width $opt_width\ - -height $opt_height\ - -crm_mode $opt_crm_mode\ - -reverse_mode $opt_reverse_mode\ - -insert_mode $opt_insert_mode\ - -cp437 $opt_cp437\ - ] # ---------------------------- # -experimental dev flag to set flags etc # ---------------------------- set data_mode 0 - set info_mode 0 set edit_mode 0 set opt_experimental [tcl::dict::get $opts -experimental] foreach o $opt_experimental { switch -- $o { - old_mode { - set info_mode 1 - } data_mode { set data_mode 1 } - info_mode { - set info_mode 1 - } edit_mode { set edit_mode 1 } } } # ---------------------------- - - #modes - set insert_mode $opt_insert_mode ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l - set autowrap_mode $opt_wrap - set reverse_mode $opt_reverse_mode - set crm_mode $opt_crm_mode set underblock [tcl::string::map {\r\n \n} $underblock] @@ -367,6 +353,20 @@ tcl::namespace::eval overtype { set renderwidth $opt_width set renderheight $opt_height } + #initial state for renderspace 'terminal' reset + set initial_state [dict create\ + renderwidth $renderwidth\ + renderheight $renderheight\ + crm_mode $opt_crm_mode\ + reverse_mode $opt_reverse_mode\ + insert_mode $opt_insert_mode\ + autowrap_mode $opt_autowrap_mode\ + cp437 $opt_cp437\ + ] + #modes + #e.g insert_mode can be toggled by insert key or ansi IRM sequence CSI 4 h|l + #opt_startcolumn ?? - DECSLRM ? + set vtstate $initial_state # -- --- --- --- #REVIEW - do we need ansi resets in the underblock? @@ -494,50 +494,55 @@ tcl::namespace::eval overtype { } #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set renderargs [list -experimental $opt_experimental\ - -cp437 $opt_cp437\ - -info 1\ - -crm_mode $crm_mode\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ - -reverse_mode $reverse_mode\ - -cursor_restore_attributes $cursor_saved_attributes\ - -transparent $opt_transparent\ - -width $renderwidth\ - -exposed1 $opt_exposed1\ - -exposed2 $opt_exposed2\ - -expand_right $opt_expand_right\ - -cursor_column $col\ - -cursor_row $row\ + set renderargs [list -experimental $opt_experimental\ + -cp437 $opt_cp437\ + -info 1\ + -crm_mode [tcl::dict::get $vtstate crm_mode]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ + -reverse_mode [tcl::dict::get $vtstate reverse_mode]\ + -cursor_restore_attributes $cursor_saved_attributes\ + -transparent $opt_transparent\ + -width [tcl::dict::get $vtstate renderwidth]\ + -exposed1 $opt_exposed1\ + -exposed2 $opt_exposed2\ + -expand_right $opt_expand_right\ + -cursor_column $col\ + -cursor_row $row\ $undertext\ $overtext\ ] set LASTCALL $renderargs set rinfo [renderline {*}$renderargs] - set instruction [tcl::dict::get $rinfo instruction] - set insert_mode [tcl::dict::get $rinfo insert_mode] - set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# - set reverse_mode [tcl::dict::get $rinfo reverse_mode] + set instruction [tcl::dict::get $rinfo instruction] + tcl::dict::set vtstate crm_mode [tcl::dict::get $rinfo crm_mode] + tcl::dict::set vtstate insert_mode [tcl::dict::get $rinfo insert_mode] + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# + tcl::dict::set vtstate reverse_mode [tcl::dict::get $rinfo reverse_mode] #how to support reverse_mode in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? # - review - the answer is probably that we don't need to - it is set/reset only during application of overtext - set crm_mode [tcl::dict::get $rinfo crm_mode] - set rendered [tcl::dict::get $rinfo result] - set overflow_right [tcl::dict::get $rinfo overflow_right] - set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] - set unapplied [tcl::dict::get $rinfo unapplied] - set unapplied_list [tcl::dict::get $rinfo unapplied_list] - set post_render_col [tcl::dict::get $rinfo cursor_column] - set post_render_row [tcl::dict::get $rinfo cursor_row] - set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] - set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] - set visualwidth [tcl::dict::get $rinfo visualwidth] - set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] - set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] - tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + + #Note carefully the difference betw overflow_right and unapplied. + #overflow_right may need to be included in next run before the unapplied data + #overflow_right most commonly has data when in insert_mode + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] + set unapplied [tcl::dict::get $rinfo unapplied] + set unapplied_list [tcl::dict::get $rinfo unapplied_list] + set post_render_col [tcl::dict::get $rinfo cursor_column] + set post_render_row [tcl::dict::get $rinfo cursor_row] + set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] + set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] + set visualwidth [tcl::dict::get $rinfo visualwidth] ;#column width of what is 'rendered' for the line + set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] + set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] - if {0 && $reverse_mode} { + if {0 && [tcl::dict::get $vtstate reverse_mode]} { #test branch - todo - prune puts stderr "---->[ansistring VIEW $replay_codes_overlay] rendered: $rendered" #review @@ -593,19 +598,29 @@ tcl::namespace::eval overtype { #todo - handle potential insertion mode as above for cursor restore? #keeping separate branches for debugging - review and merge as appropriate when stable - tcl::dict::incr instruction_stats $instruction - switch -- $instruction { + set instruction_type [lindex $instruction 0] ;#some instructions have params + tcl::dict::incr instruction_stats $instruction_type + switch -- $instruction_type { reset { #reset the 'renderspace terminal' (not underlying terminal) set row 1 set col 1 - - + set vtstate [tcl::dict::merge $vtstate $initial_state] + #todo - clear screen } {} { + #end of supplied line input #lf included in data set row $post_render_row set col $post_render_col + if {![llength $unapplied_list]} { + if {$overflow_right ne ""} { + incr row + } + } else { + puts stderr "renderspace end of input line - has unapplied: [ansistring VIEW $unapplied] (review)" + } + set col $opt_startcolumn } up { @@ -708,17 +723,18 @@ tcl::namespace::eval overtype { puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" - set sub_info [overtype::renderline -info 1\ - -width $renderwidth\ - -insert_mode $insert_mode\ - -autowrap_mode $autowrap_mode\ + set sub_info [overtype::renderline\ + -info 1\ + -width [tcl::dict::get $vtstate renderwidth]\ + -insert_mode [tcl::dict::get $vtstate insert_mode]\ + -autowrap_mode [tcl::dict::get $vtstate autowrap_mode]\ -expand_right [tcl::dict::get $opts -opt_expand_right]\ ""\ $overflow_right\ ] - set foldline [tcl::dict::get $sub_info result] - set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? - set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. + set foldline [tcl::dict::get $sub_info result] + tcl::dict::set vtstate insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed..? + tcl::dict::set vtstate autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. linsert outputlines $renderedrow $foldline #review - row & col set by restore - but not if there was no save.. } @@ -745,6 +761,53 @@ tcl::namespace::eval overtype { set col $post_render_col #overflow + unapplied? } + clear_and_move { + #e.g 2J + if {$post_render_row > [llength $outputlines]} { + set row [llength $outputlines] + } else { + set row $post_render_row + } + set col $post_render_col + set overflow_right "" ;#if we're clearing - any overflow due to insert_mode is irrelevant + set clearedlines [list] + foreach ln $outputlines { + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $renderwidth]\x1b\[0m + if 0 { + + set lineparts [punk::ansi::ta::split_codes $ln] + set numcells 0 + foreach {pt _code} $lineparts { + if {$pt ne ""} { + foreach grapheme [punk::char::grapheme_split $pt] { + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + incr numcells 1 + } + default { + if {$grapheme eq "\u0000"} { + incr numcells 1 + } else { + incr numcells [grapheme_width_cached $grapheme] + } + } + } + + } + } + } + #replays/resets each line + lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \000 $numcells]\x1b\[0m + } + } + set outputlines $clearedlines + #todo - determine background/default to be in effect - DECECM ? + puts stderr "replay_codes_overlay: [ansistring VIEW $replay_codes_overlay]" + #lset outputlines 0 $replay_codes_overlay[lindex $outputlines 0] + + } lf_start { #raw newlines # ---------------------- @@ -780,27 +843,48 @@ tcl::namespace::eval overtype { append rendered $overflow_right set overflow_right "" } else { + #review - we should really make renderline do the work...? set overflow_width [punk::ansi::printing_length $overflow_right] if {$visualwidth + $overflow_width <= $renderwidth} { append rendered $overflow_right set overflow_right "" } else { - if {$visualwidth < $renderwidth} { - set graphemes [punk::char::grapheme_split $overflow_width] - set add "" - set addlen $visualwidth - set remaining_overflow $graphemes - foreach g $graphemes { - set w [overtype::grapheme_width_cached] - if {$addlen + $w <= $renderwidth} { - append add $g - incr addlen $w - lpop remaining_overflow - } else { - break - } + if {[tcl::dict::get $vtstate autowrap_mode]} { + set outputlines [linsert $outputlines $renderedrow $overflow_right] + set overflow_right "" + set row [expr {$renderedrow + 2}] + } else { + set overflow_right "" ;#abandon + } + + if {0 && $visualwidth < $renderwidth} { + puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" + error "incomplete - abandon?" + set overflowparts [punk::ansi::ta::split_codes $overflow_right] + set remaining_overflow $overflowparts + set filled 0 + foreach {pt code} $overflowparts { + lpop remaining_overflow 0 + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set add "" + set addlen $visualwidth + foreach g $graphemes { + set w [overtype::grapheme_width_cached $g] + if {$addlen + $w <= $renderwidth} { + append add $g + incr addlen $w + } else { + set filled 1 + break + } + } + append rendered $add + } + if {!$filled} { + lpop remaining_overflow 0 ;#pop code + } } - append rendered $add set overflow_right [join $remaining_overflow ""] } } @@ -829,14 +913,16 @@ tcl::namespace::eval overtype { #we may also have other control sequences that came after col 80 e.g cursor save if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" - #assertion - there should be no overflow.. - puts $lhs + #assertion - there should be no overflow.. + puts $lhs + } + if {![tcl::dict::get $vtstate insert_mode]} { + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode } - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right set row $post_render_row #set row $renderedrow @@ -981,7 +1067,7 @@ tcl::namespace::eval overtype { #normal single-width grapheme overflow #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" set row $post_render_row ;#renderline will not advance row when reporting overflow char - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { incr row set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? } else { @@ -1014,7 +1100,7 @@ tcl::namespace::eval overtype { #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc - if {$autowrap_mode} { + if {[tcl::dict::get $vtstate autowrap_mode]} { if {$renderwidth < 2} { #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character set idx 0 @@ -1055,6 +1141,14 @@ tcl::namespace::eval overtype { set row $post_render_row set col $post_render_col } + set_window_title { + set newtitle [lindex $instruction 1] + puts stderr "overtype::renderspace set_window_title [ansistring VIEW $newtitle] instruction '$instruction'" + # + } + reset_colour_palette { + puts stderr "overtype::renderspace instruction '$instruction' unimplemented" + } default { puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" } @@ -1062,7 +1156,7 @@ tcl::namespace::eval overtype { } - if {!$opt_expand_right && !$autowrap_mode} { + if {!$opt_expand_right && ![tcl::dict::get $vtstate autowrap_mode]} { #not allowed to overflow column or wrap therefore we get overflow data to truncate if {[tcl::dict::get $opts -ellipsis]} { set show_ellipsis 1 @@ -1160,11 +1254,22 @@ tcl::namespace::eval overtype { } set result [join $outputlines \n] - if {$info_mode} { + if {!$opt_info} { + return $result + } else { #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? #append result \n$instruction_stats\n + set inforesult [dict create\ + result $result\ + last_instruction $instruction\ + instruction_stats $instruction_stats\ + ] + if {$opt_info == 2} { + return [pdict -channel none inforesult] + } else { + return $inforesult + } } - return $result } #todo - left-right ellipsis ? @@ -2368,14 +2473,22 @@ tcl::namespace::eval overtype { } else { #linefeed occurred in middle or at end of text #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" - incr cursor_row - if {$idx == -1 || $overflow_idx > $idx} { - #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow - set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + if {$insert_mode == 0} { + incr cursor_row + if {$idx == -1 || $overflow_idx > $idx} { + #don't set overflow_idx higher if it's already set lower and we're adding graphemes to overflow + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to expand_right = 1 + } + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + incr cursor_row + #don't adjust the overflow_idx + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction lf_mid + break ;# could have overdata following the \n - don't keep processing } - set instruction lf_mid - priv::render_unapplied $overlay_grapheme_control_list $gci - break } } @@ -3077,8 +3190,11 @@ tcl::namespace::eval overtype { set idx [expr {$cursor_column -1}] set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] incr idx_over + if {[llength $outcols]} { + priv::render_erasechar 0 [llength $outcols] + } priv::render_unapplied $overlay_grapheme_control_list $gci - set instruction move + set instruction clear_and_move break } 3 { @@ -3749,6 +3865,72 @@ tcl::namespace::eval overtype { } 7OSC - 8OSC { + # OSCs are terminated with ST of either \007 or \x1b\\ - we allow either whether code was 7 or 8 bit + if {[tcl::string::index $codenorm end] eq "\007"} { + set code_content [tcl::string::range $codenorm 4 end-1] ;#ST is \007 + } else { + set code_content [tcl::string::range $codenorm 4 end-2] ;#ST is \x1b\\ + } + set first_colon [tcl::string::first {;} $code_content] + if {$first_colon == -1} { + #there probably should always be a colon - but we'll try to make sense of it without + set osc_code $code_content ;#e.g \x1b\]104\007 vs \x1b\]104\;\007 + } else { + set osc_code [tcl::string::range $code_content 0 $first_colon-1] + } + switch -exact -- $osc_code { + 2 { + set newtitle [tcl::string::range $code_content 2 end] + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list set_window_title $newtitle] + break + } + 4 { + #OSC 4 - set colour palette + #can take multiple params + #e.g \x1b\]4\;1\;red\;2\;green\x1b\\ + set params [tcl::string::range $code_content 1 end] + + puts stderr "overtype::renderline OSC 4 set colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 { + #OSC 10 through 17 - so called 'dynamic colours' + #can take multiple params - each successive parameter changes the next colour in the list + #- e.g if code started at 11 - next param is for 12. 17 takes only one param because there are no more + #10 change text foreground colour + #11 change text background colour + #12 change text cursor colour + #13 change mouse foreground colour + #14 change mouse background colour + #15 change tektronix foreground colour + #16 change tektronix background colour + #17 change highlight colour + set params [tcl::string::range $code_content 2 end] + + puts stderr "overtype::renderline OSC $osc_code set dynamic colours unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + + + } + 18 { + #why is this not considered one of the dynamic colours above? + #https://www.xfree86.org/current/ctlseqs.html + #tektronix cursor color + puts stderr "overtype::renderline OSC 18 - set tektronix cursor color unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + 104 { + #reset colour palette + #we want to do it for the current rendering context only (vvt) - not just pass through to underlying vt + puts stderr "overtype::renderline OSC 104 reset colour palette unimplemented. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction [list reset_colour_palette] + break + } + default { + puts stderr "overtype::renderline OSC - UNIMPLEMENTED. code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } + } } default { @@ -3791,6 +3973,23 @@ tcl::namespace::eval overtype { #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW set in_overflow 1 } + set trailing_nulls 0 + foreach ch [lreverse $outcols] { + if {$ch eq "\u0000"} { + incr trailing_nulls + } else { + break + } + } + if {$trailing_nulls} { + set first_tail_null_posn [expr {[llength $outcols] - $trailing_nulls}] + } else { + set first_tail_null_posn -1 + } + + #puts stderr "first_tail_null_posn: $first_tail_null_posn" + #puts stderr "colview: [ansistring VIEW $outcols]" + foreach ch $outcols { #puts "---- [ansistring VIEW $ch]" @@ -3865,8 +4064,17 @@ tcl::namespace::eval overtype { } append outstring $gxleader append outstring $sgrleader - if {$idx+1 < $cursor_column} { - append outstring [tcl::string::map {\u0000 " "} $ch] + if {$ch eq "\u0000"} { + if {$cp437_glyphs} { + #map all nulls including at tail to space + append outstring " " + } else { + if {$trailing_nulls && $i < $first_tail_null_posn} { + append outstring " " ;#map inner nulls to space + } else { + append outstring \u0000 + } + } } else { append outstring $ch } @@ -3874,12 +4082,20 @@ tcl::namespace::eval overtype { incr i } #flower.ans good test for null handling - reverse line building - if {![ansistring length $overflow_right]} { - set outstring [tcl::string::trimright $outstring "\u0000"] - } - set outstring [tcl::string::map {\u0000 " "} $outstring] - set overflow_right [tcl::string::trimright $overflow_right "\u0000"] - set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] + #review - presence of overflow_right doesn't indicate line's trailing nulls should remain. + #The cells could have been erased? + #if {!$cp437_glyphs} { + # #if {![ansistring length $overflow_right]} { + # # set outstring [tcl::string::trimright $outstring "\u0000"] + # #} + # set outstring [tcl::string::trimright $outstring "\u0000"] + # set outstring [tcl::string::map {\u0000 " "} $outstring] + #} + + + #REVIEW + #set overflow_right [tcl::string::trimright $overflow_right "\u0000"] + #set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] set replay_codes "" if {[llength $understacks] > 0} { @@ -4207,8 +4423,9 @@ tcl::namespace::eval overtype::priv { upvar outcols o upvar understacks ustacks upvar understacks_gx gxstacks + upvar replay_codes_overlay replay #ECH clears character attributes from erased character positions - #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. + #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller of render_erasechar should do that mapping and only supply 1 or greater. if {![tcl::string::is integer -strict $count] || $count < 1} { error "render_erasechar count must be integer >= 1" } @@ -4223,8 +4440,9 @@ tcl::namespace::eval overtype::priv { } set num [expr {$end - $start + 1}] set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? - set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] - set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] + #DECECM ??? + set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list $replay]]] + set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] ;# ??? review return } proc render_setchar {i c } { diff --git a/src/vfs/_vfscommon/modules/punk/aliascore-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/aliascore-0.1.0.tm index 85d870d7..83c02d0b 100644 --- a/src/vfs/_vfscommon/modules/punk/aliascore-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/aliascore-0.1.0.tm @@ -102,6 +102,8 @@ tcl::namespace::eval punk::aliascore { variable aliases #use absolute ns ie must be prefixed with :: #single element commands are imported if source command already exists, otherwise aliased. multi element commands are aliased + + #functions must be in export list of their source namespace set aliases [tcl::dict::create\ tstr ::punk::lib::tstr\ list_as_lines ::punk::lib::list_as_lines\ @@ -109,11 +111,23 @@ tcl::namespace::eval punk::aliascore { linelist ::punk::lib::linelist\ linesort ::punk::lib::linesort\ pdict ::punk::lib::pdict\ - plist [list ::punk::lib::pdict -roottype list]\ - showlist [list ::punk::lib::showdict -roottype list]\ + plist {::punk::lib::pdict -roottype list}\ + showlist {::punk::lib::showdict -roottype list}\ showdict ::punk::lib::showdict\ ansistrip ::punk::ansi::ansistrip\ stripansi ::punk::ansi::ansistrip\ + ansiwrap ::punk::ansi::ansiwrap\ + colour ::punk::console::colour\ + ansi ::punk::console::ansi\ + color ::punk::console::colour\ + a+ ::punk::console::code_a+\ + A+ {::punk::console::code_a+ forcecolour}\ + a ::punk::console::code_a\ + A {::punk::console::code_a forcecolour}\ + a? ::punk::console::code_a?\ + A? {::punk::console::code_a? forcecolor}\ + smcup ::punk::console::enable_alt_screen\ + rmcup ::punk::console::disable_alt_screen\ ] #*** !doctools diff --git a/src/vfs/_vfscommon/modules/punk/ansi-0.1.1.tm b/src/vfs/_vfscommon/modules/punk/ansi-0.1.1.tm index 267e680e..1a40c952 100644 --- a/src/vfs/_vfscommon/modules/punk/ansi-0.1.1.tm +++ b/src/vfs/_vfscommon/modules/punk/ansi-0.1.1.tm @@ -413,6 +413,7 @@ tcl::namespace::eval punk::ansi { tcl::namespace::export\ {a?} {a+} a \ ansistring\ + ansiwrap\ convert*\ clear*\ cursor_*\ @@ -722,77 +723,11 @@ tcl::namespace::eval punk::ansi { #candidate for zig/c implementation? proc stripansi2 {text} { - - set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters join [::punk::ansi::ta::split_at_codes $text] "" } - proc stripansi1 {text} { - - #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW - - variable escape_terminals ;#dict - variable ::punk::ansi::ta::standalone_code_map ;#map to empty string - - set text [convert_g0 $text] - - - set text [tcl::string::map $standalone_code_map $text] - #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm - #\x1b#3 double-height letters top half - #\x1b#4 double-height letters bottom half - #\x1b#5 single-width line - #\x1b#6 double-width line - #\x1b#8 dec test fill screen - - - #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. - - #Theoretically line endings can occur within an ST payload (review e.g title?) - #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) - - set inputlist [split $text ""] - set outputlist [list] - - set in_escapesequence 0 - #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) - - set i 0 - foreach u $inputlist { - set v [lindex $inputlist $i+1] - set uv ${u}${v} - if {$in_escapesequence eq "2b"} { - #2nd byte - done. - set in_escapesequence 0 - } elseif {$in_escapesequence != 0} { - set endseq [tcl::dict::get $escape_terminals $in_escapesequence] - if {$u in $endseq} { - set in_escapesequence 0 - } elseif {$uv in $endseq} { - set in_escapesequence 2b ;#flag next byte as last in sequence - } - } else { - #handle both 7-bit and 8-bit CSI and OSC - if {[regexp {^(?:\033\[|\u009b)} $uv]} { - set in_escapesequence CSI - } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { - set in_escapesequence OSC - } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { - set in_escapesequence DCS - } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { - #SOS,PM,APC - all terminated with ST - set in_escapesequence MISC - } else { - lappend outputlist $u - } - } - incr i - } - return [join $outputlist ""] - } - #review - what happens when no terminator? #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set @@ -2029,7 +1964,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map - set fcposn [lsearch $args "forcecol*"] + set fcposn [lsearch $args "force*"] set fc "" set opt_forcecolour 0 if {$fcposn >= 0} { @@ -2409,7 +2344,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >= 0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -2767,7 +2702,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. set forcecolour 0 - set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + set fcpos [lsearch $args "force*"] ;#allow forcecolor forcecolour if {$fcpos >=0} { set forcecolour 1 set args [lremove $args $fcpos] @@ -3199,6 +3134,16 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } return $out } + proc move_emitblock {row col textblock} { + #*** !doctools + #[call [fun move_emitblock] [arg row] [arg col] [arg textblock]] + set commands "" + foreach ln [split $textblock \n] { + append commands [punk::ansi::move_emit $row $col $ln] + incr row + } + return $commands + } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] @@ -3315,18 +3260,90 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) - #Alt screen buffer + #Alt screen buffer - smcup/rmcup ti/te + #Note \x1b\[?47h doesn't work at all in some terminals (e.g alacritty,cmd on windows and reportedly kitty) + #It is also reported to have 'deceptively similar' effects to 1049 -but to be 'subtly broken' on some terminals. + #see: https://xn--rpa.cc/irl/term.html + #1049 (introduced by xterm in 1998?) considered the more modern version? + #1047,1048,1049 xterm private modes are 'composite' control sequences as replacement for original 47 sequence + #1049 - includes save cursor,switch to alt screen, clear screen + #e.g ? (below example not known to be exactly what 1049 does - but it's something similar) + #SMCUP + # \x1b7 (save cursor) + # \x1b\[?47h (switch) + # \x1b\[2J (clear screen) + #RMCUP + # \x1b\[?47l (switch back) + # \x1b8 (restore cursor) + + #1047 - clear screen on the way out (ony if actually on alt screen) proc enable_alt_screen {} { #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? - #\x1b\[?1049h ;#xterm - return \x1b\[?47h + return \x1b\[?1049h } proc disable_alt_screen {} { #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] - #\x1b\[?1049l + return \x1b\[?1049l + } + #47 - less widely supported(?) doesn't restore cursor or clear alt screen + proc enable_alt_screen2 {} { + return \x1b\[?47h + } + proc disable_alt_screen2 {} { return \x1b\[?47l } + proc term_colour_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_color_fg {colour} { + return "\x1b\]10\;$colour\x1b\\" + } + proc term_colour_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_color_bg {colour} { + return "\x1b\]11\;$colour\x1b\\" + } + proc term_colour_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_color_cursor {colour} { + return "\x1b\]12\;$colour\x1b\\" + } + proc term_colour_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_color_pointer_fg {colour} { + return "\x1b\]13\;$colour\x1b\\" + } + proc term_colour_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + proc term_color_pointer_bg {colour} { + return "\x1b\]14\;$colour\x1b\\" + } + #15,16 tektronix fg, tektronix bg ??? + proc term_colour_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + proc term_color_highlight_bg {colour} { + return "\x1b\]17\;$colour\x1b\\" + } + #18 tektronix cursor colour ??? + proc term_colour_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + proc term_color_highlight_fg {colour} { + return "\x1b\]19\;$colour\x1b\\" + } + #22 pointer shape - there are other methods too - not known to work on windows terminal based VTs - review + proc term_colour_reset {} { + return "\x1b\]104\;\x1b\\" + } + proc term_color_reset {} { + return "\x1b\]104\;\x1b\\" + } # -- --- --- proc erase_line {} { @@ -4398,6 +4415,10 @@ tcl::namespace::eval punk::ansi::ta { variable re_ansi_detect_open expr {[regexp $re_ansi_detect_open $text]} } + proc detect_st_open {text} { + variable re_ST_open + expr {[regexp $re_ST_open $text]} + } #not in perl ta proc detect_csi {text} { @@ -4672,7 +4693,8 @@ tcl::namespace::eval punk::ansi::class { } oo::class create base_renderer { variable o_width - variable o_wrap o_overflow o_appendlines o_looplimit + variable o_autowrap_mode + variable o_overflow o_appendlines o_looplimit variable o_cursor_column o_cursor_row #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered @@ -4690,25 +4712,33 @@ tcl::namespace::eval punk::ansi::class { } tcl::namespace::path $nspath #-- -- - if {[llength $args] < 2} { - error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} + if {[llength $args] < 1} { + error {usage: ?-width ? ?-height ? ?-autowrap_mode [1|0]? ?-overflow [1|0]? from_ansistring} } - lassign [lrange $args end-1 end] from_ansistring to_ansistring + #lassign [lrange $args end-1 end] from_ansistring to_ansistring + set from_ansistring [lindex $args end] + set opts [tcl::dict::create\ - -width \uFFEF\ - -wrap 1\ - -overflow 0\ - -appendlines 1\ - -looplimit 15000\ - -experimental {}\ - -cursor_column 1\ - -cursor_row 1\ + -width \uFFEF\ + -height \uFFEF\ + -overflow 0\ + -appendlines 1\ + -looplimit 15000\ + -experimental {}\ + -cursor_column 1\ + -cursor_row 1\ + -insert_mode 0\ + -autowrap_mode 1\ + -initial_ansistring ""\ ] - puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" set argsflags [lrange $args 0 end-2] foreach {k v} $argsflags { switch -- $k { - -width - -wrap - -overflow - -appendlines - -looplimit - -experimental { + -width - -height - + -overflow - -appendlines - -looplimit - -experimental - + -autowrap_mode - + -insert_mode - + -initial_ansistring { tcl::dict::set opts $k $v } default { @@ -4717,8 +4747,19 @@ tcl::namespace::eval punk::ansi::class { } } } + set initial_ansistring [tcl::dict::get $opts -initial_ansistring] + if {$initial_ansistring eq ""} { + set to_ansistring [punk::ansi::class::class_ansistring new ""] + } else { + #todo - verify obj vs raw string + set to_ansistring $initial_ansistring + } + puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" + set o_width [tcl::dict::get $opts -width] - set o_wrap [tcl::dict::get $opts -wrap] + set o_height [tcl::dict::get $opts -height] + set o_autowrap_mode [tcl::dict::get $opts -autowrap_mode] + set o_insert_mode [tcl::dict::get $opts -insert_mode] set o_overflow [tcl::dict::get $opts -overflow] set o_appendlines [tcl::dict::get $opts -appendlines] set o_looplimit [tcl::dict::get $opts -looplimit] @@ -4736,6 +4777,9 @@ tcl::namespace::eval punk::ansi::class { method eval_in {script} { eval $script } + method renderbuf {} { + return $o_to_ansistring + } method cursor_column {{col ""}} { if {$col eq ""} { return $o_cursor_column @@ -4755,6 +4799,12 @@ tcl::namespace::eval punk::ansi::class { set o_cursor_row $row } + #set/query cursor state + method cursor_state {args} { + lassign $args r c + return [dict create row [my cursor_row $r] column [my cursor_column $c]] + } + #consider scroll area #we need to render to something with a concept of viewport, offscreen above,below,left,right? method rendernext {} { @@ -4834,7 +4884,8 @@ tcl::namespace::eval punk::ansi::class { #set rinfo [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -width $o_width -overflow 0 -cursor_column $col -cursor_row $row $undertext $overtext] } } - + #renderspace equivalent? channel based? + #todo $o_to_ansistring append $newtext return [tcl::dict::create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] @@ -4960,11 +5011,10 @@ tcl::namespace::eval punk::ansi::class { if {$o_renderer ne ""} { append result \n " renderer obj: $o_renderer" append result \n " renderer class: [info object class $o_renderer]" - } - if {$o_renderout ne ""} { - append result \n " render target ansistring: $o_renderout" - append result \n " render target has ansi : [$o_renderout has_ansi]" - append result \n " render target count : [$o_renderout count]" + set renderstring [$o_renderer renderbuf] + append result \n " render target ansistring: $renderstring" + append result \n " render target has ansi : [$renderstring has_ansi]" + append result \n " render target count : [$renderstring count]" } if {$verbose} { append result \n "ansisplits listing" @@ -5052,7 +5102,8 @@ tcl::namespace::eval punk::ansi::class { } method strippedlength {} { if {![llength $o_ansisplits]} {my MakeSplit} - + #review + return [string length [join $o_ptlist ""]] } #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already method stripped {} { @@ -5119,10 +5170,9 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ni $rtypes} { error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" } - if {$o_renderout eq ""} { - #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? - set o_renderout [punk::ansi::class::class_ansistring new ""] - } + #if {$o_renderout eq ""} { + # set o_renderout [punk::ansi::class::class_ansistring new ""] + #} if {$o_renderer ne ""} { set oinfo [info object class $o_renderer] set tail [tcl::namespace::tail $oinfo] @@ -5130,13 +5180,15 @@ tcl::namespace::eval punk::ansi::class { if {$rtype ne $currenttype} { puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } else { return $currenttype } } else { puts "creating first renderer" - set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + #set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self]] } } #--- progressive rendering buffer - another ansistring object @@ -5149,10 +5201,13 @@ tcl::namespace::eval punk::ansi::class { return $o_renderwidth } #re-render if needed? - + puts stderr "renderwidth todo? re-render?" set o_renderwidth $rw } + method renderer {} { + return $o_renderer + } method render_state {} { #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. @@ -5160,10 +5215,36 @@ tcl::namespace::eval punk::ansi::class { } method renderbuf {} { #get the underlying renderobj - if any - return $o_renderout ;#also class_ansistring + #return $o_renderout ;#also class_ansistring + return [$o_renderer renderbuf] } - method render {} { + method render {{maxgraphemes ""}} { #full render - return buffer ansistring + set do_render 1 + set grapheme_count 0 + set other_count 0 + if {$maxgraphemes eq ""} { + while {$do_render} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } else { + while {$do_render && $grapheme_count <= $maxgraphemes} { + set rendition [my rendernext] + set do_render [dict get $rendition rendercount] + if {[dict get $rendition type] eq "g"} { + incr grapheme_count $do_render + } else { + incr other_count $do_render + } + } + } + return [dict create graphemes $grapheme_count other $other_count] } method rendernext {} { #render next available pt/code chunk only - not to end of available input @@ -5198,6 +5279,13 @@ tcl::namespace::eval punk::ansi::class { #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column + #like Tcl's append class_ansistring append returns the result directly - which for ANSI - can be inconvenient in the terminal + #class_ansistring append_string is a convenience wrapper to avoid returning the raw result + method append_string {args} { + my append {*}$args + return + } + #analagous to Tcl string append #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient method append {args} { @@ -5369,7 +5457,7 @@ tcl::namespace::eval punk::ansi::class { } - #method append_and_render - append and render up to end of appended data at same time + #method append_and_render? - append and render up to end of appended data at same time method view {args} { if {$o_string eq ""} { diff --git a/src/vfs/_vfscommon/modules/punk/blockletter-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/blockletter-0.1.0.tm index 244190fa..9f07ec56 100644 --- a/src/vfs/_vfscommon/modules/punk/blockletter-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/blockletter-0.1.0.tm @@ -20,12 +20,14 @@ #*** !doctools #[manpage_begin shellspy_module_punk::blockletter 0 0.1.0] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] +#[titledesc {punk::blockletter frame-based large lettering test/logo}] [comment {-- Name section and table of contents description --}] #[moddesc {-}] [comment {-- Description at end of page heading --}] #[require punk::blockletter] #[keywords module] #[description] -#[para] - +#[para] This is primarily designed to test large lettering using the block2 frametype which requires the right font support +#[para] More reasonably sized block-lettering could be obtained using unicode half-blocks instead - but that doesn't allow the frame outline effect that block2 gives. +#[para] Individual blocks have a minimum width of 4 columns and a minimum height of 2 rows (smallest element that can be fully framed) # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ diff --git a/src/vfs/_vfscommon/modules/punk/config-0.1.tm b/src/vfs/_vfscommon/modules/punk/config-0.1.tm index 6e5dbeed..dd7ae873 100644 --- a/src/vfs/_vfscommon/modules/punk/config-0.1.tm +++ b/src/vfs/_vfscommon/modules/punk/config-0.1.tm @@ -60,12 +60,19 @@ tcl::namespace::eval punk::config { } # auto_exec_mechanism ;#whether to use exec instead of experimental shellfilter::run + + #optional channel transforms on stdout/stderr. + #can sometimes be useful to distinguish eventloop stdout/stderr writes compared to those triggered directly from repl commands + #If no distinction necessary - should use default_color_ + #The counterpart: default_color__repl is a transform that is added and removed with each repl evaluation. #startup color_stdout - parameters as suitable for punk::ansi::a+ (test with 'punk::ansi::a?') e.g "cyan bold" ;#not a good idea to default - set default_color_stdout "" + set default_color_stdout brightwhite ;#stdout colour including background calls (after etc) + set default_color_stdout_repl "" ;#stdout colour applied during direct repl call only #This wraps the stderr stream as it comes in with Ansi - probably best to default to empty.. but it's useful. #set default_color_stderr "red bold" #set default_color_stderr "web-lightsalmon" set default_color_stderr yellow ;#limit to basic colours for wider terminal support. yellow = term-olive + set default_color_stderr_repl "" ;#during repl call only set homedir "" if {[catch { @@ -132,7 +139,9 @@ tcl::namespace::eval punk::config { configset ".punkshell"\ scriptlib $default_scriptlib\ color_stdout $default_color_stdout\ + color_stdout_repl $default_color_stdout_repl\ color_stderr $default_color_stderr\ + color_stderr_repl $default_color_stderr_repl\ logfile_stdout $default_logfile_stdout\ logfile_stderr $default_logfile_stderr\ logfile_active 0\ @@ -172,9 +181,11 @@ tcl::namespace::eval punk::config { PUNK_CONFIGSET {type string}\ PUNK_SCRIPTLIB {type string}\ PUNK_AUTO_EXEC_MECHANISM {type string}\ - PUNK_AUTO_NOEXEC {type string default 0 help "set 1 to set Tcl's ::auto_noexec true.\nStops 'unknown' from running external programs"}\ - PUNK_COLOR_STDERR {type string}\ - PUNK_COLOR_STDOUT {type string}\ + PUNK_AUTO_NOEXEC {type string default 0 help "set 1 to set Tcl's ::auto_noexec true.\nStops 'unknown' from running external programs"}\ + PUNK_COLOR_STDERR {type string help "stderr colour transform. Use 'punk::ansi::a?' to see colour names"}\ + PUNK_COLOR_STDERR_REPL {type string help "stderr colour transform only while command running (not active during 'after')"}\ + PUNK_COLOR_STDOUT {type string help "stdout colour transform. Use 'punk::ansi::a?' to see colour names"}\ + PUNK_COLOR_STDOUT_REPL {type string help "stdout colour transform only while command running (not active during 'after')"}\ PUNK_LOGFILE_STDOUT {type string}\ PUNK_LOGFILE_STDERR {type string}\ PUNK_LOGFILE_ACTIVE {type string}\ diff --git a/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm b/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm index 4dd7bd66..e367ce9e 100644 --- a/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm +++ b/src/vfs/_vfscommon/modules/punk/console-0.1.1.tm @@ -864,6 +864,7 @@ namespace eval punk::console { #Turning ansi off only stops new ansi being generated - but what about codes stored in configurations of existing elements such as tables/frames? #It will stop underlines/bold/reverse as well as SGR colours #what about ansi movement codes etc? + #we already have colour on|off which disables SGR codes in a+ etc as well as stderr/stdout channel transforms proc ansi {{onoff {}}} { variable ansi_wanted if {[string length $onoff]} { @@ -891,6 +892,7 @@ namespace eval punk::console { } } catch {punk::repl::reset_prompt} + puts stderr "::punk::console::ansi - use 'colour' command to turn SGR codes on/off" return [expr {$ansi_wanted}] } @@ -1295,10 +1297,10 @@ namespace eval punk::console { if {![catch {twapi::set_console_title $windowtitle} result]} { return $windowtitle } else { - error "punk::console::titleset failed to set title - try punk::console::ansi::titleset" + error "punk::console::local::titleset failed to set title - try punk::console::ansi::titleset" } } else { - error "punk::console::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" + error "punk::console::local::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" } } proc titleget {} { @@ -1306,12 +1308,12 @@ namespace eval punk::console { if {![catch {twapi::get_console_title} result]} { return $result } else { - error "punk::console::titleset failed to set title - ensure twapi is available" + error "punk::console::local::titleset failed to set title - ensure twapi is available" } } else { #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title - # won't work on all platforms/terminals - but may be worth implementing - error "punk::console::titleget has no local mechanism to get the window title on this platform." + # won't work on all platforms/terminals - but may be worth implementing (for overtype::renderspace / frames etc) + error "punk::console::local::titleget has no local mechanism to get the window title on this platform." } } } @@ -1327,7 +1329,7 @@ namespace eval punk::console { if { $ansi_wanted <= 0} { punk::console::local::titleset $windowtitle } else { - tailcall ansi::titleset $windowtitle + ansi::titleset $windowtitle } } #no known pure-ansi solution @@ -1486,8 +1488,6 @@ namespace eval punk::console { namespace import ansi::insert_lines namespace import ansi::delete_lines - interp alias {} smcup {} ::punk::console::enable_alt_screen - interp alias {} rmcup {} ::punk::console::disable_alt_screen #experimental proc rhs_prompt {col text} { @@ -1881,12 +1881,6 @@ namespace eval punk::console { -interp alias {} colour {} punk::console::colour -interp alias {} ansi {} punk::console::ansi -interp alias {} color {} punk::console::colour -interp alias {} a+ {} punk::console::code_a+ -interp alias {} a {} punk::console::code_a -interp alias {} a? {} punk::console::code_a? diff --git a/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm index 96feac53..f1934d3c 100644 --- a/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm @@ -219,7 +219,8 @@ tcl::namespace::eval punk::nav::fs { } if {[punk::nav::fs::system::codethread_is_running]} { if {[llength [info commands ::punk::console::titleset]]} { - ::punk::console::titleset [lrange $result 1 end] + #if ansi is off - punk::console::titleset will try 'local' api method - which can fail + catch {::punk::console::titleset [lrange $result 1 end]} } } if {[string match //zipfs:/* $location]} { @@ -489,7 +490,7 @@ tcl::namespace::eval punk::nav::fs { tsv::lappend repl runchunks-$repl_runid {*}$chunklist } if {[llength [info commands ::punk::console::titleset]]} { - ::punk::console::titleset [lrange $result 1 end] ;#strip location key + catch {::punk::console::titleset [lrange $result 1 end]} ;#strip location key } } if {$repl_runid == 0} { diff --git a/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm index 70f924d7..cf0bf70c 100644 --- a/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm @@ -1561,7 +1561,8 @@ tcl::namespace::eval punk::ns { #set name [string trim $name :] #set origin [namespace origin ${upns}::$name] - set origin [nseval $targetns [list ::namespace origin $name]] + set origin [nseval $targetns [list ::namespace origin $name]] + set resolved [nseval $targetns [list ::namespace which $name]] #An renamed alias may exist that is the same name as a proc that was created later.. so we must check for the proc before looking into aliases! if {$origin ni [info procs $origin]} { @@ -1613,7 +1614,8 @@ tcl::namespace::eval punk::ns { } lappend argl $a } - list proc [nsjoin ${targetns} $name] $argl $body + #list proc [nsjoin ${targetns} $name] $argl $body + list proc $resolved $argl $body } diff --git a/src/vfs/_vfscommon/modules/punk/repl-0.1.tm b/src/vfs/_vfscommon/modules/punk/repl-0.1.tm index 864c4030..937988cf 100644 --- a/src/vfs/_vfscommon/modules/punk/repl-0.1.tm +++ b/src/vfs/_vfscommon/modules/punk/repl-0.1.tm @@ -31,7 +31,9 @@ package require shellfilter #package require punk package require punk::lib package require punk::aliascore -punk::aliascore::init +if {[catch {punk::aliascore::init} errM]} { + puts stderr "punk::aliascore::init error: $errM" +} package require punk::config package require punk::ns package require punk::ansi @@ -2576,8 +2578,41 @@ namespace eval repl { } } proc colour args { - thread::send %replthread% [list punk::console::colour {*}$args] - interp eval code [list punk::console::colour {*}$args] + set colour_state [thread::send %replthread% [list punk::console::colour]] + if {[llength $args]} { + #colour call was not a query + set new_state [thread::send %replthread% [list punk::console::colour {*}$args]] + if {[expr {$new_state}] ne [expr {$colour_state}]} { + interp eval code [list punk::console::colour {*}$args] ;#align code interp's view of colour state with repl thread + interp eval code [string map [list $new_state] { + #adjust channel transform stack + set docolour [expr {}] + if {!$docolour} { + set s [lindex $::codeinterp::outstack end] + if {$s ne ""} { + shellfilter::stack::remove stdout $s + } + set s [lindex $::codeinterp::errstack end] + if {$s ne ""} { + shellfilter::stack::remove stderr $s + } + } else { + set running_config $::punk::config::running + if {[string length [dict get $running_config color_stdout]]} { + lappend ::codeinterp::outstack [shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout]]] + } + if {[string length [dict get $running_config color_stderr]]} { + lappend ::codeinterp::errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr]]] + } + + } + }] + } + return $new_state + } else { + return $colour_state + } + #todo - add/remove shellfilter stacked ansiwrap } proc mode args { thread::send %replthread% [list punk::console::mode {*}$args] @@ -2686,6 +2721,10 @@ namespace eval repl { #review argv0,argv,argc interp eval code { + namespace eval ::codeinterp { + variable errstack {} + variable outstack {} + } set ::argv0 %argv0% set ::auto_path %autopath% #puts stdout "safe interp" @@ -2724,6 +2763,10 @@ namespace eval repl { set ::auto_path %autopath% #puts stdout "safe interp" #flush stdout + namespace eval ::codeinterp { + variable errstack {} + variable outstack {} + } } interp eval code [list set ::tcl_platform(os) $::tcl_platform(os)] interp eval code [list set ::tcl_platform(osVersion) $::tcl_platform(osVersion)] @@ -2775,7 +2818,11 @@ namespace eval repl { set ::auto_path %autopath% tcl::tm::remove {*}[tcl::tm::list] tcl::tm::add {*}[lreverse %tmlist%] - #puts "-->[chan names]" + puts "code interp chan names-->[chan names]" + namespace eval ::codeinterp { + variable errstack {} + variable outstack {} + } # -- --- #review @@ -2805,11 +2852,22 @@ namespace eval repl { #catch {package require packageTrace} package require punk package require shellrun + + package require shellfilter + set running_config $::punk::config::running + if {[string length [dict get $running_config color_stderr]] && [punk::console::colour]} { + lappend ::codeinterp::errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr]]] + } + if {[string length [dict get $running_config color_stdout]] && [punk::console::colour]} { + lappend ::codeinterp::outstack [shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout]]] + } + package require textblock } errM]} { puts stderr "========================" puts stderr "code interp error:" puts stderr $errM + puts stderr $::errorInfo puts stderr "========================" error "$errM" } diff --git a/src/vfs/_vfscommon/modules/punk/repl/codethread-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/repl/codethread-0.1.0.tm index 99eac01b..09b8a0be 100644 --- a/src/vfs/_vfscommon/modules/punk/repl/codethread-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/repl/codethread-0.1.0.tm @@ -151,16 +151,19 @@ tcl::namespace::eval punk::repl::codethread { puts stderr "runscript is meant to be called from the parent repl thread via a thread::send to the codethread" return } + set outstack [list] + set errstack [list] upvar ::punk::config::running running_config - if {[string length [dict get $running_config color_stdout]] && [punk::console::colour]} { - lappend outstack [shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout]]] + if {[string length [dict get $running_config color_stdout_repl]] && [interp eval code punk::console::colour]} { + lappend outstack [interp eval code [list shellfilter::stack::add stdout ansiwrap -settings [list -colour [dict get $running_config color_stdout_repl]]]] } - lappend outstack [shellfilter::stack::add stdout tee_to_var -settings {-varname ::punk::repl::codethread::output_stdout}] - if {[string length [dict get $running_config color_stderr]] && [punk::console::colour]} { - lappend errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr]]] - #lappend errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour cyan]] + lappend outstack [interp eval code [list shellfilter::stack::add stdout tee_to_var -settings {-varname ::punk::repl::codethread::output_stdout}]] + + if {[string length [dict get $running_config color_stderr_repl]] && [interp eval code punk::console::colour]} { + lappend errstack [interp eval code [list shellfilter::stack::add stderr ansiwrap -settings [list -colour [dict get $running_config color_stderr_repl]]]] + # #lappend errstack [shellfilter::stack::add stderr ansiwrap -settings [list -colour cyan]] } - lappend errstack [shellfilter::stack::add stderr tee_to_var -settings {-varname ::punk::repl::codethread::output_stderr}] + lappend errstack [interp eval code [list shellfilter::stack::add stderr tee_to_var -settings {-varname ::punk::repl::codethread::output_stderr}]] #an experiment #set errhandle [shellfilter::stack::item_tophandle stderr] @@ -177,8 +180,8 @@ tcl::namespace::eval punk::repl::codethread { #interp transfer code $errhandle "" #flush $errhandle - set lastoutchar [string index [punk::ansi::ansistrip $output_stdout] end] - set lasterrchar [string index [punk::ansi::ansistrip $output_stderr] end] + set lastoutchar [string index [punk::ansi::ansistrip [interp eval code set ::punk::repl::codethread::output_stdout]] end] + set lasterrchar [string index [punk::ansi::ansistrip [interp eval code set ::punk::repl::codethread::output_stderr]] end] #puts stderr "-->[ansistring VIEW -lf 1 $lastoutchar$lasterrchar]" set tid [thread::id] @@ -188,11 +191,12 @@ tcl::namespace::eval punk::repl::codethread { tsv::set codethread_$tid errorcode $::errorCode + #only remove from shellfilter::stack the items we added to stack in this function foreach s [lreverse $outstack] { - shellfilter::stack::remove stdout $s + interp eval code [list shellfilter::stack::remove stdout $s] } foreach s [lreverse $errstack] { - shellfilter::stack::remove stderr $s + interp eval code [list shellfilter::stack::remove stderr $s] } thread::cond notify $replthread_cond } diff --git a/src/vfs/_vfscommon/modules/punk/rest-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/rest-0.1.0.tm new file mode 100644 index 00000000..2198f2c6 --- /dev/null +++ b/src/vfs/_vfscommon/modules/punk/rest-0.1.0.tm @@ -0,0 +1,296 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) DKF (based on DKF's REST client support class) +# (C) 2024 JMN - packaging/possible mods +# +# @@ Meta Begin +# Application punk::rest 0.1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin shellspy_module_punk::rest 0 0.1.0] +#[copyright "2024"] +#[titledesc {punk::rest}] [comment {-- Name section and table of contents description --}] +#[moddesc {experimental rest}] [comment {-- Description at end of page heading --}] +#[require punk::rest] +#[keywords module rest http] +#[description] +#[para] Experimental *basic rest as wrapper over http lib - use tcllib's rest package for a more complete implementation of a rest client + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::rest +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::rest +#[list_begin itemized] + +package require Tcl 8.6- +package require http +#*** !doctools +#[item] [package {Tcl 8.6}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#tcl::namespace::eval punk::rest::class { + #*** !doctools + #[subsection {Namespace punk::rest::class}] + #[para] class definitions + #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + #} +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::rest { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + + + + #*** !doctools + #[subsection {Namespace punk::rest}] + #[para] Core API functions for punk::rest + #[list_begin definitions] + + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + set objname [namespace current]::matrixchain + if {$objname ni [info commands $objname]} { + # Support class for RESTful web services. + # This wraps up the http package to make everything appear nicer. + oo::class create CLIENT { + variable base wadls acceptedmimetypestack + + constructor baseURL { + set base $baseURL + my LogWADL $baseURL + } + + # TODO: Cookies! + + method ExtractError {tok} { + return [http::code $tok],[http::data $tok] + } + + method OnRedirect {tok location} { + upvar 1 url url + set url $location + # By default, GET doesn't follow redirects; the next line would + # change that... + #return -code continue + set where $location + my LogWADL $where + if {[string equal -length [string length $base/] $location $base/]} { + set where [string range $where [string length $base/] end] + return -level 2 [split $where /] + } + return -level 2 $where + } + + method LogWADL url { + return;# do nothing + set tok [http::geturl $url?_wadl] + set w [http::data $tok] + http::cleanup $tok + if {![info exist wadls($w)]} { + set wadls($w) 1 + puts stderr $w + } + } + + method PushAcceptedMimeTypes args { + lappend acceptedmimetypestack [http::config -accept] + http::config -accept [join $args ", "] + return + } + method PopAcceptedMimeTypes {} { + set old [lindex $acceptedmimetypestack end] + set acceptedmimetypestack [lrange $acceptedmimetypestack 0 end-1] + http::config -accept $old + return + } + + method DoRequest {method url {type ""} {value ""}} { + for {set reqs 0} {$reqs < 5} {incr reqs} { + if {[info exists tok]} { + http::cleanup $tok + } + set tok [http::geturl $url -method $method -type $type -query $value] + if {[http::ncode $tok] > 399} { + set msg [my ExtractError $tok] + http::cleanup $tok + return -code error $msg + } elseif {[http::ncode $tok] > 299 || [http::ncode $tok] == 201} { + set location {} + if {[catch { + set location [dict get [http::meta $tok] Location] + }]} { + http::cleanup $tok + error "missing a location header!" + } + my OnRedirect $tok $location + } else { + set s [http::data $tok] + http::cleanup $tok + return $s + } + } + error "too many redirections!" + } + + method GET args { + return [my DoRequest GET $base/[join $args /]] + } + + method POST {args} { + set type [lindex $args end-1] + set value [lindex $args end] + set m POST + set path [join [lrange $args 0 end-2] /] + return [my DoRequest $m $base/$path $type $value] + } + + method PUT {args} { + set type [lindex $args end-1] + set value [lindex $args end] + set m PUT + set path [join [lrange $args 0 end-2] /] + return [my DoRequest $m $base/$path $type $value] + } + + method DELETE args { + set m DELETE + my DoRequest $m $base/[join $args /] + return + } + export GET POST PUT DELETE + } + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::rest ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::rest::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::rest::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::rest::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::rest::system { + #*** !doctools + #[subsection {Namespace punk::rest::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::rest [tcl::namespace::eval punk::rest { + variable pkg punk::rest + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm b/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm index e1983653..4f887fd5 100644 --- a/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm +++ b/src/vfs/_vfscommon/modules/shellfilter-0.1.9.tm @@ -654,6 +654,7 @@ namespace eval shellfilter::chan { #detect will detect ansi SGR and gron groff and other codes if {[punk::ansi::ta::detect $buf]} { #split_codes_single regex faster than split_codes - but more resulting parts + #'single' refers to number of escapes - but can still contain e.g multiple SGR codes (or mode set operations etc) set parts [punk::ansi::ta::split_codes_single $buf] #process all pt/code pairs except for trailing pt foreach {pt code} [lrange $parts 0 end-1] { @@ -725,21 +726,70 @@ namespace eval shellfilter::chan { } else { #puts "-->esc but no detect" #no complete ansi codes - but at least one esc is present - if {[string first \x1b $buf] == [llength $buf]-1} { + if {[string last \x1b $buf] == [llength $buf]-1} { #only esc is last char in buf #puts ">>trailing-esc<<" set o_buffered \x1b set emit [string range $buf 0 end-1] } else { + set emit_anyway 0 #todo - ensure non-ansi escapes in middle of chunks don't lead to ever growing buffer - append o_buffered $chunk - set emit "" + if {[punk::ansi::ta::detect_st_open $buf]} { + #no detect - but we have an ST open (privacy msg etc) - allow a larger chunk before we give up - could include newlines (and even nested codes - although not widely interpreted that way in terms) + set st_partial_len [expr {[llength $buf] - [string last \x1b $buf]}] ;#length of unclosed ST code + #todo - configurable ST max - use 1k for now + if {$st_partial_len < 1001} { + append o_buffered $chunk + set emit "" + } else { + set emit_anyway 1 + } + } else { + set possible_code_len [expr {[llength $buf] - [string last \x1b $buf]}] ;#length of possible code + #most opening sequences are 1,2 or 3 chars - review? + set open_sequence_detected [punk::ansi::ta::detect_open $buf] + if {$possible_code_len > 10 && !$open_sequence_detected} { + set emit_anyway 1 + } else { + #could be composite sequence with params - allow some reasonable max sequence length + #todo - configurable max sequence length + #len 40-50 quite possible for SGR sequence using coloured underlines etc, even without redundancies + # - allow some headroom for redundant codes when the caller didn't merge. + if {$possible_code_len < 101} { + append o_buffered $chunk + set emit "" + } else { + #allow a little more grace if we at least have an opening ansi sequence of any type.. + if {$open_sequence_detected && $possible_code_len < 151} { + append o_buffered $chunk + set emit "" + } else { + set emit_anyway 1 + } + } + } + } + if {$emit_anyway} { + #looked ansi-like - but we've given enough length without detecting close.. + #treat as possible plain text with some esc or unrecognised ansi sequence + if {( ![llength $o_codestack] || ([llength $o_codestack] == 1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]]))} { + set emit $o_do_colour$buf$o_do_normal + } else { + set emit $buf + } + } } } } else { #no esc #puts stdout [a+ yellow]...[a] - set emit $buf + #test! + if {( ![llength $o_codestack] || ([llength $o_codestack] == 1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]]))} { + set emit $o_do_colour$buf$o_do_normal + } else { + set emit $buf + } + #set emit $buf set o_buffered "" } return [dict create emit $emit stacksize [llength $o_codestack]] diff --git a/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm b/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm index 885c56a1047c8fb58c59a7777af8743df10eb957..4ea2ce3d5c130888c6d7d9839df23f375bc5cbbd 100644 GIT binary patch delta 2720 zcmV;R3SafDz5(^X0kA9_0|>7uvo9PkC=s0vDOqXs_-;Z3004j-001wO@jWJ!SY8Z& z?U`+B<33izz0=l4fUSU*?f!R{G%3ItxatDGW^sZj9i}Eo%9XaMEb7PK@vqNQ7Z&Ce$`j z^y81xj^X2jbYoLFU_VliK00x!2IKX^zXlZJqH0N zFuS_`^X&2q4CY%fwWJAcCJIcYPU#`e&-7KIf&xfwWsVS(UdD*;Hw_y-iMVLp@OGu(8Qgj*3;nj(HR0>&ZOs zg}Abfbu1T{bm7Zcp`o^h+8Sb^8|zRJO4B^A_t>Uh%s(KnhjH1+ALG zL|V(LnKJ(99c+7l^U%!^LLDQ`vThPyceQ0n_es4P*_%wbY}^EHySr-6v%9dPejD?M z=*YNq@xCtg%kU)L(IS_5s>#Z)UK(E`wr$&Y?2GALA5(zlb-={Ja0$&q@IMEqCDVJD z)?wQG-sL%%nmcHY{u(z@ufZZMj45#{l4PbVX&`F)0uVT5#{1pxF1>l)_<|<+!6vw~ z*l4tfYmvqrpmzT`V;_<(CG$OFyTsUUUoS3BFRuwKADgYwialNPhr7drdHbtgjtU(% zzv~?yd9XCzd z?ru1ovT-sRjozNojI=q_TxkWVOlB}nbI=nGI=$sm{L8dRB`D2q_&X3B#(6r$vT1^E z<|)(!zJCyUk`@}sj(?Chz~l^KrkT=t5$Pi5kfbcek{=HC=`Ul^En0+}-xUH&b(Bz5 zDw1r<6_#`0_bh>4&-Bd*<_G(G%VIIj(p-~*t=#MVh~@Vz7i^SZ?Z=Eqn#b=U2z+^z zPNz5`vbCqU#xAHL(N<{>|1GE!^nq-e5rz*FpxpsM}KQx5tA`U{*VaC71@Df zG+$(J|>v|%+9 zmSr3};6`)glucP>YJ{`y+0m)9;w0A6VOAgtWZC~pRn2!XhB*}izq8VX8^7HRs6)hnB=9 zkCwe#J|hQd5^2RgfF)#P(ZNW?!?i@RlVB8d7`__AH-97WkJEqt{%{TqPGEl3|8#cw z8;lkkz_Y}LtB7Sh6f(qZnRe+WI#vOMR3heJm0k=I5j4=jq9bUB37QY|VoQk9amdD+ z=OLeFdRCES6ediY+yT>tT;CNbO8Fg^7#xu-DM>Ro)*@gdG-IeU772g%DndIxcbm_R=xhm*HVTHAlda3%TnIG; zsvlilMKf0C_`zfOF}6- z#4jcnWfH2|P}K&PrMe`N*}O7#2Oh|oP?Tj#;J6j%5EpbA1{{|vqHHvo!Bi+ks!@`f zr++c9sh6O8*rj$WO|oEM@1N?jLGYwrh3s|2bTsjS+sv;PG`b6QGpu9o5WPutSYDuo zW*VNv+tr9g97?QZW-oO2diA<>pWT|?)jnBhUIvWI>6Xxxg8w-rvco}u;k=`4w^r6uv(np@Iev$uX>+72))jg~%P``2&_ z2V2uKN(1e?@MG;Leeu_M|E%1T?eotMpZaDY-3DEnYw@JAXt;qd)vp`3aw~KHg9C3| zLB7=6pmVp*-M)>xRrgwN=6CdPy?^Y}??F~Q(f2{?XLt}4Q5ti~;GHjKc^Ywh1VguU z?&#DgqcT2-d6Z}DW$s6@ZczNhe)+j`zs~)>g8Qura&rsf;tM*Q=V|VLhgZU2-SvE* zGZk6_AH0<#Lg$H{Cw?@6bJwS00;o> z2r9E@O}qpH2(Ku!CQ|JS4V?`sS!wk6ZbAeA0DzMqUQi%!2P#?k<5xLr1polwDF6U5 z0000000000000000OLrLL0lh`uU;S^?FcGazJF@S2?PKD^a=m~CIA2c0000000000 a005#+lR;b$qV9n>fg@H z&3^1A&|q&p;lIHProCcC1@XK;1Nttssaa+?^$SUFoaXjz(pRsG>n80u_J6POZtciT zbLY&JIVvKY6#03Z{$~&C-Tx9UMlkOTGd}T2=nc!kXWv&mwLf$!rg>vh&wK}^lvF#_ zyCE0bcY7H(_WNJ*`*e!w_{1t6r}GM;VG_~xVV!HeSf+MQF|n7I{}``0sk)0p)h=Yx z7PG)Nr?hmJWq9;A^gB83fB)+l%jd_7?*-{Dca4iO`*7~>Km8^3B1=V-UGJS|RTH~> zUcfcCa+3dY6Q#bZ#u+QQmjzvu{`#W+;M@oLGM9S|>SlRJOU=FRY<9)QHG5@?pX1}n zlcac$H`WJp#fugQMcK$ceR*d4YpG)&({~tO?TDJm|HXm1He%m}<4HGWT;;vjs{hw+ zxk8zy#X^aDL7j^nUO7md^)x8krFNwwZLjF@XNq1Eet5XLsuVo9CTqPbdgj47r49UX zyEvFX16jk~^mFdc!nF zjjMIpku!5ypK7-3TCOu={c(Gd%WLgko5iA0(mq<+@)PH1#cio|S>RT2>_olVWG?%? z-RGI-{`PqumXjAD^Y@o{uuzTi&l789qa7Jh|y)*Zbs*6ek!u%E4KwET*?=LJ@m zvqHzBE-$@wVBLAH9Cx^^htK>k&#jWGx=fvMiDL@Y4(pyPaA~q{ExJczexF?g^~A9ALv0 z@4qZ@HuKU6ZL7U`J=Q-Iy42ObpFuN}GkjqdTW}`NY&EU&aPfrO=`C`6^IuKKoaV}0 z|ET`TRNh76Z>3V+3nfnWl-}`n-YVAqcc*&VW6v-zdVJ^D*NKsDmOXUc)9kKwIb!nk z*?TSKJSnZ+?f>T7mv!GyI)64xa5CNAvP|sH`GYS{d|_NxI>StU+8(yx>f5Jp#y@s@ zWg4-hk@IR#J9PeoZ@2wDYFa+6$L-|Cg23Y+Ao=8sN>$ zBErDHz`=lCz!(Ax7{$r*p2DLA%xD2KTEL9l0>+C<1>> _get_keyval_value from '$keyval_element'<<<" + log::notice ">>> _get_keyval_value from '$keyval_element'<<<" set found_value 0 #find the value # 3 is the earliest index at which the value could occur (depending on whitespace) @@ -135,7 +117,7 @@ namespace eval tomlish { foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { - STRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { + STRING - MULTISTRING - INT - FLOAT - BOOL - DATETIME - TABLE - ARRAY - ITABLE { set type [lindex $sub 0] set value [lindex $sub 1] set found_sub $sub @@ -156,10 +138,14 @@ namespace eval tomlish { #simple (non-container, no-substitution) datatype set result [list type $type value $value] } - STRING { + STRING - STRINGPART { set result [list type $type value [::tomlish::utils::unescape_string $value]] } - TABLE - ITABLE - ARRAY { + LITSTRING { + #REVIEW + set result [list type $type value $value] + } + TABLE - ITABLE - ARRAY - MULTISTRING { #jmn2024 - added ITABLE - review #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! @@ -188,7 +174,7 @@ namespace eval tomlish { variable tablenames_seen [list] - puts ">>> processing '$tomlish'<<<" + log::info ">>> processing '$tomlish'<<<" set items $tomlish foreach lst $items { @@ -208,7 +194,7 @@ namespace eval tomlish { #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { KEYVAL - QKEYVAL { - puts "--> processing $tag: $item" + log::debug "--> processing $tag: $item" set key [lindex $item 1] #!todo - normalize key. (may be quoted/doublequoted) @@ -228,7 +214,7 @@ namespace eval tomlish { error "Table name '$tablename' has already been directly defined in the toml data. Invalid." } - puts "--> processing $tag (name: $tablename): $item" + log::debug "--> processing $tag (name: $tablename): $item" set name_segments [::tomlish::utils::tablename_split $tablename] set last_seg "" #toml spec rule - all segments mst be non-empty @@ -305,8 +291,8 @@ namespace eval tomlish { lappend tablenames_seen $tablename - puts ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" - puts ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" #now add the contained elements foreach element [lrange $item 2 end] { @@ -329,6 +315,7 @@ namespace eval tomlish { #!todo. } ITABLE { + #SEP??? set datastructure [list] foreach element [lrange $item 1 end] { set type [lindex $element 0] @@ -350,16 +337,20 @@ namespace eval tomlish { ARRAY { #arrays in toml are allowed to contain mixtures of types set datastructure [list] - puts "--> processing array: $item" + log::debug "--> processing array: $item" foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - STRING - INT - FLOAT - BOOL - DATETIME { + INT - FLOAT - BOOL - DATETIME { set value [lindex $element 1] lappend datastructure [list type $type value $value] } - TABLE - ARRAY { + STRING { + set value [lindex $element 1] + lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] + } + TABLE - ARRAY - MULTISTRING { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } @@ -372,6 +363,89 @@ namespace eval tomlish { } } } + MULTISTRING { + #triple dquoted string + log::debug "--> processing multistring: $item" + set stringvalue "" + set idx 0 + set parts [lrange $item 1 end] + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + STRINGPART { + append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] + } + CONT { + #When the last non-whitespace character on a line is an unescaped backslash, + #it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter + # review - we allow some whitespace in stringpart elements - can a stringpart ever be all whitespace? + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + append stringvalue "\\" ;#add the sep + } else { + #skip over ws without emitting + set idx [llength $parts] + } + } else { + set parts_til_nl [lrange $parts 0 $next_nl-1] + set non_ws [lsearch -index 0 -start $idx+1 -not $parts_til_nl WS] + if {$non_ws >= 0} { + append stringvalue "\\" + } else { + #skip over ws on this line + set idx $next_nl + #then have to check each subsequent line until we get to first non-whitespace + set trimming 1 + while {$trimming && $idx < [llength $parts]} { + set next_nl [lsearch -index 0 -start $idx+1 $parts NEWLINE] + if {$next_nl == -1} { + #last line + set non_ws [lsearch -index 0 -start $idx+1 -not $parts WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + } else { + set idx [llength $parts] + } + set trimming 0 + } else { + set non_ws [lsearch -index 0 -start $idx+1 -not [lrange $parts 0 $next_nl-1] WS] + if {$non_ws >= 0} { + set idx [expr {$non_ws -1}] + set trimming 0 + } else { + set idx $next_nl + #keep trimming + } + } + } + } + } + } + NEWLINE { + #if newline is first element - it is not part of the data of a multistring + if {$idx > 0} { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + } + WS { + append stringvalue [lindex $element 1] + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } WS - COMMENT - NEWLINE { #ignore } @@ -415,14 +489,6 @@ namespace eval tomlish { } - #proc sample1 {p1 args} { - # #*** !doctools - # #[call [fun sample1] [arg p1] [opt {?option value...?}]] - # #[para]Description of sample1 - # return "ok" - #} - - #*** !doctools @@ -430,6 +496,7 @@ namespace eval tomlish { } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + namespace eval tomlish::encode { #*** !doctools #[subsection {Namespace tomlish::encode}] @@ -445,7 +512,7 @@ namespace eval tomlish::encode { proc int {i} { #whole numbers, may be prefixed with a + or - #Leading zeros are not allowed - #Hex,octal binary forms not allowed + #Hex,octal binary forms are allowed (toml 1.0) #We will error out on encountering commas, as commas are interpreted differently depending on locale (and don't seem to be supported in the toml spec anyway) #!todo - Tcl can handle bignums - bigger than a 64bit signed long as specified in toml. # - We should probably raise an error for number larger than this and suggest the user supply it as a string? @@ -465,6 +532,10 @@ namespace eval tomlish::encode { } proc float {f} { + #convert any non-lower case variants of special values to lowercase for Toml + if {[string tolower $f] in {nan +nan -nan inf +inf -inf}} { + return [list FLOAT [string tolower $f]] + } if {[::tomlish::utils::is_float $f]} { return [list FLOAT $f] } else { @@ -481,29 +552,15 @@ namespace eval tomlish::encode { } proc boolean {b} { - if {$b eq "0"} { - set b "false" - } - if {$b eq 1} { - set b "true" - } - set b [tcl::string::tolower $b] - if {$b in {yes y}} { - set b "true" - } - if {$b in {no n}} { - set b "false" - } - if {$b eq "t"} { - set b "true" - } - if {$b eq "f"} { - set b "false" - } - if {$b in {true false}} { - return [list BOOL $b] + #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false + if {![string is boolean -strict $b]} { + error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" } else { - error "Unable to interpret '$b' as Toml boolean. [::tomlish::parse::report_line]" + if {[expr {$b && 1}]} { + return [list BOOL true] + } else { + return [list BOOL false] + } } } @@ -560,7 +617,7 @@ namespace eval tomlish::encode { } MULTISTRING { #explicitly list the valid child tags - if {$tag ni {STRING WS NEWLINE CONT}} { + if {$tag ni {STRING STRINGPART WS NEWLINE CONT}} { error "Invalid tag '$tag' encountered within a MULTISTRING" } } @@ -636,8 +693,11 @@ namespace eval tomlish::encode { # return \"[lindex $item 1]\" } + STRINGPART { + return [lindex $item 1] + } MULTISTRING { - #Double quoted string which is a container for newlines,whitespace and multiple strings + #Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts set multistring "" ;#variable to build up the string foreach part [lrange $item 1 end] { append multistring [::tomlish::encode::tomlish [list $part] $nextcontext] @@ -691,7 +751,7 @@ namespace eval tomlish::encode { #(encode tomlish as toml) interp alias {} tomlish::to_toml {} tomlish::encode::tomlish - +# namespace eval tomlish::decode { @@ -793,7 +853,7 @@ namespace eval tomlish::decode { set prevstate $state ##### set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - puts "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" set state $nextstate if {$state eq "err"} { @@ -822,10 +882,14 @@ namespace eval tomlish::decode { } comma { #comma for inline table will pop the keyvalue space + lappend v($nest) "SEP" } endinlinetable { puts stderr "endinlinetable" } + endmultiquote { + puts stderr "endmultiquote for last_space_action 'pop'" + } default { error "unexpected tokenType '$tokenType' for last_space_action 'pop'" } @@ -842,7 +906,7 @@ namespace eval tomlish::decode { barekey { set v($nest) [list KEYVAL $tok] ;#$tok is the keyname } - quotedkey { + quotedkey - itablequotedkey { set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname } tablename { @@ -858,7 +922,7 @@ namespace eval tomlish::decode { # tomlish list? set test_only [::tomlish::utils::tablename_trim $tok] - puts stdout "trimmed (but not normalized) tablename: '$test_only'" + ::tomlish::log::debug "trimmed (but not normalized) tablename: '$test_only'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) @@ -875,6 +939,14 @@ namespace eval tomlish::decode { startinlinetable { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } + startmultiquote { + puts stderr "push trigger tokenType startmultiquote (todo)" + set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE + #JMN ??? + #set next_tokenType_known 1 + #::tomlish::parse::set_tokenType "multistring" + #set tok "" + } default { error "push trigger tokenType '$tokenType' not yet implemented" } @@ -900,46 +972,68 @@ namespace eval tomlish::decode { puts stderr "decode::toml error. did not expect startlinetable without space level change" } startquote { - if {$nextstate eq "string"} { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "string" - set tok "" - } elseif {$nextstate eq "quotedkey"} { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "quotedkey" - set tok "" - } else { - error "not implemented. startquote. nextstate: $nextstate" + switch -exact -- $nextstate { + string { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "string" + set tok "" + } + quotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "quotedkey" + set tok "" + } + itablequotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablequotedkey" + set tok "" + } + default { + error "startquote switch case not implemented for nextstate: $nextstate" + } } } startmultiquote { + #review + puts stderr "no space level change - got startmultiquote" set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "multistring" + ::tomlish::parse::set_tokenType "stringpart" set tok "" } endquote { #nothing to do? set tok "" } + endmultiquote { + #JMN!! + set tok "" + } string { lappend v($nest) [list STRING $tok] } + stringpart { + lappend v($nest) [list STRINGPART $tok] + } multistring { + #review lappend v($nest) [list MULTISTRING $tok] } quotedkey { #lappend v($nest) [list QKEY $tok] ;#TEST + } + itablequotedkey { + } untyped-value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. if {$tok in {true false}} { - set tag "BOOL" + set tag BOOL } elseif {[::tomlish::utils::is_int $tok]} { - set tag "INT" + set tag INT } elseif {[::tomlish::utils::is_float $tok]} { - set tag "FLOAT" + set tag FLOAT } elseif {[::tomlish::utils::is_datetime $tok]} { - set tag "DATETIME" + set tag DATETIME } else { error "Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line]" } @@ -954,7 +1048,7 @@ namespace eval tomlish::decode { lappend v($nest) = } comma { - lappend v($nest) "SEP" + lappend v($nest) SEP } newline { incr linenum @@ -963,8 +1057,11 @@ namespace eval tomlish::decode { whitespace { lappend v($nest) [list WS $tok] } + continuation { + lappend v($nest) CONT + } bom { - lappend v($nest) "BOM" + lappend v($nest) BOM } eof { #ok - nothing more to add to the tomlish list. @@ -1066,22 +1163,28 @@ namespace eval tomlish::utils { incr i if {$c eq "."} { - if {$mode eq "unquoted"} { - #dot marks end of segment. - lappend segments $seg - set seg "" - set mode "unknown" - } elseif {$mode eq "quoted"} { - append seg $c - } elseif {$mode eq "unknown"} { - lappend segments $seg - set seg "" - } elseif {$mode eq "litquoted"} { - append seg $c - } else { - #mode: syntax - #we got our dot. - the syntax mode is now satisfied. - set mode "unknown" + switch -exact -- $mode { + unquoted { + #dot marks end of segment. + lappend segments $seg + set seg "" + set mode "unknown" + } + quoted { + append seg $c + } + unknown { + lappend segments $seg + set seg "" + } + litquoted { + append seg $c + } + default { + #mode: syntax + #we got our dot. - the syntax mode is now satisfied. + set mode "unknown" + } } } elseif {($c eq "\"") && ($lastChar ne "\\")} { if {$mode eq "unknown"} { @@ -1139,7 +1242,7 @@ namespace eval tomlish::utils { } if {$i == $sLen} { #end of data - puts "End of data: mode='$mode'" + ::tomlish::log::debug "End of data: mode='$mode'" switch -exact -- $mode { quoted { if {$c ne "\""} { @@ -1255,7 +1358,7 @@ namespace eval tomlish::utils { } set c [::string index $str $i] - puts "unescape_string. got char $c" + ::tomlish::log::debug "unescape_string. got char $c" scan $c %c n if {($n <= 31) && ($n != 9) && ($n != 10) && ($n != 13)} { #we don't expect unescaped unicode characters from 0000 to 001F - @@ -1336,7 +1439,7 @@ namespace eval tomlish::utils { } } } - puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" + #puts stdout "EOF 4:$unicode4_active 8:$unicode8_active slash:$slash_active" if {$unicode4_active} { error "End of string reached before complete unicode escape sequence \uHHHH" } @@ -1408,7 +1511,7 @@ namespace eval tomlish::utils { } #test only that the characters in str are valid for the toml specified type 'integer'. - proc int_validchars {str} { + proc int_validchars1 {str} { set numchars [::string length $str] if {[regexp -all {[0-9\_\-\+]} $str] == $numchars} { return 1 @@ -1416,29 +1519,47 @@ namespace eval tomlish::utils { return 0 } } + #add support for hex,octal,binary 0x.. 0o.. 0b... + proc int_validchars {str} { + set numchars [::string length $str] + if {[regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] == $numchars} { + return 1 + } else { + return 0 + } + } proc is_int {str} { - set matches [regexp -all {[0-9\_\-\+]} $str] + set matches [regexp -all {[0-9\_xo\-\+A-Fa-f]} $str] - if {[::string length $str] == $matches} { + if {[tcl::string::length $str] == $matches} { #all characters in legal range - #check for leading zeroes + + # --------------------------------------- + #check for leading zeroes in non 0x 0b 0o #first strip any +, - or _ (just for this test) set check [::string map {+ "" - "" _ ""} $str] - if {([::string length $check] > 1) && ([::string range $check 0 0] eq "0")} { + if {([tcl::string::length $check] > 1) && ([tcl::string::index $check 0] eq "0") && ([tcl::string::index $check 1] ni {o x b})} { return 0 } + # --------------------------------------- + #check +,- only occur in the first position. if {[::string last - $str] > 0} { - return false + return 0 } if {[::string last + $str] > 0} { - return false + return 0 } - #!todo - check bounds - #even though Tcl can handle bignums, we won't accept anything outside of toml spec. + set numeric_value [::string map {_ ""} $str] ;#allow some earlier tcl versions which don't support underscores + #use Tcl's integer check to ensure we don't let things like 3e4 through - which is a float (would need to be 0x3e4 for hex) + if {![string is integer -strict $numeric_value]} { + return 0 + } + #!todo - check bounds only based on some config value + #even though Tcl can handle bignums, we won't accept anything outside of toml 1.0 minimum requirements. #presumably very large numbers would have to be supplied in a toml file as strings. - set numeric_value [::string map {_ ""} $str] + #Review - toml 1.0 only says that it must handle up to 2^63 - not that this is a max if {$numeric_value > $::tomlish::max_int} { return 0 } @@ -1458,12 +1579,21 @@ namespace eval tomlish::utils { if {[regexp -all {[eE0-9\_\-\+\.]} $str] == $numchars} { return 1 } else { - return 0 + #only allow lower case for these special values - as per Toml 1.0 spec + if {$str ni {inf +inf -inf nan +nan -nan}} { + return 0 + } else { + return 1 + } } } proc is_float {str} { set matches [regexp -all {[eE0-9\_\-\+\.]} $str] + #don't test for case variations - as Toml doesn't allow (whereas Tcl allows Inf NaN etc) + if {$str in {inf +inf -inf nan +nan -nan}} { + return 1 + } if {[::string length $str] == $matches} { #all characters in legal range @@ -1487,6 +1617,7 @@ namespace eval tomlish::utils { #strip underscores for tcl double check set check [::string map {_ ""} $str] + #string is double accepts inf nan +NaN etc. if {![::string is double $check]} { return 0 } @@ -1568,10 +1699,10 @@ namespace eval tomlish::parse { curly-space {\ whitespace "curly-space"\ newline "curly-space"\ - barekey {pushspace "commakeyval-space"}\ - quotedkey "commakeyval-space"\ - xuntyped-value {pushspace "commakeyval-space"}\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ endinlinetable "popspace"\ + startquote "itablequotedkey"\ comma "curly-space"\ eof "err"\ comment "err"\ @@ -1584,10 +1715,10 @@ namespace eval tomlish::parse { curly-syntax {\ whitespace "curly-syntax"\ newline "curly-syntax"\ - barekey {pushspace "commakeyval-space"}\ - quotedkey "commakeyval-space"\ - xuntyped-value {pushspace "commakeyval-space"}\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ endinlinetable "popspace"\ + startquote "itablequotedkey"\ comma "curly-space"\ eof "err"\ comment "err"\ @@ -1635,15 +1766,19 @@ namespace eval tomlish::parse { dict set stateMatrix\ - commakeyval-syntax {whitespace "commakeyval-syntax" endquote "commakeyval-syntax" newline "err" equal "value-expected" eof "err"} + itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} + #dict set stateMatrix\ + # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} dict set stateMatrix\ - commakeytail {whitespace "commakeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} dict set stateMatrix\ - commakeyval-space {} + itablekeyval-space {} + dict set stateMatrix\ + itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} dict set stateMatrix\ - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" newline "err" equal "value-expected" eof "err"} + keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} dict set stateMatrix\ keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} dict set stateMatrix\ @@ -1660,7 +1795,7 @@ namespace eval tomlish::parse { dict set stateMatrix\ multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} dict set stateMatrix\ - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" newline "multistring-space" eof "err" endmultiquote ""} + multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} dict set stateMatrix\ tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} dict set stateMatrix\ @@ -1722,11 +1857,11 @@ namespace eval tomlish::parse { array-space array-syntax curly-space curly-syntax keyval-space keytail - commakeyval-space commakeytail + itablekeyval-space itablevaltail } variable spacePushTransitions { keyval-space keyval-syntax - commakeyval-space commakeyval-syntax + itablekeyval-space itablekeyval-syntax array-space array-space curly-space curly-space key-space tablename @@ -1752,7 +1887,7 @@ namespace eval tomlish::parse { if {[dict exists $::tomlish::parse::stateMatrix $currentstate $tokentype]} { set transition_to [dict get $::tomlish::parse::stateMatrix $currentstate $tokentype] - puts "getNextState tokentype:$tokentype , currentstate:$currentstate : transition_to = $transition_to" + ::tomlish::log::info "getNextState tokentype:$tokentype currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { popspace { spacestack pop @@ -1763,7 +1898,7 @@ namespace eval tomlish::parse { if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { set next [dict get $::tomlish::parse::spacePopTransitions $target] - puts "--->> pop transition to space $target redirected state to $next <<---" + ::tomlish::log::debug "--->> pop transition to space $target redirected state to $next <<---" } else { set next $target } @@ -1772,11 +1907,11 @@ namespace eval tomlish::parse { samespace { #note the same data as popspace (spacePopTransitions) is used here. set parent [spacestack peek] - puts ">>>>>>>>> got parent $parent <<<<<" + ::tomlish::log::debug ">>>>>>>>> got parent $parent <<<<<" lassign $parent type target if {[dict exists $::tomlish::parse::spacePopTransitions $target]} { set next [dict get $::tomlish::parse::spacePopTransitions $target] - puts "--->> samespace transition to space $target redirected state to $next <<---" + ::tomlish::log::debug "--->> samespace transition to space $target redirected state to $next <<---" } else { set next $target } @@ -1802,7 +1937,7 @@ namespace eval tomlish::parse { #set next [list pushspace [lindex $transition_to 1]] set nexttokentype ${tokentype}2 ;#fake token type e.g tablename2 or tablearrayname2 - puts "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" + ::tomlish::log::notice "REENTRANCY!!! calling getNextState $nexttokentype $tokentype" set result [::tomlish::parse::getNextState $nexttokentype $tokentype] } pushspace { @@ -1814,7 +1949,7 @@ namespace eval tomlish::parse { #puts $::tomlish::parse::spacePushTransitions if {[dict exists $::tomlish::parse::spacePushTransitions $target]} { set next [dict get $::tomlish::parse::spacePushTransitions $target] - puts "--->> push transition to space $target redirected state to $next <<---" + ::tomlish::log::info "--->> push transition to space $target redirected state to $next <<---" } else { set next $target } @@ -1941,6 +2076,7 @@ namespace eval tomlish::parse { set slash_active 0 set quote 0 set c "" + set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { set lastChar [string index $s [expr {$i - 1}]] @@ -1955,6 +2091,8 @@ namespace eval tomlish::parse { set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 @@ -1993,6 +2131,7 @@ namespace eval tomlish::parse { } } lc { + set multi_dquote "" ;#!! #test jmn2024 #left curly brace try { @@ -2001,7 +2140,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring { + string - stringpart { if {$slash_active} {append tok "\\"} append tok $c } @@ -2025,7 +2164,6 @@ namespace eval tomlish::parse { } } } else { - #$slash_active not relevant when no tokenType switch -exact -- $state { value-expected { #switch last key to tablename?? @@ -2033,6 +2171,14 @@ namespace eval tomlish::parse { set tok "\{" return 1 } + multistring-space { + set_tokenType "stringpart" + if {$slash_active} { + set tok "\\\{" + } else { + set tok "\{" + } + } key-space { #invalid - but allow parser statemachine to report it. ? set_tokenType "startinlinetable" @@ -2058,6 +2204,7 @@ namespace eval tomlish::parse { } rc { + set multi_dquote "" ;#!! #right curly brace try { if {[string length $tokenType]} { @@ -2065,7 +2212,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring - comment { + string - stringpart - comment { if {$slash_active} {append tok "\\"} append tok $c } @@ -2083,7 +2230,7 @@ namespace eval tomlish::parse { dict set token_waiting tok "" return 1 } - commakeytail { + itablevaltail { } default { @@ -2133,13 +2280,16 @@ namespace eval tomlish::parse { set tok "\}" return 1 } - commakeytail { + itablevaltail { set_tokenType "endinlinetable" set tok "" #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 incr i -1 return 1 } + itablekeyval-syntax { + error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" + } default { #JMN2024b keytail? error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" @@ -2154,6 +2304,7 @@ namespace eval tomlish::parse { } lb { + set multi_dquote "" ;#!! #left square bracket try { if {[::string length $tokenType]} { @@ -2161,7 +2312,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring { + string - stringpart { if {$slash_active} {append tok "\\"} append tok $c } @@ -2218,30 +2369,37 @@ namespace eval tomlish::parse { } } rb { + set multi_dquote "" ;#!! #right square bracket try { if {[string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence - } elseif {$tokenType in {"string" "multistring" "comment"}} { - if {$slash_active} {append tok "\\"} - append tok $c - } elseif {$tokenType eq "tablename"} { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablename - dict set token_waiting tok "" - return 1 - } elseif {$tokenType eq "tablearrayname"} { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" - return 1 - } else { - incr i -1 - return 1 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + string - stringpart - comment { + if {$slash_active} {append tok "\\"} + append tok $c + } + tablename { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablename + dict set token_waiting tok "" + return 1 + } + tablearraynames { + if {$slash_active} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + default { + incr i -1 + return 1 + } } } else { #$slash_active not relevant when no tokenType @@ -2290,13 +2448,15 @@ namespace eval tomlish::parse { } } bsl { + set dquotes $multi_dquote + set multi_dquote "" ;#!! #backslash if {[::string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } - string - multistring - litstring - multilitstring - comment - tablename - tablearrayname { + string - litstring - multilitstring - comment - tablename - tablearrayname { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -2304,6 +2464,25 @@ namespace eval tomlish::parse { set slash_active 1 } } + stringpart { + if {$slash_active} { + #assert - quotes empty - or we wouldn't have slash_active + set slash_active 0 + append tok "\\\\" + } else { + append tok $dquotes + set slash_active 1 + } + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } barekey { error "Unexpected backslash during barekey. [tomlish::parse::report_line]" } @@ -2312,85 +2491,129 @@ namespace eval tomlish::parse { } } } else { - error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + if {$state eq "multistring-space"} { + set slash_active 1 + } else { + error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" + } } } dq { #double quote try { if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - set_tokenType "startmultiquote" - return 1 - } else { - error "unexpected token length in 'startquotesequence'" + switch -exact -- $tokenType { + startquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length in 'startquotesequence'" + } } - } elseif {$tokenType eq "endquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - set_tokenType "endmultiquote" - return 1 - } else { - error "unexpected token length in 'endquotesequence'" + endquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + set_tokenType "endmultiquote" + return 1 + } else { + error "unexpected token length in 'endquotesequence'" + } } - } elseif {$tokenType eq "string"} { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" - return 1 + string { + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #unescaped quote always terminates a string? + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } } - } elseif {$tokenType eq "multistring"} { - if {$slash_active} { - append tok "\\" + stringpart { + #sub element of multistring + if {$slash_active} { + append tok "\\" + append tok $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } + value-expected { + if {$multi_dquote eq "\"\""} { + dict set token_waiting type startmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + #end whitespace token and reprocess + incr i -1 + return 1 + #append multi_dquote "\"" + } + } + default { + dict set token_waiting type startquote + dict set token_waiting tok "\"" + return 1 + } + } + } + comment { + if {$slash_active} {append tok "\\"} append tok $c - } else { - incr i -1 - - if {$multi_endquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - return 1 + } + quotedkey - itablequotedkey { + if {$slash_active} { + append tok "\\" + append tok $c } else { - append multi_endquote "\"" + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 } } - } elseif {$tokenType eq "whitespace"} { - - dict set token_waiting type startquote - dict set token_waiting tok "\"" - return 1 - } elseif {$tokenType eq "comment"} { - if {$slash_active} {append tok "\\"} - append tok $c - } elseif {$tokenType eq "quotedkey"} { - if {$slash_active} { - append tok "\\" + tablename - tablearrayname { + if {$slash_active} {append tok "\\"} append tok $c - } else { - dict set token_waiting type endquote - dict set token_waiting tok "\"" + } + starttablename - starttablearrayname { + incr i -1 ;## return 1 } - } elseif {$tokenType in {"tablename" "tablearrayname"}} { - if {$slash_active} {append tok "\\"} - append tok $c - } elseif {$tokenType in {"starttablename" "starttablearrayname"}} { - incr i -1 ;## - return 1 - } else { - error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } } } else { #$slash_active not relevant when no tokenType @@ -2404,6 +2627,17 @@ namespace eval tomlish::parse { set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote set tok $c } + multistring-space { + #REVIEW + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } else { + append multi_dquote "\"" + } + } key-space { set tokenType startquote set tok $c @@ -2430,6 +2664,8 @@ namespace eval tomlish::parse { } } = { + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 @@ -2438,10 +2674,13 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - multistring - comment - quotedkey { + string - comment - quotedkey { #for these tokenTypes an = is just data. append tok $c } + stringpart { + append tok $dquotes$c + } whitespace { dict set token_waiting type equal dict set token_waiting tok = @@ -2457,56 +2696,103 @@ namespace eval tomlish::parse { } } } else { - set_tokenType equal - set tok = - return 1 + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok ${dquotes}= + } + default { + set_tokenType equal + set tok = + return 1 + } + } } } cr { + set dquotes $multi_dquote + set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + stringpart { + append tok $dquotes$c + } + default { + #!todo - error out if cr inappropriate for tokenType + append tok $c + } } - #!todo - error out if cr inappropriate for tokenType - append tok $c } else { + #lf may be appended if next + #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) set_tokenType newline set tok cr } } lf { + set dquotes $multi_dquote + set multi_dquote "" ;#!! # \n newline - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence - } elseif {$tokenType eq "newline"} { - #this lf is the trailing part of a crlf - append tok lf + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + newline { + #this lf is the trailing part of a crlf + append tok lf + return 1 + } + stringpart { + if {$dquotes ne ""} { + append tok $dquotes + incr i -1 + return 1 + } else { + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + default { + #newline ends all other tokens. + #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) + #note for whitespace: + # we will use the convention that \n terminates the current whitespace even if whitespace follows + # ie whitespace is split into separate whitespace tokens at each newline + + #puts "-------------- newline lf during tokenType $tokenType" + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } + } + } else { + set had_slash $slash_active + set slash_active 0 + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 return 1 } else { - #newline ends all other tokens. - #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) - #note for whitespace: - # we will use the convention that \n terminates the current whitespace even if whitespace follows - # ie whitespace is split into separate whitespace tokens at each newline - - #puts "-------------- newline lf during tokenType $tokenType" - dict set token_waiting type newline - dict set token_waiting tok lf + set_tokenType newline + set tok lf return 1 } - } else { - set_tokenType newline - set tok lf - return 1 } } , { + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 if {[::string length $tokenType]} { @@ -2517,6 +2803,9 @@ namespace eval tomlish::parse { string - comment - quotedkey - tablename - tablearrayname { append tok $c } + stringpart { + append tok $dquotes$c + } default { dict set token_waiting type comma dict set token_waiting tok "," @@ -2524,12 +2813,25 @@ namespace eval tomlish::parse { } } } else { - set_tokenType comma - set tok "," - return 1 + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "," + } + multiliteral-space { + set_tokenType literalpart + set tok "," + } + default { + set_tokenType comma + set tok "," + return 1 + } + } } } . { + set multi_dquote "" ;#!! if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. set slash_active 0 if {[::string length $tokenType]} { @@ -2537,7 +2839,7 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey - untyped-value { + string - stringpart - comment - quotedkey - untyped-value { append tok $c } baretablename - tablename - tablearrayname { @@ -2559,18 +2861,29 @@ namespace eval tomlish::parse { } } } else { - set_tokenType "untyped-value" - set tok $c - #set_tokenType period - #set tok "." - #return 1 + switch -exact -- $state { + multistring-space { + set_tokenType stringpart + set tok "." + } + multiliteral-space { + set_tokenType literalpart + set tok "." + } + default { + set_tokenType untyped-value + set tok "." + } + } } } " " { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {[::string length $tokenType]} { + set had_slash $slash_active + set slash_active 0 switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence @@ -2589,9 +2902,36 @@ namespace eval tomlish::parse { incr i -1 return 1 } - quotedkey - string - comment - whitespace { + comment { + if {$had_slash} { + append tok "\\" + } + append tok $c + } + quotedkey - string { + if {$had_slash} { + append tok "\\" + } + #if {$dquotes eq "\""} { + #} append tok $c } + whitespace { + append tok $c + } + stringpart { + if {$had_slash} { + #REVIEW + incr i -2 + return 1 + } else { + #split into STRINGPART aaa WS " " + #keeping WS separate allows easier processing of CONT stripping + append tok $dquotes + incr i -1 + return 1 + } + } starttablename { incr i -1 return 1 @@ -2610,66 +2950,129 @@ namespace eval tomlish::parse { } } } else { - if {$state in {"tablename" "tablearrayname"}} { - #tablename can have leading,trailing and interspersed whitespace! - #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - set tok $c - } else { - set_tokenType "whitespace" - append tok $c + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + if {$had_slash} { + set tok "\\$c" + } else { + set tok $c + } + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return + } + set_tokenType "whitespace" + append tok $c + } + } + default { + if {$had_slash} { + error "unexpected backslash [tomlish::parse::report_line]" + } + set_tokenType "whitespace" + append tok $c + } } } } tab { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {[::string length $tokenType]} { - if {$tokenType eq "startquotesequence"} { - _shortcircuit_startquotesequence - } elseif {($tokenType eq "barekey")} { - #whitespace is a terminator for bare keys - incr i -1 - #set token_waiting type whitespace - #set token_waiting tok $c - return 1 - } elseif {$tokenType eq "untyped-value"} { - #unquoted values (int,date,float etc) are terminated by whitespace - #dict set token_waiting type whitespace - #dict set token_waiting tok $c - incr i -1 - return 1 - } elseif {($tokenType eq "quotedkey")} { - append tok $c - } elseif {$tokenType eq "string"} { - append tok $c - } elseif {$tokenType eq "comment"} { - append tok $c - } elseif {$tokenType eq "whitespace"} { - append tok $c - } elseif {$tokenType eq "starttablename"} { - incr i -1 - return 1 - } elseif {$tokenType eq "starttablearrayname"} { - incr i -1 - return 1 - } elseif {$tokenType in {"tablename" "tablearrayname"}} { - #include whitespace in the tablename/tablearrayname - #Will need to be normalized upon interpreting the tomlish as a datastructure - append tok $c - } else { - error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + barekey { + #whitespace is a terminator for bare keys + incr i -1 + #set token_waiting type whitespace + #set token_waiting tok $c + return 1 + } + untyped_value { + #unquoted values (int,date,float etc) are terminated by whitespace + #dict set token_waiting type whitespace + #dict set token_waiting tok $c + incr i -1 + return 1 + } + quotedkey { + append tok $c + } + string - comment - whitespace { + append tok $c + } + stringpart { + append tok $dquotes$c + } + starttablename - starttablearrayname { + incr i -1 + return 1 + } + tablename - tablearraynames { + #include whitespace in the tablename/tablearrayname + #Will need to be normalized upon interpreting the tomlish as a datastructure + append tok $c + } + default { + error "Received whitespace tab during tokenType '$tokenType' [tomlish::parse::report_line]" + } } } else { - if {$state in {"tablename" "tablearrayname"}} { - #tablename can have leading,trailing and interspersed whitespace! - #These will not be treated as whitespace tokens, instead forming part of the name. - set_tokenType $state - set tok $c - } else { - set_tokenType "whitespace" - append tok $c + set had_slash $slash_active + if {$slash_active} { + set slash_active 0 + } + switch -exact -- $state { + tablename - tablearrayname { + #tablename can have leading,trailing and interspersed whitespace! + #These will not be treated as whitespace tokens, instead forming part of the name. + set_tokenType $state + set tok $c + } + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + set_tokenType stringpart + set tok $dquotes + incr i -1 + return 1 + } else { + set_tokenType whitespace + append tok $c + } + } + } + default { + set_tokenType "whitespace" + append tok $c + } } } } @@ -2680,14 +3083,19 @@ namespace eval tomlish::parse { return 1 } default { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 + set dquotes $multi_dquote + set multi_dquote "" ;#!! if {[::string length $tokenType]} { + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set slash_active 0 switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } + endquotesequence { + puts stderr "endquotesequence: $tok" + } whitespace { incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. return 1 @@ -2704,12 +3112,17 @@ namespace eval tomlish::parse { #allow statemachine to set context for subsequent chars return 1 } + stringpart { + append tok $dquotes$c + } default { #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } } else { + set had_slash $slash_active + set slash_active 0 switch -exact -- $state { key-space - curly-space - curly-syntax { #if no currently active token - assume another key value pair @@ -2720,6 +3133,15 @@ namespace eval tomlish::parse { error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } + multistring-space { + set_tokenType "stringpart" + if {$had_slash} { + #assert - we don't get had_slash and dquotes at same time + set tok \\$c + } else { + set tok $dquotes$c + } + } tablename { set_tokenType "tablename" set tok $c @@ -2765,7 +3187,7 @@ namespace eval tomlish::parse { dict set token_waiting tok "eof" return 1 } else { - puts "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" + ::tomlish::log::debug "No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]" set tokenType "eof" set tok "eof" }