From 99c93cac8da4aa578dde25344c0f38b375e58b3b Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Mon, 18 Mar 2024 22:24:13 +1100 Subject: [PATCH] update bootsupport modules --- src/bootsupport/include_modules.config | 1 + src/bootsupport/modules/overtype-1.6.0.tm | 3223 +++++++++++++++++ src/bootsupport/modules/punk/ansi-0.1.1.tm | 2293 ++++++++++-- src/bootsupport/modules/punk/args-0.1.0.tm | 2 +- src/bootsupport/modules/punk/cap-0.1.0.tm | 2 +- .../punk/cap/handlers/templates-0.1.0.tm | 4 +- src/bootsupport/modules/punk/char-0.1.0.tm | 35 +- src/bootsupport/modules/punk/console-0.1.1.tm | 128 +- .../modules/punk/fileline-0.1.0.tm | 8 +- src/bootsupport/modules/punk/lib-0.1.1.tm | 29 +- .../punk/mix/commandset/loadedlib-0.1.0.tm | 4 +- .../punk/mix/commandset/scriptwrap-0.1.0.tm | 984 ++--- .../templates/layouts/project/src/build.tcl | 6 + .../templates/layouts/project/src/make.tcl | 995 +++++ src/bootsupport/modules/punk/ns-0.1.0.tm | 76 +- src/bootsupport/modules/punkcheck-0.1.0.tm | 112 +- src/bootsupport/modules/uuid-1.0.7.tm | 245 ++ 17 files changed, 7349 insertions(+), 798 deletions(-) create mode 100644 src/bootsupport/modules/overtype-1.6.0.tm create mode 100644 src/bootsupport/modules/punk/mix/templates/layouts/project/src/build.tcl create mode 100644 src/bootsupport/modules/punk/mix/templates/layouts/project/src/make.tcl create mode 100644 src/bootsupport/modules/uuid-1.0.7.tm diff --git a/src/bootsupport/include_modules.config b/src/bootsupport/include_modules.config index 74638c9..328b407 100644 --- a/src/bootsupport/include_modules.config +++ b/src/bootsupport/include_modules.config @@ -11,6 +11,7 @@ set bootsupport_modules [list\ src/vendormodules textutil::tabify\ src/vendormodules textutil::split\ src/vendormodules textutil::wcswidth\ + src/vendormodules uuid\ modules punkcheck\ modules punk::ansi\ modules punk::args\ diff --git a/src/bootsupport/modules/overtype-1.6.0.tm b/src/bootsupport/modules/overtype-1.6.0.tm new file mode 100644 index 0000000..22f4d06 --- /dev/null +++ b/src/bootsupport/modules/overtype-1.6.0.tm @@ -0,0 +1,3223 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) Julian Noble 2003-2023 +# +# @@ Meta Begin +# Application overtype 1.6.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin overtype_module_overtype 0 1.6.0] +#[copyright "2024"] +#[titledesc {overtype text layout - ansi aware}] [comment {-- Name section and table of contents description --}] +#[moddesc {overtype text layout}] [comment {-- Description at end of page heading --}] +#[require overtype] +#[keywords module text ansi] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of overtype +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by overtype +#[list_begin itemized] + +package require Tcl 8.6 +package require textutil +package require punk::lib ;#required for lines_as_list +package require punk::ansi ;#required to detect, split, strip and calculate lengths +package require punk::char ;#box drawing - and also unicode character width determination for proper layout of text with double-column-width chars +package require punk::assertion +#*** !doctools +#[item] [package {Tcl 8.6}] +#[item] [package textutil] +#[item] [package punk::ansi] +#[para] - required to detect, split, strip and calculate lengths of text possibly containing ansi codes +#[item] [package punk::char] +#[para] - box drawing - and also unicode character width determination for proper layout of text with double-column-width chars + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section API] + + +#Julian Noble - 2003 +#Released under standard 'BSD license' conditions. +# +#todo - ellipsis truncation indicator for center,right + +#v1.4 2023-07 - naive ansi color handling - todo - fix string range +# - need to extract and replace ansi codes? + +namespace eval overtype { + namespace import ::punk::assertion::assert + punk::assertion::active true + + namespace export * + variable default_ellipsis_horizontal "..." ;#fallback + variable default_ellipsis_vertical "..." + namespace eval priv { + proc _init {} { + upvar ::overtype::default_ellipsis_horizontal e_h + upvar ::overtype::default_ellipsis_vertical e_v + set e_h [format %c 0x2026] ;#Unicode Horizontal Ellipsis + set e_v [format %c 0x22EE] + #The unicode ellipsis looks more natural than triple-dash which is centred vertically whereas ellipsis is at floorline of text + #Also - unicode ellipsis has semantic meaning that other processors can interpret + #unicode does also provide a midline horizontal ellipsis 0x22EF + + #set e [format %c 0x2504] ;#punk::char::charshort boxd_ltdshhz - Box Drawings Light Triple Dash Horizontal + #if {![catch {package require punk::char}]} { + # set e [punk::char::charshort boxd_ltdshhz] + #} + } + } + priv::_init +} +proc overtype::about {} { + return "Simple text formatting. Author JMN. BSD-License" +} + +namespace eval overtype { + variable grapheme_widths [dict create] + + variable escape_terminals + #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). + dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] + #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic + dict set escape_terminals OSC [list \007 \033\\] ;#note mix of 1 and 2-byte terminals + + #self-contained 2 byte ansi escape sequences - review more? + variable ansi_2byte_codes_dict + set ansi_2byte_codes_dict [dict create\ + "reset_terminal" "\u001bc"\ + "save_cursor_posn" "\u001b7"\ + "restore_cursor_posn" "\u001b8"\ + "cursor_up_one" "\u001bM"\ + "NEL - Next Line" "\u001bE"\ + "IND - Down one line" "\u001bD"\ + "HTS - Set Tab Stop" "\u001bH"\ + ] + + #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. + # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ +} + + +#proc overtype::stripansi {text} { +# variable escape_terminals ;#dict +# variable ansi_2byte_codes_dict +# #important that we don't spend too much time on this for plain text that doesn't contain any escapes anyway +# if {[string first \033 $text] <0 && [string first \009c $text] <0} { +# #\033 same as \x1b +# return $text +# } +# +# set text [convert_g0 $text] +# +# #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. +# #line endings can theoretically occur within an ansi escape sequence (review e.g title?) +# set inputlist [split $text ""] +# set outputlist [list] +# +# set 2bytecodes [dict values $ansi_2byte_codes_dict] +# +# set in_escapesequence 0 +# #assumption - undertext already 'rendered' - ie no backspaces or carriagereturns or other cursor movement controls +# set i 0 +# foreach u $inputlist { +# set v [lindex $inputlist $i+1] +# set uv ${u}${v} +# if {$in_escapesequence eq "2b"} { +# #2nd byte - done. +# set in_escapesequence 0 +# } elseif {$in_escapesequence != 0} { +# set escseq [dict get $escape_terminals $in_escapesequence] +# if {$u in $escseq} { +# set in_escapesequence 0 +# } elseif {$uv in $escseq} { +# set in_escapseequence 2b ;#flag next byte as last in sequence +# } +# } else { +# #handle both 7-bit and 8-bit CSI and OSC +# if {[regexp {^(?:\033\[|\u009b)} $uv]} { +# set in_escapesequence CSI +# } elseif {[regexp {^(?:\033\]|\u009c)} $uv]} { +# set in_escapesequence OSC +# } elseif {$uv in $2bytecodes} { +# #self-contained e.g terminal reset - don't pass through. +# set in_escapesequence 2b +# } else { +# lappend outputlist $u +# } +# } +# incr i +# } +# return [join $outputlist ""] +#} + + + + + +proc overtype::string_columns {text} { + if {[punk::ansi::ta::detect $text]} { + #error "error string_columns is for calculating character length of string - ansi codes must be stripped/rendered first e.g with punk::ansi::stripansi. Alternatively try punk::ansi::printing_length" + set text [punk::ansi::stripansi $text] + } + return [punk::char::ansifreestring_width $text] +} + +#todo - consider a way to merge overtype::left/centre/right +#These have similar algorithms/requirements - and should be refactored to be argument-wrappers over a function called something like overtype::renderblock +#overtype::renderblock could render the input to a defined (possibly overflowing in x or y) rectangle overlapping the underlay. +#(i.e not even necessariy having it's top left within the underlay) +namespace eval overtype::priv { +} + +#could return larger than colwidth +proc _get_row_append_column {row} { + upvar outputlines outputlines + set idx [expr {$row -1}] + if {$row <= 1 || $row > [llength $outputlines]} { + return 1 + } else { + upvar opt_overflow opt_overflow + upvar colwidth colwidth + set existinglen [punk::ansi::printing_length [lindex $outputlines $idx]] + set endpos [expr {$existinglen +1}] + if {$opt_overflow} { + return $endpos + } else { + if {$endpos > $colwidth} { + return $colwidth + 1 + } else { + return $endpos + } + } + } +} +#string range should generally be avoided for both undertext and overtext which contain ansi escapes and other cursor affecting chars such as \b and \r +#render onto an already-rendered (ansi already processed) 'underlay' string, a possibly ansi-laden 'overlay' string. +#The underlay and overlay can be multiline blocks of text of varying line lengths. +#The overlay may just be an ansi-colourised block - or may contain ansi cursor movements and cursor save/restore calls - in which case the apparent length and width of the overlay can't be determined as if it was a block of text. +#This is a single-shot rendering of strings - ie there is no way to chain another call containing a cursor-restore to previously rendered output and have it know about any cursor-saves in the first call. +# a cursor start position other than top-left is a possible addition to consider. +#see editbuf in punk::repl for a more stateful ansi-processor. Both systems use loops over overtype::renderline +proc overtype::left {args} { + #*** !doctools + #[call [fun overtype::left] [arg args] ] + #[para] usage: ?-transparent [0|1]? ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext + + # @c overtype starting at left (overstrike) + # @c can/should we use something like this?: 'format "%-*s" $len $overtext + variable default_ellipsis_horizontal + + if {[llength $args] < 2} { + error {usage: ?-transparent [0|1]? ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext} + } + lassign [lrange $args end-1 end] underblock overblock + set defaults [dict create\ + -bias ignored\ + -width \uFFEF\ + -height \uFFeF\ + -wrap 0\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -appendlines 1\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -experimental 0\ + -looplimit 100000\ + ] + #-ellipsis args not used if -wrap is true + set argsflags [lrange $args 0 end-2] + dict for {k v} $argsflags { + switch -- $k { + -looplimit - -width - -height - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -appendlines - -transparent - -exposed1 - -exposed2 - -experimental {} + default { + set known_opts [dict keys $defaults] + error "overtype::left unknown option '$k'. Known options: $known_opts" + } + } + } + set opts [dict merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_overflow [dict get $opts -overflow] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_wrap [dict get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### + #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. + set opt_width [dict get $opts -width] + set opt_height [dict get $opts -height] + set opt_appendlines [dict get $opts -appendlines] + set opt_transparent [dict get $opts -transparent] + set opt_ellipsistext [dict get $opts -ellipsistext] + set opt_ellipsiswhitespace [dict get $opts -ellipsiswhitespace] + set opt_exposed1 [dict get $opts -exposed1] ;#widechar_exposed_left - todo + set opt_exposed2 [dict get $opts -exposed2] ;#widechar_exposed_right - todo + # -- --- --- --- --- --- + + #a hack until we work out how to avoid infinite loops... + # + set looplimit [dict get $opts -looplimit] + + # ---------------------------- + # -experimental dev flag to set flags etc + # ---------------------------- + set data_mode 0 + set test_mode 1 + set info_mode 0 + set edit_mode 0 + set opt_experimental [dict get $opts -experimental] + foreach o $opt_experimental { + switch -- $o { + test_mode { + set test_mode 1 + set info_mode 1 + } + old_mode { + set test_mode 0 + set info_mode 1 + } + data_mode { + set data_mode 1 + } + info_mode { + set info_mode 1 + } + edit_mode { + set edit_mode 1 + } + } + } + # ---------------------------- + + #modes + set insert_mode 0 ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l + set autowrap_mode $opt_wrap + set reverse_mode 0 + + + set norm [list \r\n \n] + set underblock [string map $norm $underblock] + set overblock [string map $norm $overblock] + + + #set underlines [split $underblock \n] + + #underblock is a 'rendered' block - so width height make sense + if {$opt_width eq "\uFFEF"} { + lassign [blocksize $underblock] _w colwidth _h colheight + } else { + set colwidth $opt_width + set colheight $opt_height + } + if {$underblock eq ""} { + set blank "\x1b\[0m\x1b\[0m" + #set underlines [list "\x1b\[0m\x1b\[0m"] + set underlines [lrepeat $colheight $blank] + } else { + set underlines [lines_as_list -ansiresets 1 $underblock] + } + + #todo - reconsider the 'line' as the natural chunking mechanism for the overlay. + #In practice an overlay ANSI stream can be a single line with ansi moves/restores etc - or even have no moves or newlines, just relying on wrapping at the output colwidth + #In such cases - we process the whole shebazzle for the first output line - only reducing by the applied amount at the head each time, reprocessing the long tail each time. + #(in cases where there are interline moves or cursor jumps anyway) + #This works - but doesn't seem efficient. + #On the other hand.. maybe it depends on the data. For simpler files it's more efficient than splitting first + + if {!$test_mode} { + set inputchunks [split $overblock \n] + } else { + set scheme 3 + switch -- $scheme { + 0 { + #one big chunk + set inputchunks [list $overblock] + } + 1 { + set inputchunks [punk::ansi::ta::split_codes $overblock] + } + 2 { + + #split into lines if possible first - then into plaintext/ansi-sequence chunks ? + set inputchunks [list ""] ;#put an empty plaintext split in for starters + set i 1 + set lines [split $overblock \n] + foreach ln $lines { + if {$i < [llength $lines]} { + append ln \n + } + set sequence_split [punk::ansi::ta::split_codes_single $ln] ;#use split_codes Not split_codes_single? + set lastpt [lindex $inputchunks end] + lset inputchunks end [string cat $lastpt [lindex $sequence_split 0]] + lappend inputchunks {*}[lrange $sequence_split 1 end] + incr i + } + } + 3 { + #it turns out line based chunks are faster than the above.. probably because some of those end up doing the regex splitting twice + set lflines [list] + set inputchunks [split $overblock \n] + foreach ln $inputchunks { + append ln \n + lappend lflines $ln + } + if {[llength $lflines]} { + lset lflines end [string range [lindex $lflines end] 0 end-1] + } + set inputchunks $lflines[unset lflines] + + } + } + } + + + #overblock height/width isn't useful in the presence of an ansi input overlay with movements. The number of lines may bear little relationship to the output height + #lassign [blocksize $overblock] _w overblock_width _h overblock_height + + + set replay_codes_underlay [dict create 1 ""] + #lappend replay_codes_overlay "" + set replay_codes_overlay "" + set unapplied "" + set cursor_saved_position [dict create] + set cursor_saved_attributes "" + + + set outputlines $underlines + set overidx 0 + + #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext + set row 1 + if {$data_mode} { + set col [_get_row_append_column $row] + } else { + set col 1 + } + + set instruction_stats [dict create] + + set loop 0 + #while {$overidx < [llength $inputchunks]} { } + + while {[llength $inputchunks]} { + #set overtext [lindex $inputchunks $overidx]; lset inputchunks $overidx "" + set overtext [lpop inputchunks 0] + if {![string length $overtext]} { + continue + } + #puts "----->[ansistring VIEW -lf 1 -vt 1 -nul 1 $overtext]<----" + set undertext [lindex $outputlines [expr {$row -1}]] + set renderedrow $row + + #renderline pads each underaly line to width with spaces and should track where end of data is + + + #set overtext [string cat [lindex $replay_codes_overlay $overidx] $overtext] + set overtext [string cat $replay_codes_overlay $overtext] + if {[dict exists $replay_codes_underlay $row]} { + set undertext [string cat [dict get $replay_codes_underlay $row] $undertext] + } + #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - + #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l + #set LASTCALL [list -info 1 -insert_mode $insert_mode -autowrap_mode $autowrap_mode -transparent $opt_transparent -width $colwidth -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -cursor_column $col -cursor_row $row $undertext $overtext] + set rinfo [renderline -experimental $opt_experimental -info 1 -insert_mode $insert_mode -cursor_restore_attributes $cursor_saved_attributes -autowrap_mode $autowrap_mode -transparent $opt_transparent -width $colwidth -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -cursor_column $col -cursor_row $row $undertext $overtext] + set instruction [dict get $rinfo instruction] + set insert_mode [dict get $rinfo insert_mode] + set autowrap_mode [dict get $rinfo autowrap_mode] ;# + #set reverse_mode [dict get $rinfo reverse_mode];#how to support in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? + set rendered [dict get $rinfo result] + set overflow_right [dict get $rinfo overflow_right] + set overflow_right_column [dict get $rinfo overflow_right_column] + set unapplied [dict get $rinfo unapplied] + set post_render_col [dict get $rinfo cursor_column] + set post_render_row [dict get $rinfo cursor_row] + set c_saved_pos [dict get $rinfo cursor_saved_position] + set c_saved_attributes [dict get $rinfo cursor_saved_attributes] + set visualwidth [dict get $rinfo visualwidth] + set insert_lines_above [dict get $rinfo insert_lines_above] + set insert_lines_below [dict get $rinfo insert_lines_below] + dict set replay_codes_underlay [expr {$renderedrow+1}] [dict get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [dict get $rinfo replay_codes_overlay] + set replay_codes_overlay [dict get $rinfo replay_codes_overlay] + + + + #-- todo - detect looping properly + if {$row > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == $row && $instruction eq ""} { + puts stderr "overtype::left loop?" + puts [ansistring VIEW $rinfo] + break + } + #-- + + if {[dict size $c_saved_pos] >= 1} { + set cursor_saved_position $c_saved_pos + set cursor_saved_attributes $c_saved_attributes + } + + #background line is narrower than data in line + + set overflow_handled 0 + if {!$opt_overflow && !$autowrap_mode} { + #not allowed to overflow column or wrap therefore we get overflow data to truncate + if {[dict get $opts -ellipsis]} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[string trim $lostdata] eq ""} { + set show_ellipsis 0 + } + #set lostdata [string range $overtext end-[expr {$overflowlength-1}] end] + if {[string trim [ansistrip $lostdata]] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::right $rendered $opt_ellipsistext] + } + set overflow_handled 1 + } else { + #no wrap - no ellipsis - silently truncate + set overflow_handled 1 + } + } + + + + set nextprefix "" + + + #todo - handle potential insertion mode as above for cursor restore? + #keeping separate branches for debugging - review and merge as appropriate when stable + dict incr instruction_stats $instruction + switch -- $instruction { + {} { + if {$test_mode == 0} { + incr row + if {$data_mode} { + set col [_get_row_append_column $row] + if {$col > $colwidth} { + + } + } else { + set col 1 + } + } else { + #lf included in data + set row $post_render_row + set col $post_render_col + + #set col 1 + #if {$post_render_row != $renderedrow} { + # set col 1 + #} else { + # set col $post_render_col + #} + } + } + up { + + #renderline knows it's own line number, and knows not to go above row l + #it knows that a move whilst 1-beyond the width conflicts with the linefeed and reduces the move by one accordingly. + #row returned should be correct. + #column may be the overflow column - as it likes to report that to the caller. + + #Note that an ansi up sequence after last column going up to a previous line and also beyond the last column, will result in the next grapheme going onto the following line. + #this seems correct - as the column remains beyond the right margin so subsequent chars wrap (?) review + #puts stderr "up $post_render_row" + #puts stderr "$rinfo" + + #puts stdout "1 row:$row col $col" + set row $post_render_row + #data_mode (naming?) determines if we move to end of existing data or not. + #data_mode 0 means ignore existing line length and go to exact column + #set by -experimental flag + if {$data_mode == 0} { + set col $post_render_col + } else { + #This doesn't really work if columns are pre-filled with spaces..we can't distinguish them from data + #we need renderline to return the number of the maximum column filled (or min if we ever do r-to-l) + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + } + + #puts stdout "2 row:$row col $col" + #puts stdout "-----------------------" + #puts stdout $rinfo + #flush stdout + } + down { + if {$data_mode == 0} { + #renderline doesn't know how far down we can go.. + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } + set row $post_render_row + set col $post_render_col + } else { + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + + } + } + restore_cursor { + #testfile belinda.ans uses this + + #puts stdout "[a+ blue bold]CURSOR_RESTORE[a]" + if {[dict exists $cursor_saved_position row]} { + set row [dict get $cursor_saved_position row] + set col [dict get $cursor_saved_position column] + #puts stdout "restoring: row $row col $col [ansistring VIEW $cursor_saved_attributes] [a] unapplied [ansistring VIEWCODES $unapplied]" + #set nextprefix $cursor_saved_attributes + #lset replay_codes_overlay [expr $overidx+1] $cursor_saved_attributes + set replay_codes_overlay [dict get $rinfo replay_codes_overlay]$cursor_saved_attributes + #set replay_codes_overlay $cursor_saved_attributes + set cursor_saved_position [dict create] + set cursor_saved_attributes "" + } else { + #TODO + #?restore without save? + #should move to home position and reset ansi SGR? + #puts stderr "overtype::left cursor_restore without save data available" + } + #If we were inserting prior to hitting the cursor_restore - there could be overflow_right data - generally the overtype functions aren't for inserting - but ansi can enable it + #if we were already in overflow when cursor_restore was hit - it shouldn't have been processed as an action - just stored. + if {!$overflow_handled && $overflow_right ne ""} { + #wrap before restore? - possible effect on saved cursor position + #this overflow data has previously been rendered so has no cursor movements or further save/restore operations etc + #we can just insert another call to renderline to solve this.. ? + #It would perhaps be more properly handled as a queue of instructions from our initial renderline call + #we don't need to worry about overflow next call (?)- but we should carry forward our gx and ansi stacks + + puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" + + set sub_info [overtype::renderline -info 1 -width $colwidth -insert_mode $insert_mode -autowrap_mode $autowrap_mode -overflow [dict get $opts -overflow] "" $overflow_right] + set foldline [dict get $sub_info result] + set insert_mode [dict get $sub_info insert_mode] ;#probably not needed.. + set autowrap_mode [dict get $sub_info autowrap_mode] ;#nor this.. + linsert outputlines $renderedrow $foldline + #review - row & col set by restore - but not if there was no save.. + } + set overflow_handled 1 + + } + move { + ######## + if {$post_render_row > [llength $outputlines]} { + #Ansi moves need to create new lines ? + #if {$opt_appendlines} { + # set diff [expr {$post_render_row - [llength $outputlines]}] + # if {$diff > 0} { + # lappend outputlines {*}[lrepeat $diff ""] + # } + # set row $post_render_row + #} else { + set row [llength $outputlines] + #} + } else { + set row $post_render_row + } + ####### + set col $post_render_col + #overflow + unapplied? + } + lf_start { + #raw newlines - must be test_mode + # ---------------------- + #test with fruit.ans + #test - treating as newline below... + #append rendered $overflow_right + #set overflow_right "" + set row $renderedrow + incr row + if {$row > [llength $outputlines]} { + lappend outputlines "" + } + set col 1 + # ---------------------- + } + lf_mid { + + if 0 { + #set rhswidth [punk::ansi::printing_length $overflow_right] + #only show debug when we have overflow? + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + + set rhs "" + if {$overflow_right ne ""} { + set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $overflow_right]] + set rhs [textblock::frame -title overflow_right $rhs] + } + puts [textblock::join $lhs " $post_render_col " $rhs] + } + + if {!$test_mode} { + #rendered + append rendered $overflow_right + #set replay_codes_overlay "" + set overflow_right "" + + + set row $renderedrow + + set col 1 + incr row + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } else { + set edit_mode 0 + if {$edit_mode} { + set inputchunks [linsert $inputchunks 0 $overflow_right$unapplied] + set overflow_right "" + set unapplied "" + set row $post_render_row + #set col $post_render_col + set col 1 + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } else { + append rendered $overflow_right + set overflow_right "" + set row $post_render_row + set col 1 + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } + } + } + lf_overflow { + #linefeed after colwidth e.g at column 81 for an 80 col width + #we may also have other control sequences that came after col 80 e.g cursor save + + if 0 { + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" + + #assertion - there should be no overflow.. + puts $lhs + } + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right + + set row $post_render_row + #set row $renderedrow + #incr row + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + set col 1 + + } + newlines_above { + #we get a newlines_above instruction when received at column 1 + #In some cases we want to treat that as request to insert a new blank line above, and move our row 1 down (staying with the data) + #in other cases - we want to treat at column 1 the same as any other + + puts "--->newlines_above" + puts "rinfo: $rinfo" + #renderline doesn't advance the row for us - the caller has the choice to implement or not + set row $post_render_row + set col $post_render_col + if {$insert_lines_above > 0} { + set row $renderedrow + set outputlines [linsert $outputlines $renderedrow-1 {*}[lrepeat $insert_lines_above ""]] + incr row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above + #? set row $post_render_row #can renderline tell us? + } + } + newlines_below { + #obsolete? - use for ANSI insert lines sequence + if {$data_mode == 0} { + puts --->nl_below + set row $post_render_row + set col $post_render_col + if {$insert_lines_below == 1} { + if {$test_mode == 0} { + set row $renderedrow + set outputlines [linsert $outputlines [expr {$renderedrow }] {*}[lrepeat $insert_lines_below ""]] ;#note - linsert can add to end too + incr row $insert_lines_below + set col 1 + } else { + #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] + #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] + #set rhs "" + #if {$overflow_right ne ""} { + # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] + # set rhs [textblock::frame -title overflow_right $rhs] + #} + #puts [textblock::join $lhs $rhs] + + #rendered + append rendered $overflow_right + # + + + set overflow_right "" + set row $renderedrow + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat $insert_lines_below ""] + } + incr row $insert_lines_below + set col 1 + + + + } + } + } else { + set row $post_render_row + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } else { + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + } + } + } + wrapmoveforward { + #doesn't seem to be used by fruit.ans testfile + #used by dzds.ans + #note that cursor_forward may move deep into the next line - or even span multiple lines !TODO + set c $colwidth + set r $post_render_row + if {$post_render_col > $colwidth} { + set i $c + while {$i <= $post_render_col} { + if {$c == $colwidth+1} { + incr r + if {$opt_appendlines} { + if {$r < [llength $outputlines]} { + lappend outputlines "" + } + } + set c 1 + } else { + incr c + } + incr i + } + set col $c + } else { + #why are we getting this instruction then? + puts stderr "wrapmoveforward - test" + set r [expr {$post_render_row +1}] + set c $post_render_col + } + set row $r + set col $c + } + wrapmovebackward { + set c $colwidth + set r $post_render_row + if {$post_render_col < 1} { + set c 1 + set i $c + while {$i >= $post_render_col} { + if {$c == 0} { + if {$r > 1} { + incr r -1 + set c $colwidth + } else { + #leave r at 1 set c 1 + #testfile besthpav.ans first line top left border alignment + set c 1 + break + } + } else { + incr c -1 + } + incr i -1 + } + set col $c + } else { + puts stderr "Wrapmovebackward - but postrendercol >= 1???" + } + set row $r + set col $c + } + overflow { + #normal single-width grapheme overflow + #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" + set row $post_render_row ;#renderline will not advance row when reporting overflow char + incr row + set col 1 ;#whether wrap or not - next data is at column 1 + if {!$autowrap_mode} { + set overflow_handled 1 + set unapplied "" + #handled by dropping it + } + } + overflow_splitchar { + #2nd half of grapheme would overflow - grapheme returned in unapplied. There may also be overflow_right from earlier inserts + #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc + incr row + if {$autowrap_mode} { + set col 1 + } else { + set overflow_handled 1 + #handled by dropping it + } + } + vt { + + #can vt add a line like a linefeed can? + set row $post_render_row + set col $post_render_col + } + default { + puts stderr "overtype::left unhandled renderline instruction '$instruction'" + } + + } + + if {$renderedrow <= [llength $outputlines]} { + lset outputlines [expr {$renderedrow-1}] $rendered + } else { + if {$opt_appendlines} { + lappend outputlines $rendered + } else { + #? + lset outputlines [expr {$renderedrow-1}] $rendered + } + } + + if {!$overflow_handled} { + append nextprefix $overflow_right + } + + append nextprefix $unapplied + + if 0 { + if {$nextprefix ne ""} { + set nextoveridx [expr {$overidx+1}] + if {$nextoveridx >= [llength $inputchunks]} { + lappend inputchunks $nextprefix + } else { + #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] + set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] + } + } + } + + if {$nextprefix ne ""} { + set inputchunks [linsert $inputchunks 0 $nextprefix] + } + + + incr overidx + incr loop + if {$loop >= $looplimit} { + puts stderr "overtype::left looplimit reached" + lappend outputlines "[a+ red bold] - looplimit $looplimit reached[a]" + break + } + } + + set result [join $outputlines \n] + if {$info_mode} { + append result \n$instruction_stats\n + } + return $result +} + +namespace eval overtype::piper { + proc overcentre {args} { + if {[llength $args] < 2} { + error {usage: ?-bias left|right? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} + } + lassign [lrange $args end-1 end] over under + set argsflags [lrange $args 0 end-2] + tailcall overtype::centre {*}$argsflags $under $over + } + proc overleft {args} { + if {[llength $args] < 2} { + error {usage: ?-startcolumn ? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} + } + lassign [lrange $args end-1 end] over under + set argsflags [lrange $args 0 end-2] + tailcall overtype::left {*}$argsflags $under $over + } +} +#todo - left-right ellipsis ? +proc overtype::centre {args} { + variable default_ellipsis_horizontal + if {[llength $args] < 2} { + error {usage: ?-transparent [0|1]? ?-bias [left|right]? ?-overflow [1|0]? undertext overtext} + } + + foreach {underblock overblock} [lrange $args end-1 end] break + + #todo - vertical vs horizontal overflow for blocks + set defaults [dict create\ + -bias left\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + ] + set argsflags [lrange $args 0 end-2] + dict for {k v} $argsflags { + switch -- $k { + -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 {} + default { + set known_opts [dict keys $defaults] + error "overtype::centre unknown option '$k'. Known options: $known_opts" + } + } + } + set opts [dict merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_transparent [dict get $opts -transparent] + set opt_ellipsis [dict get $opts -ellipsis] + set opt_ellipsistext [dict get $opts -ellipsistext] + set opt_ellipsiswhitespace [dict get $opts -ellipsiswhitespace] + set opt_exposed1 [dict get $opts -exposed1] + set opt_exposed2 [dict get $opts -exposed2] + # -- --- --- --- --- --- + + + set norm [list \r\n \n] + set underblock [string map $norm $underblock] + set overblock [string map $norm $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {$colwidth - $overblock_width}] + if {$under_exposed_max > 0} { + #background block is wider + if {$under_exposed_max % 2 == 0} { + #even left/right exposure + set left_exposed [expr {$under_exposed_max / 2}] + } else { + set beforehalf [expr {$under_exposed_max / 2}] ;#1 less than half due to integer division + if {[string tolower [dict get $opts -bias]] eq "left"} { + set left_exposed $beforehalf + } else { + #bias to the right + set left_exposed [expr {$beforehalf + 1}] + } + } + } else { + set left_exposed 0 + } + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + set undertext "$undertext[string repeat { } $udiff]" + } + set undertext [string cat $replay_codes_underlay $undertext] + set overtext [string cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + #review - right-to-left langs should elide on left! - extra option required + + if {$overflowlength > 0} { + #overlay line wider or equal + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow [dict get $opts -overflow] -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + set rendered [dict get $rinfo result] + set overflow_right [dict get $rinfo overflow_right] + set unapplied [dict get $rinfo unapplied] + #todo - get replay_codes from overflow_right instead of wherever it was truncated? + + #overlay line data is wider - trim if overflow not specified in opts - and overtype an ellipsis at right if it was specified + if {![dict get $opts -overflow]} { + #lappend outputlines [string range $overtext 0 [expr {$colwidth - 1}]] + #set overtext [string range $overtext 0 $colwidth-1 ] + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + #don't use string range on ANSI data + #set lostdata [string range $overtext end-[expr {$overflowlength-1}] end] + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[string trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::right $rendered $opt_ellipsistext] + } + } + } + lappend outputlines $rendered + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent $undertext $overtext] + } else { + #background block is wider than or equal to data for this line + #lappend outputlines [renderline -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + set rinfo [renderline -info 1 -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + lappend outputlines [dict get $rinfo result] + } + set replay_codes_underlay [dict get $rinfo replay_codes_underlay] + set replay_codes_overlay [dict get $rinfo replay_codes_overlay] + } + return [join $outputlines \n] +} + +proc overtype::right {args} { + #NOT the same as align-right - which should be done to the overblock first if required + variable default_ellipsis_horizontal + # @d !todo - implement overflow, length checks etc + + if {[llength $args] < 2} { + error {usage: ?-overflow [1|0]? ?-transparent 0|? undertext overtext} + } + foreach {underblock overblock} [lrange $args end-1 end] break + + set defaults [dict create\ + -bias ignored\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -align "left"\ + ] + set argsflags [lrange $args 0 end-2] + dict for {k v} $argsflags { + switch -- $k { + -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 - -align {} + default { + set known_opts [dict keys $defaults] + error "overtype::centre unknown option '$k'. Known options: $known_opts" + } + } + } + set opts [dict merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_transparent [dict get $opts -transparent] + set opt_ellipsis [dict get $opts -ellipsis] + set opt_ellipsistext [dict get $opts -ellipsistext] + set opt_ellipsiswhitespace [dict get $opts -ellipsiswhitespace] + set opt_overflow [dict get $opts -overflow] + set opt_exposed1 [dict get $opts -exposed1] + set opt_exposed2 [dict get $opts -exposed2] + set opt_align [dict get $opts -align] + # -- --- --- --- --- --- + + set norm [list \r\n \n] + set underblock [string map $norm $underblock] + set overblock [string map $norm $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {max(0,$colwidth - $overblock_width)}] + set left_exposed $under_exposed_max + + + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + #puts xxx + append undertext [string repeat { } $udiff] + } + if {$overtext_datalen < $overblock_width} { + set odiff [expr {$overblock_width - $overtext_datalen}] + switch -- $opt_align { + left { + set startoffset 0 + } + right { + set startoffset $odiff + } + default { + set half [expr {$odiff / 2}] + #set lhs [string repeat { } $half] + #set righthalf [expr {$odiff - $half}] ;#remainder - may be one more - so we are biased left + #set rhs [string repeat { } $righthalf] + set startoffset $half + } + } + } else { + set startoffset 0 ;#negative? + } + + set undertext [string cat $replay_codes_underlay $undertext] + set overtext [string cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + if {$overflowlength > 0} { + #raw overtext wider than undertext column + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -startcolumn [expr {1 + $startoffset}] $undertext $overtext] + set replay_codes [dict get $rinfo replay_codes] + set rendered [dict get $rinfo result] + if {!$opt_overflow} { + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + set lostdata [string range $overtext end-[expr {$overflowlength-1}] end] + if {[string trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set ellipsis [string cat $replay_codes $opt_ellipsistext] + #todo - overflow on left if allign = right?? + set rendered [overtype::right $rendered $ellipsis] + } + } + } + lappend outputlines $rendered + } else { + #padded overtext + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] + #Note - we still need overflow here - as although the overtext is short - it may oveflow due to the startoffset + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] + lappend outputlines [dict get $rinfo result] + } + set replay_codes [dict get $rinfo replay_codes] + set replay_codes_underlay [dict get $rinfo replay_codes_underlay] + set replay_codes_overlay [dict get $rinfo replay_codes_overlay] + } + + return [join $outputlines \n] +} + +# -- --- --- --- --- --- --- --- --- --- --- +proc overtype::transparentline {args} { + foreach {under over} [lrange $args end-1 end] break + set argsflags [lrange $args 0 end-2] + set defaults [dict create\ + -transparent 1\ + -exposed 1 " "\ + -exposed 2 " "\ + ] + set newargs [dict merge $defaults $argsflags] + tailcall overtype::renderline {*}$newargs $under $over +} +#renderline may not make sense as it is in the long run for blocks of text - but is handy in the single-line-handling form anyway. +# We are trying to handle ansi codes in a block of text which is acting like a mini-terminal in some sense. +#We can process standard cursor moves such as \b \r - but no way to respond to other cursor movements e.g moving to other lines. +# +namespace eval overtype::piper { + proc renderline {args} { + if {[llength $args] < 2} { + error {usage: ?-start ? ?-transparent [0|1|]? ?-overflow [1|0]? overtext pipelinedata} + } + foreach {over under} [lrange $args end-1 end] break + set argsflags [lrange $args 0 end-2] + tailcall overtype::renderline {*}$argsflags $under $over + } +} +interp alias "" piper_renderline "" overtype::piper::renderline + +#intended for single grapheme - but will work for multiple +#cannot contain ansi or newlines +#(a cache of ansifreestring_width calls - as these are quite regex heavy) +proc overtype::grapheme_width_cached {ch} { + variable grapheme_widths + if {[dict exists $grapheme_widths $ch]} { + return [dict get $grapheme_widths $ch] + } + set width [punk::char::ansifreestring_width $ch] + dict set grapheme_widths $ch $width + return $width +} + + + +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### +# renderline written from a left-right line orientation perspective as a first-shot at getting something useful. +# ultimately right-to-left, top-to-bottom and bottom-to-top are probably needed. +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### +# +# +#-returnextra enables returning of overflow and length +#review - use punk::ansi::ta::detect to short-circuit processing and do simpler string calcs as an optimisation? +#review - DECSWL/DECDWL double width line codes - very difficult/impossible to align and compose with other elements +#(could render it by faking it with sixels and a lot of work - find/make a sixel font and ensure it's exactly 2 cols per char) +#todo - review transparency issues with single/double width characters +#bidi - need a base direction and concept of directional runs for RTL vs LTR - may be best handled at another layer? +proc overtype::renderline {args} { + #*** !doctools + #[call [fun overtype::renderline] [arg args] ] + #[para] renderline is the core engine for overtype string processing (frames & textblocks), and the raw mode commandline repl for the Tcl Punk Shell + #[para] It is also a central part of an ansi (micro) virtual terminal-emulator of sorts + #[para] This system does a half decent job at rendering 90's ANSI art to manipulable colour text blocks that can be joined & framed for layout display within a unix or windows terminal + #[para] Renderline helps maintain ANSI text styling reset/replay codes so that the styling of one block doesn't affect another. + #[para] Calling on the punk::ansi library - it can coalesce codes to keep the size down. + #[para] It is a giant mess of doing exactly what common wisdom says not to do... lots at once. + #[para] renderline is part of the Unicode and ANSI aware Overtype system which 'renders' a block of text onto a static underlay + #[para] The underlay is generally expected to be an ordered set of lines or a rectangular text block analogous to a terminal screen - but it can also be ragged in line length, or just blank. + #[para] The overlay couuld be similar - in which case it may often be used to overwrite a column or section of the underlay. + #[para] The overlay could however be a sequence of ANSI-laden text that jumps all over the place. + # + #[para] renderline itself only deals with a single line - or sometimes a single character. It is generally called from a loop that does further terminal-like or textblock processing. + #[para] By suppyling the -info 1 option - it can return various fields indicating the state of the render. + #[para] The main 3 are the result, overflow_right, and unapplied. + #[para] Renderline handles cursor movements from either keystrokes or ANSI sequences but for a full system the aforementioned loop will need to be in place to manage the set of lines under manipulation. + + if {[llength $args] < 2} { + error {usage: ?-info 0|1? ?-startcolumn ? ?-cursor_column ? ?-cursor_row |""? ?-transparent [0|1|]? ?-overflow [1|0]? undertext overtext} + } + lassign [lrange $args end-1 end] under over + if {[string first \n $under] >= 0} { + error "overtype::renderline not allowed to contain newlines in undertext" + } + #if {[string first \n $over] >=0 || [string first \n $under] >= 0} { + # error "overtype::renderline not allowed to contain newlines" + #} + + set defaults [dict create\ + -etabs 0\ + -width \uFFEF\ + -overflow 0\ + -transparent 0\ + -startcolumn 1\ + -cursor_column 1\ + -cursor_row ""\ + -insert_mode 1\ + -autowrap_mode 1\ + -reverse_mode 0\ + -info 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -cursor_restore_attributes ""\ + -experimental {}\ + ] + #-cursor_restore_attributes only - for replay stack - position and actual setting/restoring handled by throwback to caller + + #cursor_row, when numeric will allow detection of certain row moves that are still within our row - allowing us to avoid an early return + #An empty string for cursor_row tells us we have no info about our own row context, and to return with an unapplied string if any row move occurs + + #exposed1 and exposed2 for first and second col of underying 2wide char which is truncated by transparency or overflow + #todo - return info about such grapheme 'cuts' in -info structure and/or create option to raise an error + + set argsflags [lrange $args 0 end-2] + dict for {k v} $argsflags { + switch -- $k { + -experimental - -width - -overflow - -transparent - -startcolumn - -cursor_column - -cursor_row - -insert_mode - -autowrap_mode - -reverse_mode - -info - -exposed1 - -exposed2 - -cursor_restore_attributes {} + default { + set known_opts [dict keys $defaults] + error "overtype::renderline unknown option '$k'. Known options: $known_opts" + } + } + } + set opts [dict merge $defaults $argsflags] + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_width [dict get $opts -width] + set opt_etabs [dict get $opts -etabs] + set opt_overflow [dict get $opts -overflow] + set opt_colstart [dict get $opts -startcolumn] ;#lhs limit for overlay - an offset to cursor_column - first visible column is 1. 0 or < 0 are before the start of the underlay + set opt_colcursor [dict get $opts -cursor_column];#start cursor column relative to overlay + set opt_row_context [dict get $opts -cursor_row] + if {[string length $opt_row_context]} { + if {![string is integer -strict $opt_row_context] || $opt_row_context <1 } { + error "overtype::renderline -cursor_row must be empty for unspecified/unknown or a non-zero positive integer. received: '$opt_row_context'" + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + #The _mode flags correspond to terminal modes that can be set/reset via escape sequences (e.g DECAWM wraparound mode) + set opt_insert_mode [dict get $opts -insert_mode];#should usually be 1 for each new line in editor mode but must be initialised to 1 externally (review) + #default is for overtype + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_autowrap_mode [dict get $opts -autowrap_mode] ;#DECAWM - char or movement can go beyond leftmost/rightmost col to prev/next line + set opt_reverse_mode [dict get $opts -reverse_mode] ;#DECSNM + # -- --- --- --- --- --- --- --- --- --- --- --- + set temp_cursor_saved [dict get $opts -cursor_restore_attributes] + + set test_mode 0 + set cp437_glyphs 0 + foreach e [dict get $opts -experimental] { + switch -- $e { + test_mode { + set test_mode 1 + set cp437_glyphs 1 + } + } + } + set cp437_map [dict create] + if {$cp437_glyphs} { + set cp437_map [set ::punk::ansi::cp437_map] + #for cp437 images we need to map these *after* splitting ansi + #some old files might use newline for its glyph.. but we can't easily support that. + #Not sure how old files did it.. maybe cr lf in sequence was newline and any lone cr or lf were displayed as glyphs? + dict unset cp437_map \n + } + + set opt_transparent [dict get $opts -transparent] + if {$opt_transparent eq "0"} { + set do_transparency 0 + } else { + set do_transparency 1 + if {$opt_transparent eq "1"} { + set opt_transparent {[\s]} + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_returnextra [dict get $opts -info] + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_exposed1 [dict get $opts -exposed1] + set opt_exposed2 [dict get $opts -exposed2] + # -- --- --- --- --- --- --- --- --- --- --- --- + + if {$opt_row_context eq ""} { + set cursor_row 1 + } else { + set cursor_row $opt_row_context + } + + + #----- + # + if {[info exists punk::console::tabwidth]} { + #punk console is updated if punk::console::set_tabstop_width is used or rep is started/restarted + #It is way too slow to test the current width by querying the terminal here - so it could conceivably get out of sync + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + + set overdata $over + if {!$cp437_glyphs} { + #REVIEW! tabify will give different answers for an ANSI colourised string vs plain text + if {!$opt_etabs} { + if {[string first \t $under] >= 0} { + #set under [textutil::tabify::untabify2 $under] + set under [textutil::tabify::untabifyLine $under $tw] + } + if {[string first \t $over] >= 0} { + #set overdata [textutil::tabify::untabify2 $over] + set overdata [textutil::tabify::untabifyLine $over $tw] + } + } + } + #------- + + #ta_detect ansi and do simpler processing? + + #we repeat tests for grapheme width in different loops - rather than create another datastructure to store widths based on column, + #we'll use the grapheme_width_cached function as a lookup table of all graphemes encountered - as there will often be repeats in different positions anyway. + + # -- --- --- --- --- --- --- --- + if {$under ne ""} { + set undermap [punk::ansi::ta::split_codes_single $under] + } else { + set undermap [list] + } + set understacks [list] + set understacks_gx [list] + + set i_u -1 ;#underlay may legitimately be empty + set undercols [list] + set u_codestack [list] + #u_gx_stack probably isn't really a stack - I don't know if g0 g1 can stack or not - for now we support only g0 anyway + set u_gx_stack [list] ;#separate stack for g0 (g1 g2 g3?) graphics on and off (DEC special graphics) + #set pt_underchars "" ;#for string_columns length calculation for overflow 0 truncation + set remainder [list] ;#for returnextra + foreach {pt code} $undermap { + #pt = plain text + #append pt_underchars $pt + if {$cp437_glyphs} { + set pt [string map $cp437_map $pt] + } + foreach grapheme [punk::char::grapheme_split $pt] { + #an ugly hack to serve *some* common case ascii quickly with byte-compiled literal switch - feels dirty. + #.. but even 0.5uS per char (grapheme_width_cached) adds up quickly when stitching lots of lines together. + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + set width 1 + } + default { + if {$grapheme eq "\u0000"} { + #use null as empty cell representation - review + #use of this will probably collide with some application at some point + #consider an option to set the empty cell character + set width 1 + } else { + set width [grapheme_width_cached $grapheme] + #we still want most controls and other zero-length codepoints such as \u200d (zero width joiner) to stay zero-length + #we substitute lone ESC that weren't captured within ANSI context as a debugging aid to see malformed ANSI + #todo - default to off and add a flag (?) to enable this substitution + if {[$width == 0]} { + if {$grapheme eq "\x1b"} { + set gvis [ansistring VIEW $grapheme] + set grapheme $gvis + set width 1 + } + } + } + } + } + #set width [grapheme_width_cached $grapheme] + incr i_u + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + + lappend undercols $grapheme + if {$width > 1} { + #presumably there are no triple-column or wider unicode chars.. until the aliens arrive.(?) + #but what about emoji combinations etc - can they be wider than 2? + #todo - if -etabs enabled - then we treat \t as the width determined by our elastic tabstop + incr i_u + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + lappend undercols "" + } + } + + #underlay should already have been rendered and not have non-sgr codes - but let's retain the check for them and not stack them if other codes are here + + #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc + if {$code ne ""} { + set c1c2 [string range $code 0 1] + set leadernorm [string range [string map [list\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\( 7GFX\ + ] $c1c2] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars + + switch -- $leadernorm { + 7CSI - 8CSI { + if {[string index $code end] eq "m"} { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set u_codestack [list "\x1b\[m"] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set u_codestack [list $code] + } else { + #basic simplification first.. straight dups + set dup_posns [lsearch -all -exact $u_codestack $code] ;#-exact because of square-bracket glob chars + set u_codestack [lremove $u_codestack {*}$dup_posns] + lappend u_codestack $code + } + } + } + 7GFX { + switch -- [string index $code 2] { + "0" { + set u_gx_stack [list gx0_on] ;#we'd better use a placeholder - or debugging will probably get into a big mess + } + B { + set u_gx_stack [list] + } + } + } + default { + + } + + } + + #if {[punk::ansi::codetype::is_sgr_reset $code]} { + # #set u_codestack [list] + #} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + #} elseif {[punk::ansi::codetype::is_sgr $code]} { + #} else { + # #leave SGR stack as is + # if {[punk::ansi::codetype::is_gx_open $code]} { + # } elseif {[punk::ansi::codetype::is_gx_close $code]} { + # } + #} + } + #consider also if there are other codes that should be stacked..? + } + + if {!$test_mode} { + #fill columns to width with spaces, and carry over stacks - we will have to keep track of where the underlying data ends manually - TODO + #Specifying a width is suitable for terminal-like applications and text-blocks + if {$opt_width ne "\uFFEF"} { + if {[llength $understacks]} { + set cs $u_codestack + set gs $u_gx_stack + } else { + set cs [list] + set gs [list] + } + if {[llength $undercols]< $opt_width} { + set diff [expr {$opt_width- [llength $undercols]}] + if {$diff > 0} { + lappend undercols {*}[lrepeat $diff " "] + lappend understacks {*}[lrepeat $diff $cs] + lappend understacks_gx {*}[lrepeat $diff $gs] + } + } + } + } else { + #NULL empty cell indicator + if {$opt_width ne "\uFFEF"} { + if {[llength $understacks]} { + set cs $u_codestack + set gs $u_gx_stack + } else { + set cs [list] + set gs [list] + } + if {[llength $undercols]< $opt_width} { + set diff [expr {$opt_width- [llength $undercols]}] + if {$diff > 0} { + lappend undercols {*}[lrepeat $diff "\u0000"] + lappend understacks {*}[lrepeat $diff $cs] + lappend understacks_gx {*}[lrepeat $diff $gs] + } + } + } + + } + if {$opt_width ne "\uFFEF"} { + set colwidth $opt_width + } else { + set colwidth [llength $undercols] + } + + + if 0 { + # ----------------- + # if we aren't extending understacks & understacks_gx each time we incr idx above the undercols length.. this doesn't really serve a purpose + # Review. + # ----------------- + #replay code for last overlay position in input line + # whether or not we get that far - we need to return it for possible replay on next line + if {[llength $understacks]} { + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + } else { + #in case overlay onto emptystring as underlay + lappend understacks [list] + lappend understacks_gx [list] + } + # ----------------- + } + + #trailing codes in effect for underlay + if {[llength $u_codestack]} { + #set replay_codes_underlay [join $u_codestack ""] + set replay_codes_underlay [punk::ansi::codetype::sgr_merge_list {*}$u_codestack] + } else { + set replay_codes_underlay "" + } + + + # -- --- --- --- --- --- --- --- + #### + #if opt_colstart - we need to build a space (or any singlewidth char ?) padding on the left of the right number of columns. + #this will be processed as transparent - and handle doublewidth underlay characters appropriately + set startpad_overlay [string repeat " " [expr {$opt_colstart -1}]] + append startpad_overlay $overdata ;#overdata with left padding spaces based on col-start under will show through for left-padding portion regardless of -transparency + set overmap [punk::ansi::ta::split_codes_single $startpad_overlay] + #### + + #??? + set colcursor $opt_colstart + #TODO - make a little virtual column object + #we need to refer to column1 or columnmin? or columnmax without calculating offsets due to to startcolumn + #need to lock-down what start column means from perspective of ANSI codes moving around - the offset perspective is unclear and a mess. + + + #set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + #as at 2024-02 punk::char::grapheme_split uses these - not aware of more complex graphemes + + set overstacks [list] + set overstacks_gx [list] + + set o_codestack [list]; #SGR codestack (not other codes such as movement,insert key etc) + set o_gxstack [list] + set pt_overchars "" + set i_o 0 + set overlay_grapheme_control_list [list] ;#tag each with g, sgr or other. 'other' are things like cursor-movement or insert-mode or codes we don't recognise/use + #experiment + set overlay_grapheme_control_stacks [list] + foreach {pt code} $overmap { + if {$cp437_glyphs} { + set pt [string map $cp437_map $pt] + } + append pt_overchars $pt + #will get empty pt between adjacent codes + foreach grapheme [punk::char::grapheme_split $pt] { + lappend overstacks $o_codestack + lappend overstacks_gx $o_gxstack + incr i_o + lappend overlay_grapheme_control_list [list g $grapheme] + lappend overlay_grapheme_control_stacks $o_codestack + } + + #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc + #order of if-else based on assumptions: + # that pure resets are fairly common - more so than leading resets with other info + # that non-sgr codes are not that common, so ok to check for resets before verifying it is actually SGR at all. + if {$code ne ""} { + lappend overlay_grapheme_control_stacks $o_codestack + #there will always be an empty code at end due to foreach on 2 vars with odd-sized list ending with pt (overmap coming from perlish split) + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set o_codestack [list "\x1b\[m"] ;#reset better than empty list - fixes some ansi art issues + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set o_codestack [list $code] + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[priv::is_sgr $code]} { + #basic simplification first - remove straight dupes + set dup_posns [lsearch -all -exact $o_codestack $code] ;#must be -exact because of square-bracket glob chars + set o_codestack [lremove $o_codestack {*}$dup_posns] + lappend o_codestack $code + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[regexp {\x1b7|\x1b\[s} $code]} { + #experiment + #cursor_save - for the replays review. + #jmn + #set temp_cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] + lappend overlay_grapheme_control_list [list other $code] + } elseif {[regexp {\x1b8|\x1b\[u} $code]} { + #experiment + #cursor_restore - for the replays + set o_codestack [list $temp_cursor_saved] + lappend overlay_grapheme_control_list [list other $code] + } else { + if {[punk::ansi::codetype::is_gx_open $code]} { + set o_gxstack [list "gx0_on"] + lappend overlay_grapheme_control_list [list gx0 gx0_on] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set o_gxstack [list] + lappend overlay_grapheme_control_list [list gx0 gx0_off] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } else { + lappend overlay_grapheme_control_list [list other $code] + } + } + } + + } + #replay code for last overlay position in input line - should take account of possible trailing sgr code after last grapheme + set max_overlay_grapheme_index [expr {$i_o -1}] + lappend overstacks $o_codestack + lappend overstacks_gx $o_gxstack + + #set replay_codes_overlay [join $o_codestack ""] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] + + #if {[dict exists $overstacks $max_overlay_grapheme_index]} { + # set replay_codes_overlay [join [dict get $overstacks $max_overlay_grapheme_index] ""] + #} else { + # set replay_codes_overlay "" + #} + # -- --- --- --- --- --- --- --- + + + #potential problem - combinining diacritics directly following control chars like \r \b + + # -- --- --- + #we need to initialise overflow_idx before any potential row-movements - as they need to perform a loop break and force in_excess to 1 + if {$opt_overflow} { + #somewhat counterintuitively - overflow true means we can have lines as long as we want, but either way there can be excess data that needs to be thrown back to the calling loop. + set overflow_idx -1 + } else { + #overflow zero - we can't grow beyond our column width - so we get ellipsis or truncation + if {$opt_width ne "\uFFEF"} { + set overflow_idx [expr {$opt_width}] + } else { + #review - this is also the cursor position when adding a char at end of line? + set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it + } + } + # -- --- --- + + set outcols $undercols ;#leave undercols as is, outcols can potentially be appended to. + + set unapplied "" ;#if we break for move row (but not for /v ?) + set insert_lines_above 0 ;#return key + set insert_lines_below 0 + set instruction "" + + # -- --- --- + #cursor_save_dec, cursor_restore_dec etc + set cursor_restore_required 0 + set cursor_saved_attributes "" + set cursor_saved_position "" + # -- --- --- + + #set idx 0 ;# line index (cursor - 1) + #set idx [expr {$opt_colstart + $opt_colcursor} -1] + + #idx is the per column output index + set idx [expr {$opt_colcursor -1}] ;#don't use opt_colstart here - we have padded and won't start emitting until idx reaches opt_colstart-1 + #cursor_column is usually one above idx - but we have opt_colstart which is like a margin - todo: remove cursor_column from the following loop and calculate it's offset when breaking or at end. + #(for now we are incrementing/decrementing both in sync - which is a bit silly) + set cursor_column $opt_colcursor + + #idx_over is the per grapheme overlay index + set idx_over -1 + + + #movements only occur within the overlay range. + #an underlay is however not necessary.. e.g + #renderline -overflow 1 "" data + #foreach {pt code} $overmap {} + set insert_mode $opt_insert_mode ;#default 1 + set autowrap_mode $opt_autowrap_mode ;#default 1 + + + #puts "-->$overlay_grapheme_control_list<--" + #puts "-->overflow_idx: $overflow_idx" + for {set gci 0} {$gci < [llength $overlay_grapheme_control_list]} {incr gci} { + set gc [lindex $overlay_grapheme_control_list $gci] + lassign $gc type item + + #emit plaintext chars first using existing SGR codes from under/over stack as appropriate + #then check if the following code is a cursor movement within the line and adjust index if so + #foreach ch $overlay_graphemes {} + switch -- $type { + g { + set ch $item + incr idx_over; #idx_over (until unapplied reached anyway) is per *grapheme* in the overlay - not per col. + if {($idx < ($opt_colstart -1))} { + incr idx [grapheme_width_cached $ch] + continue + } + #set within_undercols [expr {$idx <= [llength $undercols]-1}] ;#within our active data width + set within_undercols [expr {$idx <= $colwidth-1}] + + #https://www.enigma.com/resources/blog/the-secret-world-of-newline-characters + #\x85 NEL in the c1 control set is treated by some terminal emulators (e.g Hyper) as a newline, + #on some it's invisble but doesn't change the line, on some it's a visible glyph of width 1. + #This is hard to process in any standard manner - but I think the Hyper behaviour of doing what it was intended is perhaps most reasonable + #We will map it to the same behaviour as lf here for now... but we need also to consider the equivalent ANSI sequence: \x1bE + + set chtest [string map [list \n \x85 \b \r \v \x7f ] $ch] + #puts --->chtest:$chtest + #specials - each shoud have it's own test of what to do if it happens after overflow_idx reached + switch -- $chtest { + "" { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + if {$idx == 0} { + #puts "---a at col 1" + #linefeed at column 1 + #leave the overflow_idx ;#? review + set instruction lf_start ;#specific instruction for newline at column 1 + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } elseif {$overflow_idx != -1 && $idx == $overflow_idx} { + #linefeed after final column + #puts "---c at overflow_idx=$overflow_idx" + incr cursor_row + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 + set instruction lf_overflow ;#only special treatment is to give it it's own instruction in case caller needs to handle differently + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + #linefeed occurred in middle or at end of text + #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" + incr cursor_row + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } + + } + "" { + #will we want/need to use raw for keypresses in terminal? (terminal with LNM in standard reset mode means enter= this is the usual config for terminals) + #So far we are assuming the caller has translated to and handle above.. REVIEW. + + #consider also the old space-carriagereturn softwrap convention used in some terminals. + #In the context of rendering to a block of text - this works similarly in that the space gets eaten so programs emitting space-cr at the terminal width col will pretty much get what they expect. + set idx [expr {$opt_colstart -1}] + set cursor_column $opt_colstart ;#? + } + "" { + #literal backspace char - not necessarily from keyboard + #review - backspace effect on double-width chars - we are taking a column-editing perspective in overtype + #(important for -transparent option - hence replacement chars for half-exposed etc) + #review - overstrike support as per nroff/less (generally considered an old technology replaced by unicode mechanisms and/or ansi SGR) + if {$idx > ($opt_colstart -1)} { + incr idx -1 + incr cursor_column -1 + } else { + set flag 0 + if $flag { + #review - conflicting requirements? Need a different sequence for destructive interactive backspace? + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction backspace_at_start + break + } + } + } + "" { + #literal del character - some terminals send just this for what is generally expected to be a destructive backspace + #We instead treat this as a pure delete at current cursor position - it is up to the repl or terminal to remap backspace key to a sequence that has the desired effect. + priv::render_delchar $idx + } + "" { + #end processing this overline. rest of line is remainder. cursor for column as is. + #REVIEW - this theoretically depends on terminal's vertical tabulation setting (name?) + #e.g it could be configured to jump down 6 rows. + #On the other hand I've seen indications that some modern terminal emulators treat it pretty much as a linefeed. + #todo? + incr cursor_row + set overflow_idx $idx + #idx_over has already been incremented as this is both a movement-control and in some sense a grapheme + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction vt + break + } + default { + if {$overflow_idx != -1} { + #review - how to check arbitrary length item such as tab is going to overflow .. before we get to overflow_idx? + #call grapheme_width_cached on each ch, or look for tab specifically as it's currently the only known reason to have a grapheme width > 2? + #we need to decide what a tab spanning the overflow_idx means and how it affects wrap etc etc + if {$idx == $overflow_idx-1} { + set owidth [grapheme_width_cached $ch] + if {$owidth == 2} { + #review split 2w overflow? + #we don't want to make the decision here to split a 2w into replacement characters at end of line and beginning of next line + #better to consider the overlay char as unable to be applied to the line + #render empty string to column(?) - and reduce overlay grapheme index by one so that the current ch goes into unapplied + #throwing back to caller with instruction complicates its job - but is necessary to avoid making decsions for it here. + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #change the overflow_idx + set overflow_idx $idx + incr idx + incr idx_over -1 ;#set overlay grapheme index back one so that sgr stack from previous overlay grapheme used + priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#note $gci-1 instead of just gci + #throw back to caller's loop - add instruction to caller as this is not the usual case + #caller may for example choose to render a single replacement char to this line and omit the grapheme, or wrap it to the next line + set instruction overflow_splitchar + break + } elseif {$owidth > 2} { + #? tab? + #TODO! + puts stderr "overtype::renderline long overtext grapheme '[ansistring VIEW -lf 1 -vt 1 $ch]' not handled" + #tab of some length dependent on tabstops/elastic tabstop settings? + } + } elseif {$idx == $overflow_idx} { + #jmn? + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci-1]] + #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #don't incr idx beyond the overflow_idx + #idx_over already incremented - decrement so current overlay grapheme stacks go to unapplied + incr idx_over -1 + #priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#back one index here too + priv::render_this_unapplied $overlay_grapheme_control_list $gci ;# + set instruction overflow + break + } + } else { + #review. + #This corresponds to opt_overflow being true (at least until overflow_idx is in some cases forced to a value when throwing back to calling loop) + } + + if {($do_transparency && [regexp $opt_transparent $ch])} { + #pre opt_colstart is effectively transparent (we have applied padding of required number of columns to left of overlay) + if {$idx > [llength $outcols]-1} { + lappend outcols " " + #dict set understacks $idx [list] ;#review - use idx-1 codestack? + lset understacks $idx [list] + incr idx + incr cursor_column + } else { + #todo - punk::char::char_width + set g [lindex $outcols $idx] + set uwidth [grapheme_width_cached $g] + if {[lindex $outcols $idx] eq ""} { + #2nd col of 2-wide char in underlay + incr idx + incr cursor_column + } elseif {$uwidth == 0} { + #e.g control char ? combining diacritic ? + incr idx + incr cursor_column + } elseif {$uwidth == 1} { + set owidth [grapheme_width_cached $ch] + incr idx + incr cursor_column + if {$owidth > 1} { + incr idx + incr cursor_column + } + } elseif {$uwidth > 1} { + if {[grapheme_width_cached $ch] == 1} { + if {!$insert_mode} { + #normal singlewide transparent overlay onto double-wide underlay + set next_pt_overchar [string index $pt_overchars $idx_over+1] ;#lookahead of next plain-text char in overlay + if {$next_pt_overchar eq ""} { + #special-case trailing transparent - no next_pt_overchar + incr idx + incr cursor_column + } else { + if {[regexp $opt_transparent $next_pt_overchar]} { + incr idx + incr cursor_column + } else { + #next overlay char is not transparent.. first-half of underlying 2wide char is exposed + #priv::render_addchar $idx $opt_exposed1 [dict get $overstacks $idx_over] [dict get $overstacks_gx $idx_over] $insert_mode + priv::render_addchar $idx $opt_exposed1 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + } + } + } else { + #? todo - decide what transparency even means for insert mode + incr idx + incr cursor_column + } + } else { + #2wide transparency over 2wide in underlay - review + incr idx + incr cursor_column + } + } + } + } else { + + set idxchar [lindex $outcols $idx] + #non-transparent char in overlay or empty cell + if {$idxchar eq "\u0000"} { + #empty/erased cell indicator + set uwidth 1 + } else { + set uwidth [grapheme_width_cached $idxchar] + } + if {$within_undercols} { + if {$idxchar eq ""} { + #2nd col of 2wide char in underlay + if {!$insert_mode} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 + #JMN - this has to expose if our startposn chopped an underlay - but not if we already overwrote the first half of the widechar underlay grapheme + #e.g renderline \uFF21\uFF21--- a\uFF23\uFF23 + #vs + # renderline -startcolumn 2 \uFF21---- \uFF23 + if {[lindex $outcols $idx-1] != ""} { + #verified it's an empty following a filled - so it's a legit underlay remnant (REVIEW - when would it not be??) + #reset previous to an exposed 1st-half - but leave understacks code as is + priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 + } + incr idx + } else { + set prevcolinfo [lindex $outcols $idx-1] + #for insert mode - first replace the empty 2ndhalf char with exposed2 before shifting it right + #REVIEW - this leaves a replacement character permanently in our columns.. but it is consistent regarding length (?) + #The alternative is to disallow insertion at a column cursor that is at 2nd half of 2wide char + #perhaps by inserting after the char - this may be worthwhile - but may cause other surprises + #It is perhaps best avoided at another level and try to make renderline do exactly as it's told + #the advantage of this 2w splitting method is that the inserted character ends up in exactly the column we expect. + priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 ;#replace not insert + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 1 ;#insert - same index + if {$prevcolinfo ne ""} { + #we've split the 2wide - it may already have been rendered as an exposed1 - but not for example if our startcolumn was current idx + priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 ;#replace not insert + } ;# else?? + incr idx + } + if {$cursor_column < [llength $outcols] || $overflow_idx == -1} { + incr cursor_column + } + } elseif {$uwidth == 0} { + #what if this is some other c0/c1 control we haven't handled specifically? + + #by emitting a preceding empty-string column - we associate whatever this char is with the preceeding non-zero-length character and any existing zero-lengths that follow it + #e.g combining diacritic - increment before over char REVIEW + #arguably the previous overchar should have done this - ie lookahead for combiners? + #if we can get a proper grapheme_split function - this should be easier to tidy up. + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column 2 + + if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { + set cursor_column [llength $outcols] + } + } elseif {$uwidth == 1} { + #includes null empty cells + set owidth [grapheme_width_cached $ch] + if {$owidth == 1} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + } else { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #if next column in underlay empty - we've overwritten first half of underlying 2wide grapheme + #replace with rhs exposure in case there are no more overlay graphemes coming - use underlay's stack + if {([llength $outcols] >= $idx +2) && [lindex $outcols $idx+1] eq ""} { + priv::render_addchar [expr {$idx+1}] $opt_exposed2 [lindex $understacks $idx+1] [lindex $understacks_gx $idx+1] $insert_mode + } + incr idx + } + if {($cursor_column < [llength $outcols]) || $overflow_idx == -1 || $test_mode} { + incr cursor_column + } + } elseif {$uwidth > 1} { + set owidth [grapheme_width_cached $ch] + if {$owidth == 1} { + #1wide over 2wide in underlay + if {!$insert_mode} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #don't incr idx - we are just putting a broken-indication in the underlay - which may get overwritten by next overlay char + } else { + #insert mode just pushes all to right - no exposition char here + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + } + } else { + #2wide over 2wide + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx 2 + incr cursor_column 2 + } + + if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { + set cursor_column [llength $outcols] + } + } + } else { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + if {$overflow_idx !=-1 && !$test_mode} { + #overflow + if {$cursor_column > [llength $outcols]} { + set cursor_column [llength $outcols] + } + } + } + } + } + } ;# end switch + + + } + other { + set code $item + #since this element isn't a grapheme - advance idx_over to next grapheme overlay when about to fill 'unapplied' + + set re_mode {\x1b\[\?([0-9]*)(h|l)} ;#e.g DECAWM + set re_col_move {\x1b\[([0-9]*)(C|D|G)$} + set re_row_move {\x1b\[([0-9]*)(A|B)$} + set re_both_move {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)H$} ;# or "f" ? + set re_vt_sequence {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)~$} + set re_cursor_save {\x1b\[s$} + set re_cursor_restore {\x1b\[u$} + set re_cursor_save_dec {\x1b7$} + set re_cursor_restore_dec {\x1b8$} + set re_other_single {\x1b(D|M|E)$} + set re_decstbm {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)r$} ;#DECSTBM set top and bottom margins + set matchinfo [list] + + #remap of DEC cursor_save/cursor_restore from ESC sequence to equivalent CSI + #probably not ideal - consider putting cursor_save/cursor_restore in functions so they can be called from the appropriate switch branch instead of using this mapping + #review - cost/benefit of function calls within these switch-arms instead of inline code? + set code [string map [list \x1b7 \x1b\[s \x1b8 \x1b\[u ] $code] + + + set c1 [string index $code 0] + set c1c2 [string range $code 0 1] + #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + set leadernorm [string range [string map [list\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\] 7OSC\ + \x9d 8OSC\ + \x1b 7ESC\ + ] $c1c2] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars + + #we leave the tail of the code unmapped for now + switch -- $leadernorm { + 7CSI - 7OSC { + set codenorm [string cat $leadernorm [string range $code 2 end]] + } + 7ESC { + set codenorm [string cat $leadernorm [string range $code 1 end]] + } + 8CSI - 8OSC { + set codenorm [string cat $leadernorm [string range $code 1 end]] + } + default { + #we haven't made a mapping for this + set codenorm $code + } + } + + #we've mapped 7 and 8bit escapes to values we can handle as literals in switch statements to take advantange of jump tables. + switch -- $leadernorm { + {7CSI} - {8CSI} { + set param [string range $codenorm 4 end-1] + #puts stdout "--> CSI [string index $leadernorm 0] bit param:$param" + switch -- [string index $codenorm end] { + D { + #Col move + #puts stdout "<-back" + #cursor back + #left-arrow/move-back when ltr mode + set num $param + if {$num eq ""} {set num 1} + + set version 2 + if {$version eq "2"} { + #todo - startcolumn offset! + if {$cursor_column - $num >= 1} { + incr idx -$num + incr cursor_column -$num + } else { + if {!$autowrap_mode} { + set cursor_column 1 + set idx 0 + } else { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + incr cursor_column -$num + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction wrapmovebackward + break + } + } + } else { + incr idx -$num + incr cursor_column -$num + if {$idx < $opt_colstart-1} { + #wrap to previous line and position cursor at end of data + set idx [expr {$opt_colstart-1}] + set cursor_column $opt_colstart + } + } + } + C { + #Col move + #puts stdout "->forward" + #todo - consider right-to-left cursor mode (e.g Hebrew).. some day. + #cursor forward + #right-arrow/move forward + set num $param + if {$num eq ""} {set num 1} + + #todo - retrict to moving 1 position past datalen? restrict to column width? + #should ideally wrap to next line when interactive and not on last row + #(some ansi art seems to expect this behaviour) + #This presumably depends on the terminal's wrap mode + #e.g DECAWM autowrap mode + # CSI ? 7 h - set: autowrap (also tput smam) + # CSI ? 7 l - reset: no autowrap (also tput rmam) + set version 2 + if {$version eq "2"} { + set max [llength $outcols] + if {$overflow_idx == -1} { + incr max + } + if {$test_mode && $cursor_column == $max+1} { + #move_forward while in overflow + incr cursor_column -1 + } + + if {($cursor_column + $num) <= $max} { + incr idx $num + incr cursor_column $num + } else { + if {$autowrap_mode} { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #jmn + if {$idx == $overflow_idx} { + incr num + } + + #horizontal movement beyond line extent needs to wrap - throw back to caller + #we may have both overflow_rightand unapplied data + #(can have overflow_right if we were in insert_mode and processed chars prior to this movement) + #leave row as is - caller will need to determine how many rows the column-movement has consumed + incr cursor_column $num ;#give our caller the necessary info as columns from start of row + #incr idx_over + #should be gci following last one applied + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction wrapmoveforward + break + } else { + set cursor_column $max + set idx [expr {$cursor_column -1}] + } + } + } else { + if {!$opt_overflow || ($cursor_column + $num) <= [llength $outcols+1]} { + incr idx $num + incr cursor_column $num + } else { + if {!$insert_mode} { + #block editing style with arrow keys + #overtype mode + set idxstart $idx + set idxend [llength $outcols] + set moveend [expr {$idxend - $idxstart}] + if {$moveend < 0} {set moveend 0} ;#sanity? + #puts "idxstart:$idxstart idxend:$idxend outcols[llength $outcols] undercols:[llength $undercols]" + incr idx $moveend + incr cursor_column $moveend + #if {[dict exists $understacks $idx]} { + # set stackinfo [dict get $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext + #} else { + # set stackinfo [list] + #} + if {$idx < [llength $understacks]} { + set stackinfo [lindex $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext + } else { + set stackinfo [list] + } + if {$idx < [llength $understacks_gx]} { + #set gxstackinfo [dict get $understacks_gx $idx] + set gxstackinfo [lindex $understacks_gx $idx] + } else { + set gxstackinfo [list] + } + #pad outcols + set movemore [expr {$num - $moveend}] + #assert movemore always at least 1 or we wouldn't be in this branch + for {set m 1} {$m <= $movemore} {incr m} { + incr idx + incr cursor_column + priv::render_addchar $idx " " $stackinfo $gxstackinfo $insert_mode + } + } else { + #normal - insert + incr idx $num + incr cursor_column $num + if {$idx > [llength $outcols]} { + set idx [llength $outcols];#allow one beyond - for adding character at end of line + set cursor_column [expr {[llength $outcols]+1}] + } + } + } + } + } + G { + #Col move + #move absolute column + #adjust to colstart - as column 1 is within overlay + #??? + set idx [expr {$param + $opt_colstart -1}] + set cursor_column $param + error "renderline absolute col move ESC G unimplemented" + } + A { + #Row move - up + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set num $param + if {$num eq ""} {set num 1} + incr cursor_row -$num + + if {$cursor_row < 1} { + set cursor_row 1 + } + + #ensure rest of *overlay* is emitted to remainder + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction up + #retain cursor_column + break + } + B { + #Row move - down + set num $param + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #move down + if {$num eq ""} {set num 1} + incr cursor_row $num + + + incr idx_over ;#idx_over hasn't encountered a grapheme and hasn't advanced yet + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction down + #retain cursor_column + break + } + H - f { + #$re_both_move + lassign [split $param {;}] row col + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #lassign $matchinfo _match row col + + if {$col eq ""} {set col 1} + set max [llength $outcols] + if {$overflow_idx == -1} { + incr max + } + if {$col > $max} { + set cursor_column $max + } else { + set cursor_column $col + } + set idx [expr {$cursor_column -1}] + + if {$row eq ""} {set row 1} + set cursor_row $row + if {$cursor_row < 1} { + set cursor_row 1 + } + + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction move + break + + } + r { + #$re_decstbm + #https://www.vt100.net/docs/vt510-rm/DECSTBM.html + #This control function sets the top and bottom margins for the current page. You cannot perform scrolling outside the margins + lassign [split $param {;}] margin_top margin_bottom + + #todo - return these for the caller to process.. + puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" + #Also moves the cursor to col 1 line 1 of the page + set cursor_column 1 + set cursor_row 1 + + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction move ;#own instruction? decstbm? + break + } + s { + #$re_cursor_save + #cursor save could come after last column + if {$overflow_idx != -1 && $idx == $overflow_idx} { + #bartman2.ans test file - fixes misalignment at bottom of dialog bubble + #incr cursor_row + #set cursor_column 1 + #bwings1.ans test file - breaks if we actually incr cursor (has repeated saves) + set cursor_saved_position [list row [expr {$cursor_row+1}] column 1] + } else { + set cursor_saved_position [list row $cursor_row column $cursor_column] + } + #there may be overlay stackable codes emitted that aren't in the understacks because they come between the last emmited character and the cursor_save control. + #we need the SGR and gx overlay codes prior to the cursor_save + + #a real terminal would not be able to know the state of the underlay.. so we should probably ignore it. + #set sgr_stack [lindex $understacks $idx] + #set gx_stack [lindex $understacks_gx $idx] ;#not actually a stack - just a boolean state (for now?) + + set sgr_stack [list] + set gx_stack [list] + + #we shouldn't need to scan for intermediate cursor save/restores - as restores would throw-back to the calling loop - so our overlay 'line' is since those. + #The overlay_grapheme_control_list had leading resets from previous lines - so we go back to the beginning not just the first grapheme. + + foreach gc [lrange $overlay_grapheme_control_list 0 $gci-1] { + lassign $gc type code + #types g other sgr gx0 + switch -- $type { + gx0 { + #code is actually a stand-in for the graphics on/off code - not the raw code + #It is either gx0_on or gx0_off + set gx_stack [list $code] + } + sgr { + #code is the raw code + if {[punk::ansi::codetype::is_sgr_reset $code]} { + #jmn + set sgr_stack [list "\x1b\[m"] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set sgr_stack [list $code] + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[priv::is_sgr $code]} { + #often we don't get resets - and codes just pile up. + #as a first step to simplifying - at least remove earlier straight up dupes + set dup_posns [lsearch -all -exact $sgr_stack $code] ;#needs -exact - codes have square-brackets (glob chars) + set sgr_stack [lremove $sgr_stack {*}$dup_posns] + lappend sgr_stack $code + } + } + } + } + set cursor_saved_attributes "" + switch -- [lindex $gx_stack 0] { + gx0_on { + append cursor_saved_attributes "\x1b(0" + } + gx0_off { + append cursor_saved_attributes "\x1b(B" + } + } + #append cursor_saved_attributes [join $sgr_stack ""] + append cursor_saved_attributes [punk::ansi::codetype::sgr_merge_list {*}$sgr_stack] + + #as there is apparently only one cursor storage element we don't need to throw back to the calling loop for a save. + + #don't incr index - or the save will cause cursor to move to the right + #carry on + + } + u { + #$re_cursor_restore + #we are going to jump somewhere.. for now we will assume another line, and process accordingly. + #The caller has the cursor_saved_position/cursor_saved_attributes if any (?review - if we always pass it back it, we could save some calls for moves in same line) + #don't set overflow at this point. The existing underlay to the right must be preserved. + #we only want to jump and render the unapplied at the new location. + + #lset overstacks $idx_over [list] + #set replay_codes_overlay "" + + #if {$cursor_saved_attributes ne ""} { + # set replay_codes_overlay $cursor_saved_attributes ;#empty - or last save if it happend in this input chunk + #} else { + #jj + #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay "" + #} + + #like priv::render_unapplied - but without the overlay's ansi reset or gx stacks from before the restore code + incr idx_over + + set unapplied "" + foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { + lassign $gc type item + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + append unapplied "\x1b(0" + } elseif {$item eq "gx0_off"} { + append unapplied "\x1b(B" + } + } else { + append unapplied $item + } + #incr idx_over + } + #if the save occured within this line - that's ok - it's in the return value list and caller can prepend for the next loop. + set instruction restore_cursor + break + } + ~ { + #$re_vt_sequence + #lassign $matchinfo _match key mod + lassign [split $param {;}] key mod + + #Note that f1 to f4 show as ESCOP|Q|R|S (VT220?) but f5+ show as ESC\[15~ + # + #e.g esc \[2~ insert esc \[2;2~ shift-insert + #mod - subtract 1, and then use bitmask + #shift = 1, (left)Alt = 2, control=4, meta=8 (meta seems to do nothing on many terminals on windows? Intercepted by windows?) + #puts stderr "vt key:$key mod:$mod code:[ansistring VIEW $code]" + if {$key eq "1"} { + #home + } elseif {$key eq "2"} { + #Insert + if {$mod eq ""} { + #no modifier key + set insert_mode [expr {!$insert_mode}] + #rather than set the cursor - we return the insert mode state so the caller can decide + } + } elseif {$key eq "3"} { + #Delete - presumably this shifts other chars in the line, with empty cells coming in from the end + switch -- $mod { + "" { + priv::render_delchar $idx + } + "5" { + #ctrl-del - delete to end of word (pwsh) - possibly word on next line if current line empty(?) + } + } + } elseif {$key eq "4"} { + #End + } elseif {$key eq "5"} { + #pgup + } elseif {$key eq "6"} { + #pgDn + } elseif {$key eq "7"} { + #Home + #?? + set idx [expr {$opt_colstart -1}] + set cursor_column 1 + } elseif {$key eq "8"} { + #End + } elseif {$key eq "11"} { + #F1 - or ESCOP or e.g shift F1 ESC\[1;2P + } elseif {$key eq "12"} { + #F2 - or ESCOQ + } elseif {$key eq "13"} { + #F3 - or ESCOR + } elseif {$key eq "14"} { + #F4 - or ESCOS + } elseif {$key eq "15"} { + #F5 or shift F5 ESC\[15;2~ + } elseif {$key eq "17"} { + #F6 + } elseif {$key eq "18"} { + #F7 + } elseif {$key eq "19"} { + #F8 + } elseif {$key eq "20"} { + #F9 + } elseif {$key eq "21"} { + #F10 + } elseif {$key eq "23"} { + #F11 + } elseif {$key eq "24"} { + #F12 + } + + } + h - l { + #we are matching only last char to get to this arm - but are there other sequences ending in h|l we need to handle? + + #$re_mode if first after CSI is "?" + #some docs mention ESC=h|l - not seen on windows terminals.. review + #e.g https://www2.math.upenn.edu/~kazdan/210/computer/ansi.html + if {[string index $codenorm 4] eq "?"} { + set num [string range $codenorm 5 end-1] ;#param between ? and h|l + #lassign $matchinfo _match num type + switch -- $num { + 5 { + #DECSNM - reverse video + #How we simulate this to render within a block of text is an open question. + #track all SGR stacks and constantly flip based on the current SGR reverse state? + #It is the job of the calling loop to do this - so at this stage we'll just set the states + #DECAWM autowrap + if {$type eq "h"} { + #set (enable) + set reverse_mode 1 + } else { + #reset (disable) + set reverse_mode 0 + } + + } + 7 { + #DECAWM autowrap + if {$type eq "h"} { + #set (enable) + set autowrap_mode 1 + if {$opt_width ne "\uFFEF"} { + set overflow_idx $opt_width + } else { + #review - this is also the cursor position when adding a char at end of line? + set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it + } + #review - can idx ever be beyond overflow_idx limit when we change e.g with a width setting and cursor movements? presume not - but sanity check for now. + if {$idx >= $overflow_idx} { + puts stderr "renderline error - idx '$idx' >= overflow_idx '$overflow_idx' - unexpected" + } + } else { + #reset (disable) + set autowrap_mode 0 + set overflow_idx -1 + } + } + 25 { + if {$type eq "h"} { + #visible cursor + + } else { + #invisible cursor + + } + } + } + + } else { + puts stderr "overtype::renderline CSI...h|l code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + default { + puts stderr "overtype::renderline CSI code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + } + 7ESC { + #$re_other_single + switch -- [string index $codenorm end] { + D { + #\x84 + #index (IND) + #vt102-docs: "Moves cursor down one line in same column. If cursor is at bottom margin, screen performs a scroll-up" + puts stderr "ESC D not fully implemented" + incr cursor_row + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction down + #retain cursor_column + break + } + M { + #\x8D + #Reverse Index (RI) + #vt102-docs: "Moves cursor up one line in same column. If cursor is at top margin, screen performs a scroll-down" + puts stderr "ESC M not fully implemented" + + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #move up + incr cursor_row -1 + if {$cursor_row < 1} { + set cursor_row 1 + } + #ensure rest of *overlay* is emitted to remainder + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction up ;#need instruction for scroll-down? + #retain cursor_column + break + } + E { + #\x85 + #review - is behaviour different to lf? + #todo - possibly(?) same logic as handling above. i.e return instruction depends on where column_cursor is at the time we get NEL + #leave implementation until logic for is set in stone... still under review + #It's arguable NEL is a pure cursor movement as opposed to the semantic meaning of crlf or lf in a file. + # + #Next Line (NEL) "Move the cursor to the left margin on the next line. If the cursor is at the bottom margin, scroll the page up" + puts stderr "ESC E unimplemented" + + } + default { + puts stderr "overtype::renderline ESC code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + + } + } + + #switch -regexp -matchvar matchinfo -- $code\ + #$re_mode { + #}\ + #default { + # puts stderr "overtype::renderline code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + #} + + } + default { + #don't need to handle sgr or gx0 types + #we have our sgr gx0 codes already in stacks for each overlay grapheme + } + } + } + + #-------- + if {$opt_overflow == 0} { + #need to truncate to the width of the original undertext + #review - string_width vs printing_length here. undertext requirement to be already rendered therefore punk::char::string_width ok? + #set num_under_columns [punk::char::string_width $pt_underchars] ;#plaintext underchars + } + if {$overflow_idx == -1} { + #overflow was initially unlimited and hasn't been overridden + } else { + + } + #-------- + + + #coalesce and replay codestacks for outcols grapheme list + set outstring "" ;#output prior to overflow + set overflow_right "" ;#remainder after overflow point reached + set i 0 + set cstack [list] + set prevstack [list] + set prev_g0 [list] + #note overflow_idx may already have been set lower if we had a row move above due to \v or ANSI moves + set in_overflow 0 ;#used to stop char-width scanning once in overflow + if {$overflow_idx == 0} { + #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW + set in_overflow 1 + } + foreach ch $outcols { + #puts "---- [ansistring VIEW $ch]" + + set gxleader "" + if {$i < [llength $understacks_gx]} { + #set g0 [dict get $understacks_gx $i] + set g0 [lindex $understacks_gx $i] + if {$g0 ne $prev_g0} { + if {$g0 eq [list "gx0_on"]} { + set gxleader "\x1b(0" + } else { + set gxleader "\x1b(B" + } + } + set prev_g0 $g0 + } else { + set prev_g0 [list] + } + + set sgrleader "" + if {$i < [llength $understacks]} { + #set cstack [dict get $understacks $i] + set cstack [lindex $understacks $i] + if {$cstack ne $prevstack} { + if {[llength $prevstack] && ![llength $cstack]} { + #This reset is important e.g testfile fruit.ans - we get overhang on rhs without it. But why is cstack empty? + append sgrleader \033\[m + } else { + append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + } + } + set prevstack $cstack + } else { + set prevstack [list] + } + + + + if {$in_overflow} { + if {$i == $overflow_idx} { + set 0 [lindex $understacks_gx $i] + set gxleader "" + if {$g0 eq [list "gx0_on"]} { + set gxleader "\x1b(0" + } elseif {$g0 eq [list "gx0_off"]} { + set gxleader "\x1b(B" + } + append overflow_right $gxleader + set cstack [lindex $understacks $i] + set sgrleader "" + #whether cstack is same or differs from previous char's stack - we must have an output at the start of the overflow_right + #if {[llength $prevstack] && ![llength $cstack]} { + # append sgrleader \033\[m + #} + append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + append overflow_right $sgrleader + append overflow_right $ch + } else { + append overflow_right $gxleader + append overflow_right $sgrleader + append overflow_right $ch + } + } else { + if {$overflow_idx != -1 && $i+1 == $overflow_idx} { + #one before overflow + #will be in overflow in next iteration + set in_overflow 1 + if {[grapheme_width_cached $ch]> 1} { + #we overflowed with second-half of a double-width char - replace first-half with user-supplied exposition char (should be 1 wide) + set ch $opt_exposed1 + } + } + append outstring $gxleader + append outstring $sgrleader + if {$idx+1 < $cursor_column} { + append outstring [string map [list "\u0000" " "] $ch] + } else { + append outstring $ch + } + } + incr i + } + #flower.ans good test for null handling - reverse line building + if {![ansistring length $overflow_right]} { + set outstring [string trimright $outstring "\u0000"] + } + set outstring [string map [list "\u0000" " "] $outstring] + set overflow_right [string trimright $overflow_right "\u0000"] + set overflow_right [string map [list "\u0000" " "] $overflow_right] + + set replay_codes "" + if {[llength $understacks] > 0} { + if {$overflow_idx == -1} { + #set tail_idx [dict size $understacks] + set tail_idx [llength $understacks] + } else { + set tail_idx [llength $undercols] + } + if {$tail_idx-1 < [llength $understacks]} { + #set replay_codes [join [lindex $understacks $tail_idx-1] ""] ;#tail replay codes + set replay_codes [punk::ansi::codetype::sgr_merge_list {*}[lindex $understacks $tail_idx-1]] ;#tail replay codes + } + if {$tail_idx-1 < [llength $understacks_gx]} { + set gx0 [lindex $understacks_gx $tail_idx-1] + if {$gx0 eq [list "gx0_on"]} { + #if it was on, turn gx0 off at the point we stop processing overlay + append outstring "\x1b(B" + } + } + } + if {[string length $overflow_right]} { + #puts stderr "remainder:$overflow_right" + } + #pdict $understacks + if {[punk::ansi::ta::detect_sgr $outstring]} { + append outstring [punk::ansi::a] + #close off any open gx? + #probably should - and overflow_right reopen? + } + + if {$opt_returnextra} { + #replay_codes is the codestack at the boundary - used for ellipsis colouring to match elided text - review + #replay_codes_underlay is the set of codes in effect at the very end of the original underlay + + #review + #replay_codes_overlay is the set of codes in effect at the very end of the original overlay (even if not all overlay was applied) + #todo - replay_codes for gx0 mode + + #overflow_idx may change during ansi & character processing + if {$overflow_idx == -1} { + set overflow_right_column "" + } else { + set overflow_right_column [expr {$overflow_idx+1}] + } + set result [dict create\ + result $outstring\ + visualwidth [punk::ansi::printing_length $outstring]\ + instruction $instruction\ + stringlen [string length $outstring]\ + overflow_right_column $overflow_right_column\ + overflow_right $overflow_right\ + unapplied $unapplied\ + insert_mode $insert_mode\ + autowrap_mode $autowrap_mode\ + insert_lines_above $insert_lines_above\ + insert_lines_below $insert_lines_below\ + cursor_saved_position $cursor_saved_position\ + cursor_saved_attributes $cursor_saved_attributes\ + cursor_column $cursor_column\ + cursor_row $cursor_row\ + opt_overflow $opt_overflow\ + replay_codes $replay_codes\ + replay_codes_underlay $replay_codes_underlay\ + replay_codes_overlay $replay_codes_overlay\ + ] + if {$opt_returnextra == 1} { + return $result + } else { + #human/debug - map special chars to visual glyphs + set viewop VIEW + switch -- $opt_returnextra { + 2 { + #codes and character data + set viewop VIEWCODES ;#ansi colorisation of codes - green for SGR, blue/blue reverse for cursor_save/cursor_restore, cyan for movements, orange for others + } + 3 { + set viewop VIEWSTYLE ;#ansi colorise the characters within the output with preceding codes, stacking codes only within each dict value - may not be same SGR effect as the effect in-situ. + } + } + dict set result result [ansistring $viewop -lf 1 -vt 1 [dict get $result result]] + dict set result overflow_right [ansistring VIEW -lf 1 -vt 1 [dict get $result overflow_right]] + dict set result unapplied [ansistring VIEW -lf 1 -vt 1 [dict get $result unapplied]] + dict set result replay_codes [ansistring $viewop -lf 1 -vt 1 [dict get $result replay_codes]] + dict set result replay_codes_underlay [ansistring $viewop -lf 1 -vt 1 [dict get $result replay_codes_underlay]] + dict set result replay_codes_overlay [ansistring $viewop -lf 1 -vt 1 [dict get $result replay_codes_overlay]] + dict set result cursor_saved_attributes [ansistring $viewop -lf 1 -vt 1 [dict get $result cursor_saved_attributes]] + return $result + } + } else { + return $outstring + } + #return [join $out ""] +} +proc overtype::test_renderline {} { + set t \uFF5E ;#2-wide tilde + set u \uFF3F ;#2-wide underscore + set missing \uFFFD + return [list $t $u A${t}B] +} + +#maintenance warning +#same as textblock::size - but we don't want that circular dependency +#block width and height can be tricky. e.g \v handled differently on different terminal emulators and can affect both +proc overtype::blocksize {textblock} { + if {$textblock eq ""} { + return [dict create width 0 height 1] ;#no such thing as zero-height block - for consistency with non-empty strings having no line-endings + } + if {[string first \t $textblock] >= 0} { + if {[info exists punk::console::tabwidth]} { + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + set textblock [textutil::tabify::untabify2 $textblock $tw] + } + #stripansi on entire block in one go rather than line by line - result should be the same - review - make tests + if {[punk::ansi::ta::detect $textblock]} { + set textblock [punk::ansi::stripansi $textblock] + } + if {[string first \n $textblock] >= 0} { + set num_le [expr {[string length $textblock]-[string length [string map [list \n {}] $textblock]]}] ;#faster than splitting into single-char list + set width [tcl::mathfunc::max {*}[lmap v [split $textblock \n] {::punk::char::ansifreestring_width $v}]] + } else { + set num_le 0 + set width [punk::char::ansifreestring_width $textblock] + } + #our concept of block-height is likely to be different to other line-counting mechanisms + set height [expr {$num_le + 1}] ;# one line if no le - 2 if there is one trailing le even if no data follows le + + return [dict create width $width height $height] ;#maintain order in 'image processing' standard width then height - caller may use lassign [dict values [blocksize ]] width height +} + +namespace eval overtype::priv { + variable cache_is_sgr [dict create] + + #we are likely to be asking the same question of the same ansi codes repeatedly + #caching the answer saves some regex expense - possibly a few uS to lookup vs under 1uS + #todo - test if still worthwhile after a large cache is built up. (limit cache size?) + proc is_sgr {code} { + variable cache_is_sgr + if {[dict exists $cache_is_sgr $code]} { + return [dict get $cache_is_sgr $code] + } + set answer [punk::ansi::codetype::is_sgr $code] + dict set cache_is_sgr $code $answer + return $answer + } + proc render_unapplied {overlay_grapheme_control_list gci} { + upvar idx_over idx_over + upvar unapplied unapplied + upvar overstacks overstacks + upvar overstacks_gx overstacks_gx + upvar overlay_grapheme_control_stacks og_stacks + + #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] + set unapplied "" + #append unapplied [join [lindex $overstacks $idx_over] ""] + #append unapplied [punk::ansi::codetype::sgr_merge_list {*}[lindex $overstacks $idx_over]] + append unapplied [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + switch -- [lindex $overstacks_gx $idx_over] { + "gx0_on" { + append unapplied "\x1b(0" + } + "gx0_off" { + append unapplied "\x1b(B" + } + } + + foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { + lassign $gc type item + #types g other sgr gx0 + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + append unapplied "\x1b(0" + } elseif {$item eq "gx0_off"} { + append unapplied "\x1b(B" + } + } else { + append unapplied $item + } + } + } + + #clearer - renders the specific gci forward as unapplied - prefixed with it's merged sgr stack + proc render_this_unapplied {overlay_grapheme_control_list gci} { + upvar idx_over idx_over + upvar unapplied unapplied + upvar overstacks overstacks + upvar overstacks_gx overstacks_gx + upvar overlay_grapheme_control_stacks og_stacks + + #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] + set unapplied "" + append unapplied [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + switch -- [lindex $overstacks_gx $idx_over] { + "gx0_on" { + append unapplied "\x1b(0" + } + "gx0_off" { + append unapplied "\x1b(B" + } + } + + foreach gc [lrange $overlay_grapheme_control_list $gci end] { + lassign $gc type item + #types g other sgr gx0 + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + append unapplied "\x1b(0" + } elseif {$item eq "gx0_off"} { + append unapplied "\x1b(B" + } + } else { + append unapplied $item + } + } + } + proc render_delchar {i} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + set nxt [llength $o] + if {$i < $nxt} { + set o [lreplace $o $i $i] + set ustacks [lreplace $ustacks $i $i] + set gxstacks [lreplace $gxstacks $i $i] + } else { + + } + } + #is actually addgrapheme? + proc render_addchar {i c sgrstack gx0stack {insert_mode 0}} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + + if 0 { + if {$c eq "c"} { + puts "i:$i c:$c sgrstack:[ansistring VIEW $sgrstack]" + puts "understacks:[ansistring VIEW $ustacks]" + upvar overstacks overstacks + puts "overstacks:[ansistring VIEW $overstacks]" + puts "info level 0:[info level 0]" + } + } + + set nxt [llength $o] + if {!$insert_mode} { + if {$i < $nxt} { + #These lists must always be in sync + lset o $i $c + } else { + lappend o $c + } + if {$i < [llength $ustacks]} { + lset ustacks $i $sgrstack + lset gxstacks $i $gx0stack + } else { + lappend ustacks $sgrstack + lappend gxstacks $gx0stack + } + } else { + #insert of single-width vs double-width when underlying is double-width? + if {$i < $nxt} { + set o [linsert $o $i $c] + } else { + lappend o $c + } + if {$i < [llength $ustacks]} { + set ustacks [linsert $ustacks $i $sgrstack] + set gxstacks [linsert $gxstacks $i $gx0stack] + } else { + lappend ustacks $sgrstack + lappend gxstacks $gx0stack + } + } + } + +} + + + +# -- --- --- --- --- --- --- --- --- --- --- +namespace eval overtype { + interp alias {} ::overtype::center {} ::overtype::centre +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide overtype [namespace eval overtype { + variable version + set version 1.6.0 +}] +return + +#*** !doctools +#[manpage_end] diff --git a/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/bootsupport/modules/punk/ansi-0.1.1.tm index 210161d..05e3e4b 100644 --- a/src/bootsupport/modules/punk/ansi-0.1.1.tm +++ b/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -50,6 +50,7 @@ package require Tcl 8.6- package require punk::char +package require punk::assertion #*** !doctools #[item] [package {Tcl 8.6-}] #[item] [package {punk::char}] @@ -69,8 +70,9 @@ package require punk::char namespace eval punk::ansi::class { if {![llength [info commands class_ansi]]} { + oo::class create class_ansi { - variable o_raw + variable o_ansistringobj variable o_render_dimensions ;#last dimensions at which we rendered variable o_rendered @@ -79,12 +81,16 @@ namespace eval punk::ansi::class { if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { error "class_ansi::render dimensions must be of the form x" } - set o_rendered_what "" + + #a straight string compare may be faster.. but a checksum is much smaller in memory, so we'll use that by default. + set o_rendered_what "" + #There may also be advantages to renering to a class_ansistring class object + set o_render_dimensions $dimensions - set o_raw $ansitext + set o_ansistringobj [ansistring NEW $ansitext] } - method rawdata {} { - return $o_raw + method get {} { + return [$o_ansistringobj get] } method render {{dimensions ""}} { if {$dimensions eq ""} { @@ -93,24 +99,140 @@ namespace eval punk::ansi::class { if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { error "class_ansi::render dimensions must be of the form x" } - if {$o_rendered_what ne $o_raw || $dimensions ne $o_render_dimensions} { - set b [textblock::block $w $h " "] - set o_rendered [overtype::left $b $o_raw] - set o_rendered_what $o_raw + set cksum "not-done" + if {$dimensions ne $o_render_dimensions || $o_rendered_what ne [set cksum [$o_ansistringobj checksum]]} { + #some ansi layout/art relies on wrapping at the width-dimension to display properly + #this includes cursor movements ie right arrow can move cursor to columns in lines below + #overflow is a different concept - perhaps not particularly congruent with the idea of the textblock as a mini terminal emulator. + #overflow effectively auto-expands the block(terminal?) width + #overflow and wrap both being true won't make sense unless we implement a max_overflow concept + set o_rendered [overtype::left -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] + if {$cksum eq "not-done"} { + #if dimensions changed - the checksum won't have been done + set o_rendered_what [$o_ansistringobj checksum] + } else { + set o_rendered_what $cksum + } set o_render_dimensions $dimensions } #todo - store rendered and allow partial rendering of new data lines? return $o_rendered } + method rendertest {{dimensions ""}} { + if {$dimensions eq ""} { + set dimensions $o_render_dimensions + } + if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { + error "class_ansi::render dimensions must be of the form x" + } + set o_dimensions $dimensions + + + set rendered [overtype::left -experimental {test_mode} -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] + return $rendered + } + method render_to_input_line {args} { + if {[llength $args] < 1} { + puts stderr "render_to_input_line usage: ?-dimensions WxH? ?-minus charcount? x" + } + set x [lindex $args end] + set arglist [lrange $args 0 end-1] + if {[llength $arglist] %2 != 0} { + puts stderr "render_to_input_line usage: ?-dimensions WxH? ?-minus charcount? x" + } + set defaults [dict create\ + -dimensions 80x24\ + -minus 0\ + ] + dict for {k v} $arglist { + switch -- $k { + -dimensions - -minus { } + default { + puts stderr "render_to_input_line unexpected argument '$k' usage: ?-dimensions WxH? ?-minus charcount? x" + } + } + } + set opts [dict merge $defaults $arglist] + set opt_dimensions [dict get $opts -dimensions] + set opt_minus [dict get $opts -minus] + lassign [split $opt_dimensions x] w h + if {![string is integer -strict $w] || ![string is integer -strict $h] || $w < 1 || $h < 1} { + puts stderr "render_to_input_line WxH width & height must be positive integer values usage: ?-dimensions WxH? ?-minus charcount? x" + } + if {![string is integer -strict $opt_minus]} { + puts stderr "render_to_input_line -minus must be positive integer value representing number of chars to exclude from end. usage: ?-dimensions WxH? ?-minus charcount? x" + } + + package require textblock + set lfvis [ansistring VIEW -lf 1 \n] + set maplf [list \n "[a+ green bold reverse]${lfvis}[a]\n"] ;#a mapping to highlight newlines + + set lines [split [$o_ansistringobj get] \n] + set rlines [lrange $lines 0 $x] + set chunk [::join $rlines \n] + append chunk \n + if {$opt_minus ne "0"} { + set chunk [string range $chunk 0 end-$opt_minus] + } + set rendered [overtype::left -experimental {test_mode} -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" $chunk] + set marker "" + for {set i 1} {$i <= $w} {incr i} { + if {$i % 10 == 0} { + ::append marker "|" + } elseif {$i % 5 == 0} { + ::append marker * + } else { + ::append marker "." + } + } + ::append rendered \n $marker + set xline [lindex $rlines $x]\n + set xlinev [ansistring VIEWSTYLE $xline] + set xlinev [string map $maplf $xlinev] + set xlinedisplay [overtype::left -wrap 1 -width $w -height 1 "" $xlinev] + ::append rendered \n $xlinedisplay + + set chunk [ansistring VIEWSTYLE $chunk] + set chunk [string map $maplf $chunk] + #keep chunkdisplay narrower - leave at 80 or it will get unwieldy for larger image widths + set chunkdisplay [overtype::left -wrap 1 -width 80 -height 1 "" $chunk] + set renderheight [llength [split $rendered \n]] + set chunkdisplay_lines [split $chunkdisplay \n] + set chunkdisplay_tail [lrange $chunkdisplay_lines end-$renderheight end] + set chunkdisplay_block [join $chunkdisplay_tail \n] + #the input chunk lines are often much longer than the output.. resulting in main content being way up the screen. It's often impractical to view more than the tail of the chunkdisplay. + textblock::join $rendered $chunkdisplay_block + } + + method checksum {} { + return [$o_ansistringobj checksum] + } + method checksum_last_rendered_input {} { + return $o_rendered_what + } + #todo - fix class_ansistring so the ansistring methods can be called directly method viewlines {} { - return [ansistring VIEW $o_raw] + return [ansistring VIEW [$o_ansistringobj get]] } method viewcodes {} { - return [ansistring VIEWCODES $o_raw] + return [ansistring VIEWCODES [$o_ansistringobj get]] } method viewchars {} { - return [punk::ansi::stripansiraw $o_raw] + return [punk::ansi::stripansiraw [$o_ansistringobj get]] + } + method viewstyle {} { + return [ansistring VIEWSTYLE [$o_ansistringobj get]] + } + method append_noreturn {ansistring} { + $o_ansistringobj append $ansistring + #don't return the raw data - it may be big and probably won't play nicely with default terminal dimensions etc. + return + } + #like Tcl append - returns the result + #Tcl's append changes a variable state, this changes the object state + method append {ansistring} { + $o_ansistringobj append $ansistring } } @@ -125,6 +247,83 @@ namespace eval punk::ansi { #[para] Core API functions for punk::ansi #[list_begin definitions] + #old-school ansi graphics - C0 control glyphs. + variable cp437_map + #for cp437 images we need to map these *after* splitting ansi, to single-width unicode chars + #It would also probably be problematic to map \u000A to the glyph - as this is the newline - it included in the map anyway for completeness. The caller may have to manually carve that or other specific c0 controls out of the map to use it depending on the situation(?) + #Layout for cp437 won't be right if you don't at least set width of control-chars to 1 - but also some images specifically use these glyphs + #most fonts don't seem to supply graphics for these control characters even when cp437 is in use - the c1 control glyphs appear to be more widely available - but we could add them here too + #by mapping these we can display regardless. + #nul char - no cp437 image but commonly used as space in ansi graphics. + #(This is a potential conflict because we use nul as a filler to mean empty column in overtype rendering) REVIEW + dict set cp437_map \u0000 " " ;#space + dict set cp437_map \u0001 \u263A ;#smiley + dict set cp437_map \u0002 \u263B ;#smiley-filled + dict set cp437_map \u0003 \u2665 ;#heart + dict set cp437_map \u0004 \u2666 ;#diamond + dict set cp437_map \u0005 \u2663 ;#club + dict set cp437_map \u0006 \u2660 ;#spade + dict set cp437_map \u0007 \u2022 ;#dot + dict set cp437_map \u0008 \u25D8 ;#square hollow dot + dict set cp437_map \u0009 \u25CB ;#hollow dot + dict set cp437_map \u000A \u25D9 ;#square and dot (\n) + dict set cp437_map \u000B \u2642 ;#male + dict set cp437_map \u000C \u2640 ;#female + dict set cp437_map \u000D \u266A ;#note1 (\r) + dict set cp437_map \u000E \u266B ;#note2 + dict set cp437_map \u000F \u263C ;#sun + dict set cp437_map \u0010 \u25BA ;#right arrow triangle + dict set cp437_map \u0011 \u25CA ;#left arrow triangle + dict set cp437_map \u0012 \u2195 ;#updown arrow + dict set cp437_map \u0013 \u203C ;#double bang + dict set cp437_map \u0014 \u00B6 ;#pilcrow (paragraph mark / blind P) + dict set cp437_map \u0015 \u00A7 ;#Section Sign + dict set cp437_map \u0016 \u25AC ;#Heavy horizontal? + dict set cp437_map \u0017 \u21A8 ;#updown arrow 2 ? + dict set cp437_map \u0018 \u2191 ;#up arrow + dict set cp437_map \u0019 \u2193 ;#down arrow + dict set cp437_map \u001A \u2192 ;#right arrow + dict set cp437_map \u001B \u2190 ;#left arrow + dict set cp437_map \u001C \u221F ;#bottom left corner + dict set cp437_map \u001D \u2194 ;#left-right arrow + dict set cp437_map \u001E \u25B2 ;#up arrow triangle + dict set cp437_map \u001F \u25BC ;#down arrow triangle + + variable map_special_graphics + #DEC Special Graphics set https://en.wikipedia.org/wiki/DEC_Special_Graphics + #AKA IBM Code page 1090 + dict set map_special_graphics _ \u00a0 ;#no-break space + dict set map_special_graphics "`" \u25c6 ;#black diamond + dict set map_special_graphics a \u2592 ;#shaded block (checkerboard stipple), medium shade - Block Elements + dict set map_special_graphics b \u2409 ;#symbol for HT + dict set map_special_graphics c \u240c ;#symbol for FF + dict set map_special_graphics d \u240d ;#symbol for CR + dict set map_special_graphics e \u240a ;#symbol for LF + dict set map_special_graphics f \u00b0 ;#degree sign + dict set map_special_graphics g \u00b1 ;#plus-minus sign + dict set map_special_graphics h \u2424 ;#symbol for NL + dict set map_special_graphics i \u240b ;#symbol for VT + dict set map_special_graphics j \u2518 ;#brc, light up and left - box drawing + dict set map_special_graphics k \u2510 ;#trc, light down and left - box drawing + dict set map_special_graphics l \u250c ;#tlc, light down and right - box drawing + dict set map_special_graphics m \u2514 ;#blc, light up and right - box drawing + dict set map_special_graphics n \u253c ;#light vertical and horizontal - box drawing + dict set map_special_graphics o \u23ba ;#horizontal scan line-1 + dict set map_special_graphics p \u23bb ;#horizontal scan line-3 + dict set map_special_graphics q \u2500 ;#light horizontal - box drawing + dict set map_special_graphics r \u23bc ;#horizontal scan line-7 + dict set map_special_graphics s \u23bd ;#horizontal scan line-9 + dict set map_special_graphics t \u251c ;#light vertical and right - box drawing + dict set map_special_graphics u \u2524 ;#light vertical and left - box drawing + dict set map_special_graphics v \u2534 ;#light up and horizontal - box drawing + dict set map_special_graphics w \u252c ;#light down and horizontal - box drawing + dict set map_special_graphics x \u2502 ;#light vertical - box drawing + dict set map_special_graphics y \u2264 ;#less than or equal + dict set map_special_graphics z \u2265 ;#greater than or equal + dict set map_special_graphics "\{" \u03c0 ;#greek small letter pi + dict set map_special_graphics "|" \u2260 ;#not equal to + dict set map_special_graphics "\}" \u00a3 ;#pound sign + dict set map_special_graphics ~ \u00b7 ;#middle dot #see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control @@ -173,22 +372,107 @@ namespace eval punk::ansi { ] + # -------------------------------------- + #comparitive test (performance) string-append vs 2-object (with existing splits) append + proc test_cat1 {ansi1 ansi2} { + #make sure objects have splits + set s1 [ansistring NEW $ansi1] + namespace eval [info object namespace $s1] {my MakeSplit} + set s2 [ansistring NEW $ansi2] + namespace eval [info object namespace $s2] {my MakeSplit} + + #operation under test + # -- + #standard string append + $s1 append $ansi2 + # -- + $s2 destroy + + #$s1 append \033\[31mX ;#redX + return $s1 + } + proc test_cat2 {ansi1 ansi2} { + #make sure objects have splits + set s1 [ansistring NEW $ansi1] + namespace eval [info object namespace $s1] {my MakeSplit} + set s2 [ansistring NEW $ansi2] + namespace eval [info object namespace $s2] {my MakeSplit} + + #operation under test + # -- + #ansistring object append + $s1 appendobj $s2 + # -- + $s2 destroy + #$s1 append \033\[31mX ;#redX + return $s1 + } + # -------------------------------------- + + #review - We have file possibly encoded directly in another codepage such as 437 - or utf8,utf16 etc, but then still needing post conversion to e.g cp437? - proc readfile {fname} { + #In testing old ansi graphics files available on the web, some files need encoding {utf-8 cp437} some just cp437 + proc readfile {fname {encoding cp437}} { #todo #1- look for BOM - read according to format given by BOM #2- assume utf-8 #3- if errors - assume cp437? - set data [fcat $fname] - if {[file extension $fname] eq ".ans"} { - set ansidata [encoding convertfrom cp437 $data] + if {[llength $encoding] == 1} { + set ansidata [fcat -encoding $encoding $fname] + set obj [punk::ansi::class::class_ansi new $ansidata] + } elseif {[llength $encoding] == 2} { + set ansidata [fcat -encoding [lindex $encoding 0] $fname] + set ansidata [encoding convertfrom [lindex $encoding 1] $ansidata] + set obj [punk::ansi::class::class_ansi new $ansidata] } else { - set ansidata $data + error "encoding list '$encoding' not supported. Use 1 or 2 encodings (first for file read, second as encoding convertfrom)" } - set obj [punk::ansi::class::class_ansi new $ansidata] return $obj } + proc ansicat {fname args} { + set encnames [encoding names] + set encoding "" + set dimensions "" + set test_mode 0 + foreach a $args { + if {$a eq "test_mode"} { + set test_mode 1 + } elseif {$a in $encnames} { + set encoding $a + } else { + if {[regexp {[0-9]+(?:x|X)[0-9]+} $a]} { + set dimensions $a + } + } + } + if {$encoding eq ""} { + set encoding cp437 + } + + if {$dimensions eq ""} { + set dimensions 80x24 + } + + set ansidata [fcat -encoding $encoding $fname] + set obj [punk::ansi::class::class_ansi new $ansidata] + if {$test_mode} { + set result [$obj rendertest $dimensions] + } else { + set result [$obj render $dimensions] + } + $obj destroy + return $result + } + #utf-8/ascii encoded cp437 + proc ansicat2 {fname {encoding utf-8}} { + set data [fcat -encoding $encoding $fname] + set ansidata [encoding convertfrom cp437 $data] + set obj [punk::ansi::class::class_ansi new $ansidata] + set result [$obj render] + $obj destroy + return $result + } proc is_utf8_char {char} { regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) [\x00-\x7F] | # Single-byte chars (ASCII range) @@ -208,6 +492,64 @@ namespace eval punk::ansi { } $text completeChars return $completeChars } + proc example {} { + #todo - review dependency on punk::repo ? + package require textblock + package require punk::repo + package require punk::console + + set fnames [list belinda.ans bot.ans flower.ans fish.ans] + set base [punk::repo::find_project] + set ansibase [file join $base src/testansi] + if {![file exists $ansibase]} { + puts stderr "Missing testansi folder at $base/src/testansi" + puts stderr "Ensure ansi test files exist: $fnames" + #error "punk::ansi::example Cannot find example files" + } + set missingbase [a+ yellow][textblock::block 80 23 ?][a] + set pics [list] + foreach f $fnames { + if {![file exists $ansibase/$f]} { + set p [overtype::left $missingbase "[a+ red bold]\nMissing file\n$ansibase/$f[a]"] + lappend pics [dict create filename $f pic $p status missing] + } else { + set img [join [lines_as_list -line trimline -block trimtail [ansicat $ansibase/$f]] \n] + lappend pics [dict create filename $f pic $img status ok] + } + } + + set termsize [punk::console:::get_size] + set margin 4 + set freewidth [expr {[dict get $termsize columns]-$margin}] + set per_row [expr {$freewidth / 80}] + + set rowlist [list] + set row [list] + set i 1 + foreach picinfo $pics { + set subtitle "" + if {[dict get $picinfo status] ne "ok"} { + set subtitle [dict get $picinfo status] + } + set title [dict get $picinfo filename] + lappend row [textblock::frame -subtitle $subtitle -title $title [dict get $picinfo pic]] + if {$i % $per_row == 0} { + lappend rowlist $row + set row [list] + } elseif {$i == [llength $pics]} { + lappend rowlist $row + } + incr i + } + + set result "" + foreach r $rowlist { + append result [textblock::join {*}$r] \n + } + + + return $result + } #control strings #https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf # @@ -345,17 +687,17 @@ namespace eval punk::ansi { } #review - what happens when no terminator? - #todo - map other chars to unicode equivs + #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set # esc) ?? proc convert_g0 {text} { + variable map_special_graphics + #using not \033 inside to stop greediness - review how does it compare to ".*?" #variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} #set re {\033\(0[^\033]*\033\(B} #set re {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} - - - set re2 {\033\(0(.*)\033\(B} ;#capturing + #set re2 {\033\(0(.*)\033\(B} ;#capturing #puts --$g-- #box sample @@ -363,29 +705,29 @@ namespace eval punk::ansi { #x x #mqj #m = boxd_lur - #set map [list l \u250f k \u2513] ;#heavy - set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light box drawing lines - #todo - map the rest https://vt100.net/docs/vt220-rm/chapter2.html + set re_g0_open_or_close {\x1b\(0|\x1b\(B} set parts [::punk::ansi::ta::_perlish_split $re_g0_open_or_close $text] set out "" set g0_on 0 - foreach {pt g} $parts { + foreach {other g} $parts { if {$g0_on} { #split for non graphics-set codes - set othersplits [punk::ansi::ta::split_codes $pt] ;#we don't need single codes here - foreach {innerpt innercodes} $othersplits { - append out [string map $map $innerpt] - append out $innercodes ;#Simplifying assumption - ST codes, titlesets etc don't require/use g0 content + set othersplits [punk::ansi::ta::split_codes $other] ;#we don't need single codes here + foreach {inner_plaintext inner_codes} $othersplits { + append out [string map $map_special_graphics $inner_plaintext] $inner_codes + #Simplifying assumption: no mapping required on any inner_codes - ST codes, titlesets etc don't require/use g0 content } } else { - append out $pt ;#may include other codes - put it all through. + append out $other ;#may be a mix of plaintext and other ansi codes - put it all through. } - if {$g ne ""} { - if {[punk::ansi::codetype::is_gx_open $g]} { + #trust our splitting regex has done the work to leave us with only \x1b\(0 or \x1b(B - test last char rather than use punk::ansi::codetype::is_gx_open/is_gx_close + switch -- [string index $g end] { + 0 { set g0_on 1 - } elseif {[punk::ansi::codetype::is_gx_close $g]} { + } + B { set g0_on 0 } } @@ -421,6 +763,8 @@ namespace eval punk::ansi { } return $out } + + #Wrap text in ansi codes to switch to DEC alternate graphics character set. proc g0 {text} { return \x1b(0$text\x1b(B } @@ -905,28 +1249,77 @@ namespace eval punk::ansi { #*** !doctools #[call [fun cursor_save]] #[para] equivalent term::ansi::code::ctrl::sc + #[para] This is the ANSI/SCO cursor save as opposed to the DECSC version + #[para] On many terminals either will work - but cursor_save_dec is shorter and perhaps more widely supported return \x1b\[s } proc cursor_restore {} { #*** !doctools #[call [fun cursor_restore]] #[para] equivalent term::ansi::code::ctrl::rc + #[para] ANSI/SCO - see also cursor_restore_dec for the DECRC version return \x1b\[u } - proc cursor_save_attributes {} { + proc cursor_save_dec {} { #*** !doctools - #[call [fun cursor_save_attributes]] + #[call [fun cursor_save_dec]] #[para] equivalent term::ansi::code::ctrl::sca + #[para] DECSC return \x1b7 } - proc cursor_restore_attributes {} { + proc cursor_restore_dec {} { #*** !doctools #[call [fun cursor_restore_attributes]] - #[para] equivalent term::ansi::code::ctrl::rca + #[para] equivalent term::ansi::code::ctrl::rca + #[para] DECRC return \x1b8 } - # -- --- --- --- --- + + #DECAWM - automatic line wrapping + proc enable_line_wrap {} { + #*** !doctools + #[call [fun enable_line_wrap]] + #[para] enable automatic line wrapping when characters entered beyond rightmost column + #[para] This will also allow forward movements to move to subsequent lines + #[para] This is DECAWM - and is the same sequence output by 'tput smam' + return \x1b\[?7h + } + proc disable_line_wrap {} { + #*** !doctools + #[call [fun disable_line_wrap]] + #[para] disable automatic line wrapping + #[para] reset DECAWM - same sequence output by 'tput rmam' + #tput rmam + return \x1b\[?7l + } + proc query_mode_line_wrap {} { + #*** !doctools + #[call [fun query_mode_line_wrap]] + #[para] DECRQM to query line-wrap state + #[para] The punk::ansi::query_mode_ functions just emit the ansi query sequence. + return \x1b\[?7\$p + } + #DECRPM responses e.g: + # \x1b\[?7\;1\$y + # \x1b\[?7\;2\$y + #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) + + + #Alt screen buffer + proc enable_alt_screen {} { + #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? + #\x1b\[?1049h ;#xterm + return \x1b\[?47h + } + proc disable_alt_screen {} { + #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] + #\x1b\[?1049l + return \x1b\[?47l + } + + # -- --- --- + proc erase_line {} { #*** !doctools #[call [fun erase_line]] @@ -1034,6 +1427,15 @@ namespace eval punk::ansi { #[para]When written to the terminal, this sequence causes the terminal to emit tabstop information to stdin return \x1b\[2\$w } + proc set_tabstop {} { + return \x1bH + } + proc clear_tabstop {} { + return \x1b\[g + } + proc clear_all_tabstops {} { + return \x1b\[3g + } #alternative to string terminator is \007 - @@ -1061,7 +1463,8 @@ namespace eval punk::ansi { #Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first #If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string. proc printing_length {line} { - if {[string first \n $line] >= 0} { + #string last faster than string first for long strings anyway + if {[string last \n $line] >= 0} { error "line_print_length must not contain newline characters" } #what if line has \v (vertical tab) ie more than one logical screen line? @@ -1087,8 +1490,13 @@ namespace eval punk::ansi { #for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces #normalize tabs to an appropriate* width #*todo - handle terminal/context where tabwidth != the default 8 spaces - if {[string first \t $line] >= 0} { - set line [textutil::tabify::untabify2 $line] + if {[string last \t $line] >= 0} { + if {[info exists punk::console::tabwidth]} { + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + set line [textutil::tabify::untabify2 $line $tw] } #NOTE - this is non-destructive backspace as it occurs in text blocks - and is likely different to the sequence coming from a terminal or editor which generally does a destructive backspace @@ -1155,6 +1563,35 @@ namespace eval punk::ansi { return [punk::char::ansifreestring_width [join $outchars ""]] } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #with thanks to Helmut Giese and other Authors of tcllib textutil + #this version is adjusted to handle ANSI SGR strings + #TODO! ANSI aware version + + proc untabifyLine { line num } { + variable Spaces + + set currPos 0 + while { 1 } { + set currPos [string first \t $line $currPos] + if { $currPos == -1 } { + # no more tabs + break + } + + # how far is the next tab position ? + set dist [expr {$num - ($currPos % $num)}] + # replace '\t' at $currPos with $dist spaces + set line [string replace $line $currPos $currPos $Spaces($dist)] + + # set up for next round (not absolutely necessary but maybe a trifle + # more efficient) + incr currPos $dist + } + return $line + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi ---}] @@ -1272,6 +1709,381 @@ namespace eval punk::ansi { #regexp {\x1b\(B|\x1b\)B} $code regexp {\x1b(?:\(B|\)B)} $code } + #input assumed to be single codes - simple test for 2nd char left bracket and trailing m is done anyway - codes not matching are ignored and passed through + #This is not order-preserving if non-sgr codes are present as they are tacked on to the end even if they initially were before all SGR codes + + variable codestate_empty + set codestate_empty [dict create] + dict set codestate_empty rst "" ;#0 (or empty) + dict set codestate_empty intensity "" ;#1 bold, 2 dim, 22 normal + dict set codestate_empty italic "" ;#3 on 23 off + dict set codestate_empty underline "" ;#4 on 24 off + + #nonstandard 4:3,4:4,4:5 + dict set codestate_empty curlyunderline "" + dict set codestate_empty dottedunderline "" + dict set codestate_empty dashedunderline "" + + dict set codestate_empty blink "" ;#5 or 6 for slow/fast, 25 for off + dict set codestate_empty reverse "" ;#7 on 27 off + dict set codestate_empty hide "" ;#8 on 28 off + dict set codestate_empty strike "" ;#9 on 29 off + dict set codestate_empty font "" ;#10, 11-19 10 being primary + dict set codestate_empty gothic "" ;#20 + dict set codestate_empty doubleunderline "" ;#21 + dict set codestate_empty proportional "" ;#26 - see note below + dict set codestate_empty frame_or_circle "" ;#51,52 on - 54 off (54 off) (not generally used - mintty has repurposed for emoji variation selector) + + #ideogram rarely supported - this implementation untested - each attribute kept separate as they presumably can be applied simultaneously + dict set codestate_empty ideogram_underline "" + dict set codestate_empty ideogram_doubleunderline "" + dict set codestate_empty ideogram_overline "" + dict set codestate_empty ideogram_doubleoverline "" + dict set codestate_empty ideogram_clear "" + + dict set codestate_empty overline "" ;#53 on 55 off - probably not supported - pass through. + dict set codestate_empty underlinecolour "" ;#58 - same arguments as 256color and rgb (nonstandard - in Kitty ,VTE,mintty and iTerm2) + + # -- mintty? + dict set codestate_empty superscript "" ;#73 + dict set codestate_empty subscript "" ;#74 + dict set codestate_empty nosupersub "" ;#75 + # -- + + dict set codestate_empty fg "" ;#30-37 + 90-97 + dict set codestate_empty bg "" ;#40-47 + 100-107 + + + #as a common case optimisation - it will not merge a single element list, even if that code contains redundant elements + proc sgr_merge_list {args} { + if {[llength $args] == 0} { + return "" + } elseif {[llength $args] == 1} { + return [lindex $args 0] + } + variable codestate_empty + set othercodes [list] + + set codestate $codestate_empty + set codestate_initial $codestate_empty ;#keep a copy for resets. + set did_reset 0 + + #we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result? + #There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes? + #we will output 7bit merge of the SGRs even if some or all were 8bit CSi + #As at 2024 - 7bit are widely supported 8bit seem to be often ignored by pseudoterminals + #auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism. + #review - consider a higher-level option for always emitting 8bit or always 7bit + #either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream. + + #We still output any non SGR codes in the list as they came in - preserving their CSI + + foreach c $args { + #normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes + #.. but preserve original c + set cnorm [string map [list \x9b {8[} ] $c] + switch -- [string index $cnorm 1][string index $cnorm end] { + {[m} { + set params [string range $cnorm 2 end-1] ;#strip leading esc lb and trailing m + + #some systems use colon for 256 colors or RGB or nonstandard subparameters + #- it is therefore probably not ok to map to semicolon within SGR codes and treat the same. + # - will break mintty? set params [string map [list : {;}] $params] + set plist [split $params {;}] + if {![llength $plist]} { + #if there was nothing - it must be a reset - we need it in the list + lappend plist "" + } + #we shouldn't get an empty or zero param beyond index 0 - but it's possible + #some codes have additional parameters - e.g rgb colours so we need to jump forward in the parameter list sometimes. + for {set i 0} {$i < [llength $plist]} {incr i} { + set p [lindex $plist $i] + set paramsplit [split $p :] + #for some cases we passthrough $p instead of just the number - in case another implementation uses the colon subparameters + #e.g see https://github.com/mintty/mintty/wiki/Tips#text-attributes-and-rendering + #this may have originated with kitty? + #windows terminal seems to be implementing it too + #however, they can be completely repurposed - so we probably need to specifically support them.. REVIEW. + + #review - what about \x1b\[000m + #we need to accept/ignore leading zeros - we can't just pass to expr - as some tcl versions still see leading zero as octal + set codeint [string trimleft [lindex $paramsplit 0] 0] + switch -- $codeint { + "" - 0 { + set codestate $codestate_initial + set did_reset 1 + } + 1 { + #bold + if {[llength $paramsplit] == 1} { + dict set codestate intensity $p + } else { + if {[lindex $paramsplit 1] eq "2"} { + dict set codestate shadowed "1:2" ;#turn off also with 22 + } + } + } + 2 { + #dim + dict set codestate intensity 2 + } + 3 { + dict set codestate italic 3 + } + 4 { + if {[llength $paramsplit] == 1} { + dict set codestate underline 4 + } else { + switch -- [lindex $paramsplit 1] { + 0 { + #no underline + dict set codestate underline 24 + dict set codestate curlyunderline "" + dict set codestate dottedunderline "" + dict set codestate dashedunderline "" + } + 1 { + dict set codestate underline 4 ;#straight underline + } + 2 { + dict set codestate doubleunderline 21 + } + 3 { + dict set codestate curlyunderline "4:3" + } + 4 { + dict set codestate dottedunderline "4:4" + } + 5 { + dict set codestate dashedunderline "4:5" + } + } + + } + } + 5 - 6 { + dict set codestate blink $p + } + 7 { + dict set codestate reverse 7 + } + 8 { + dict set codestate hide 8 + } + 9 { + dict set codestate strike 9 + } + 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 { + dict set codestate font $p + } + 20 { + dict set codestate gothic 20 + } + 21 { + #ECMA-48 double underline - some terminals use as not-bold. For now we won't support that. + dict set doubleunderline 21 + } + 22 { + #normal intensity + dict set codestate intensity 22 + dict set codestate shadowed "" + } + 23 { + #? wikipedia mentions blackletter - review + dict set codestate italic 23 + } + 24 { + dict set codestate underline 24 ;#off + dict set codestate curlyunderline "" + dict set codestate dottedunderline "" + dict set codestate dashedunderline "" + } + 25 { + dict set codestate blink 25 ;#off + } + 26 { + #not known to be used in terminals.. could it be used with elastic tabstops? - review + dict set codestate proportional 26 + } + 27 { + dict set codestate reverse 27 ;#off + } + 28 { + dict set codestate hide 28 ;#reveal + } + 29 { + dict set codestate strik 29;#off + } + 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 { + dict set codestate fg $p ;#foreground colour + } + 38 { + #256 color or rgb + #check if subparams supplied as colon separated + if {[string first : $p] < 0} { + switch -- [lindex $plist $i+1] { + 5 { + #256 - 1 more param + dict set codestate fg "38\;5\;[lindex $plist $i+2]" + incr i 2 + } + 2 { + #rgb + dict set codestate fg "38\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" + incr i 4 + } + } + } else { + #apparently subparameters can be left empty - and there are other subparams like transparency and color-space + #we should only need to pass it all through for the terminal to understand + #review + dict set codestate fg $p + } + } + 39 { + dict set codestate fg 39 ;#default foreground + } + 40 - 41 - 42 - 43 - 44 - 45 - 46 - 47 { + dict set codestate bg $p ;#background colour + } + 48 { + #256 color or rgb + if {[string first : $p] < 0} { + switch -- [lindex $plist $i+1] { + 5 { + #256 - 1 more param + dict set codestate bg "48\;5\;[lindex $plist $i+2]" + incr i 2 + } + 2 { + #rgb + dict set codestate bg "48\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" + incr i 4 + } + } + } else { + dict set codestate bg $p + } + } + 49 { + dict set codestate bg 49 ;#default background + } + 50 { + dict set codestate proportional 50 ;#off - see 26 + } + 51 - 52 { + dict set codestate frame_or_circle 51 + } + 53 { + dict set codestate overline 53 ;#not supported in terminals? pass through anyway + } + 54 { + dict set codestate frame_or_circle 54 ;#off + } + 55 { + dict set codestate overline 55; #off + } + 58 { + #nonstandard + #256 color or rgb + if {[string first : $p] < 0} { + switch -- [lindex $plist $i+1] { + 5 { + #256 - 1 more param + dict set codestate underlinecolour "58\;5\;[lindex $plist $i+2]" + incr i 2 + } + 2 { + #rgb + dict set codestate underlinecolour "58\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" + incr i 4 + } + } + } else { + dict set codestate underlinecolour $p + } + } + 59 { + #nonstandard - default underlinecolour + dict set codestate underlinecolour 59 + } + 60 { + dict set codestate ideogram_underline 60 + dict set codestate ideogram_clear "" + } + 61 { + dict set codestate ideogram_doubleunderline 61 + dict set codestate ideogram_clear "" + } + 62 { + dict set codestate ideogram_overline 62 + dict set codestate ideogram_clear "" + } + 63 { + dict set codestate ideogram_doubleoverline 63 + dict set codestate ideogram_clear "" + } + 64 { + dict set codestate ideogram_stress 64 + dict set codestate ideogram_clear "" + } + 65 { + dict set codestate ideogram_clear 65 + #review - we still need to pass through the ideogram_clear in case something understands it + dict set codestate ideogram_underline "" + dict set codestate ideogram_doubleunderline "" + dict set codestate ideogram_overline "" + dict set codestate ideogram_doubleoverline "" + } + 73 { + #mintty only? + #can be combined with subscript + dict set codestate superscript 73 + dict set codestate nosupersub "" + } + 74 { + dict set codestate subscript 74 + dict set codestate nosupersub "" + } + 75 { + dict set codestate nosupersub 75 + dict set codestate superscript "" + dict set codestate subcript "" + } + 90 - 91 - 92 - 93 - 94 - 95 - 96 - 97 { + dict set codestate fg $p + } + 100 - 101 - 102 - 103 - 104 - 105 - 106 - 107 { + dict set codestate bg $p + } + + } + } + } + default { + lappend othercodes $c + } + } + + } + + set codemerge "" + dict for {k v} $codestate { + switch -- $v { + "" { + + } + default { + append codemerge "${v}\;" + } + } + } + set codemerge [string trimright $codemerge {;}] + if {$did_reset} { + set codemerge "0\;$codemerge" + } + return "\x1b\[${codemerge}m[join $othercodes ""]" + } #has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? @@ -1541,7 +2353,910 @@ namespace eval punk::ansi::ta { #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] } # -- --- --- --- --- --- --- --- --- --- --- +namespace eval punk::ansi::class { + #assertions specifically for punk::ansi::class namespace + namespace import ::punk::assertion::assert + punk::assertion::active 1 + + namespace eval renderer { + oo::class create base_renderer { + variable o_width o_wrap o_overflow o_appendlines o_looplimit + + variable o_cursor_column o_cursor_row + #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered + variable o_rendereditems + + variable o_from_ansistring o_to_ansistring + variable o_ns_from o_ns_to ;#some dirty encapsulation violation as a 'friend' of ansistring objects - direct record of namespaces as they are frequently accessed + constructor {args} { + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking chained imports) + set nspath [namespace path] + if {"::punk::ansi::class" ni $nspath} { + lappend nspath ::punk::ansi::class + } + namespace path $nspath + #-- -- + if {[llength $args] < 2} { + error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} + } + lassign [lrange $args end-1 end] from_ansistring to_ansistring + set defaults [dict create\ + -width \uFFEF\ + -wrap 1\ + -overflow 0\ + -appendlines 1\ + -looplimit 15000\ + -experimental {}\ + -cursor_column 1\ + -cursor_row 1\ + ] + puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" + set argsflags [lrange $args 0 end-2] + dict for {k v} $argsflags { + switch -- $k { + -width - -wrap - -overflow - -appendlines - -looplimit - -experimental {} + default { + set known_opts [dict keys $defaults] + #don't use [self class] - or we'll get the superclass + error "[info object class [self]] unknown option '$k'. Known options: $known_opts" + } + } + } + set opts [dict merge $defaults $argsflags] + set o_width [dict get $opts -width] + set o_wrap [dict get $opts -wrap] + set o_overflow [dict get $opts -overflow] + set o_appendlines [dict get $opts -appendlines] + set o_looplimit [dict get $opts -looplimit] + set o_cursor_column [dict get $opts -cursor_column] + set o_cursor_row [dict get $opts -cursor_row] + + set o_from_ansistring $from_ansistring + set o_ns_from [info object namespace $o_from_ansistring] + set o_to_ansistring $to_ansistring + set o_ns_to [info object namespace $o_to_ansistring] + #set o_render_index -1 ;#zero based. -1 indicates nothing yet rendered. + set o_rendereditems [list] ;#graphemes + controls + individual ansi codes from input $o_from_ansistring + } + #temporary test method + method eval_in {script} { + eval $script + } + method cursor_column {{col ""}} { + if {$col eq ""} { + return $o_cursor_column + } + if {$col < 1} { + error "Minimum cursor_column is 1" + } + set o_cursor_column $col + } + method cursor_row {{row ""}} { + if {$row eq ""} { + return $o_cursor_row + } + if {$row < 1} { + error "Minimum cursor_row is 1" + } + set o_cursor_row $row + } + + #consider scroll area + #we need to render to something with a concept of viewport, offscreen above,below,left,right? + method rendernext {} { + upvar ${o_ns_from}::o_ansisplits from_ansisplits + upvar ${o_ns_from}::o_elements from_elements + upvar ${o_ns_from}::o_splitindex from_splitindex + + #if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend' + if {![llength $from_ansisplits]} { + namespace eval $o_ns_from {my MakeSplit} + } + + set eidx [llength $o_rendereditems] + + #compare what we've rendered so far to our source to confirm they're still in sync + if {[lrange $o_rendereditems 0 $eidx-1] ne [lrange $from_elements 0 $eidx-1]} { + puts stdout "rendereditems 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $o_rendereditems 0 $eidx-1]]" + puts stdout "from_elements 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $from_elements 0 $eidx-1]]" + error "rendernext error - rendering state is out of sync. rendereditems list not-equal to corresponding part of ansistring $o_from_ansistring" + } + if {$eidx == [llength $from_elements]} { + #nothing new available + return [dict create type "" rendercount 0 start_count_unrendered 0 end_count_unrendered 0] + } + + set start_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] + #we need to render in pt code chunks - not each grapheme element individually + #translate from element index to ansisplits index + set process_splitindex [lindex $from_splitindex $eidx] ;#which from_ansisplits index the first unrendered element belongs to + + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo type_rendered item + #we don't expect type to change should be all graphemes (type 'g') or a single code (type 'sgr','other' etc) + #review - we may want to store more info for graphemes e.g g0 g1 g2 for zero-wide 1-wide 2-wide ? + #if so - we should report a list of the grapheme types that were rendered in a pt block + #as a counterpoint however - we don't currently retrieve grapheme width during split (performance impact at wrong time?) - and width may depend on the rendering method anyway + #e.g c0 controls are normally zero printing width - but are (often) 1-wide glyphs in a cp437 rendering operation. + + #we want to render all the elements in this splitindex - for pt this may be multiple, for code it will be a single element(?) + + set newtext "" + set rendercount 0 + if {$type_rendered eq "g"} { + + set e_splitindex $process_splitindex + while {$e_splitindex == $process_splitindex && $eidx < [llength $from_elements]} { + append newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + + incr eidx + set e_splitindex [lindex $from_splitindex $eidx] + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo _type item + } + } else { + #while not g ? render however many ansi sequences are in a row? + set newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + } + + set end_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] + set count_rendered [expr {$start_elements_unrendered - $end_elements_unrendered}] + assert {$rendercount == $count_rendered} + + #todo - renderline equivalent? + + $o_to_ansistring append $newtext + + return [dict create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] + } + + } + #name all with prefix class_ for rendertype detection + oo::class create class_cp437 { + superclass base_renderer + } + oo::class create class_editbuf { + superclass base_renderer + } + } + + #As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string. + #oo names beginning with uppercase are private - so we can't use capitalisation as a hint to distinguish those which differ from Tcl semantics + oo::class create class_ansistring { + variable o_cksum_command o_string o_count + + #this is the main state we keep of the split apart string + #we use the punk::ansi::ta::split_codes_single function which produces a list with zero, or an odd number elements always beginning and ending with plaintext + variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes + variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split + + + #State regarding output renderstring (if any) + variable o_renderout ;#another class_ansistring instance + variable o_renderer ;# punk::ansi::class::renderer::class_ instance + variable o_renderwidth + variable o_rendertype + + # -- per element lookups -- + # llengths should all be the same + # we maintain 4 lookups per entry rather than a single nested list + # it is estimated that separate lists will be more efficient for certain operations - but that is open to review/testing. + variable o_elements ;#elements contains entry for each grapheme/control + each ansi code + variable o_sgrstacks ;#list of ansi sgr codes that will be merged later. Entries deliberately repeat if no change from previous entry. Later scans look for difference between n and n-1 when deciding where to apply codes. + variable o_gx0states ;#0|1 for alternate graphics gx0 + variable o_splitindex ;#entry for each element indicating the index of the split it belongs to. + # -- -- + + constructor {string} { + set o_string $string + + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking imports) + set nspath [namespace path] + if {"::punk::ansi::class" ni $nspath} { + lappend nspath ::punk::ansi::class + } + namespace path $nspath + #-- -- + + #we choose not to generate an internal split-state for the initial string - which may potentially be large. + #there are a few methods such as get, has_ansi, show_state,checksum that can run efficiently on the initial string without generating it. + #The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary + + set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string) + + set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc. + set o_ptlist [list] + #o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the contructor, or no methods have yet triggered the initial string to have it's internal state built. + + set o_elements [list] + set o_sgrstacks [list] + set o_gx0states [list] + set o_splitindex [list] + + set o_cksum_command [list sha1::sha1 -hex] + + + #empty if no render methods used + # -- + set o_renderer "" + set o_renderout "" ;#class_ansistring + # -- + + set o_renderwidth 80 + set o_rendertype cp437 + } + + #temporary test method + method eval_in {script} { + eval $script + } + method checksum {} { + if {[catch { + package require sha1 + } errM]} { + error "sha1 package unavailable" + } + return [{*}$o_cksum_command $o_string] + } + #todo - allow setting checksum algorithm and/or command + + method show_state {{verbose 0}} { + #show some state info - without updating anything + #only use 'my' methods that don't update the state e.g has_ansi + set result "" + if {![llength $o_ansisplits]} { + append result "No internal splits. " + append result \n "has ansi : [my has_ansi]" + append result \n "Tcl string length raw string: [string length $o_string]" + } else { + append result \n "has ansi : [my has_ansi]" + append result \n "ansisplit list len: [llength $o_ansisplits]" + append result \n "plaintext list len: [llength $o_ptlist]" + append result \n "cached count : $o_count" + append result \n "Tcl string length raw string : [string length $o_string]" + append result \n "Tcl string length plaintext parts: [string length [join $o_ptlist ""]]" + if {[llength $o_ansisplits] %2 == 0} { + append result \n -------------------------------------------------- + append result \n Warning - ansisplits appears to be invalid length + append result \n Use show_state 1 to view + append result \n -------------------------------------------------- + } + } + if {$o_renderer ne ""} { + append result \n " renderer obj: $o_renderer" + append result \n " renderer class: [info object class $o_renderer]" + } + if {$o_renderout ne ""} { + append result \n " render target ansistring: $o_renderout" + append result \n " render target has ansi : [$o_renderout has_ansi]" + append result \n " render target count : [$o_renderout count]" + } + if {$verbose} { + append result \n "ansisplits listing" + #we will use a foreach with a single var rather than foreach {pt code} - so that if something goes wrong it's clearer. + #(using foreach {pt code} on an odd element list will give a spurious empty code at the end) + set i 0 + foreach item $o_ansisplits { + if {$i % 2 == 0} { + set type "pt " + } else { + set type code + } + append result \n "$type: [ansistring VIEW $item]" + incr i + } + append result \n "Last element of ansisplits should be of type pt" + } + return $result + } + + #private method + method MakeSplit {} { + #The split with each code as it's own element is more generally useful. + set o_ansisplits [punk::ansi::ta::split_codes_single $o_string]; + set o_ptlist [list] + set codestack [list] + set gx0_state 0 ;#default off + set current_split_index 0 ;#incremented for each pt block, incremented for each code + if {$o_count eq ""} { + set o_count 0 + } + foreach {pt code} $o_ansisplits { + lappend o_ptlist $pt + foreach grapheme [punk::char::grapheme_split $pt] { + lappend o_elements [list g $grapheme] + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + incr o_count + } + #after handling the pt block - incr the current_split_index + incr current_split_index ;#increment for each pt block - whether empty string or not. Indices corresponding to empty PT blocks will therefore not be present in o_splitindex as there were no elements in that ansisplit entry + #we will only get an empty code at the very end of ansisplits (ansisplits is length 0 or odd length - always with pt at start and pt at end) + if {$code ne ""} { + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + + #maintenance warning - dup in append! + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list "\x1b\[m"] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::is_sgr $code]} { + #basic simplification first - remove straight dupes + set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + lappend o_elements [list sgr $code] + } else { + if {[punk::ansi::codetype::is_gx_open $code]} { + set gx0_state 1 + lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set gx0_state 0 + lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } else { + lappend o_elements [list other $code] + } + } + #after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index + incr current_split_index + } + } + #assertion every grapheme and every individual code has been added to o_elements + #every element has an entry in o_sgrstacks + #every element has an entry in o_gx0states + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} + } + method convert_altg {} { + #do we need a method to retrieve without converting in the object? + puts "unimplemented" + } + method strippedlength {} { + if {![llength $o_ansisplits]} {my MakeSplit} + + } + #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already + method stripped {} { + if {![llength $o_ansisplits]} {my MakeSplit} + return [join $o_ptlist ""] + } + + #does not affect object state + method DoCount {plaintext} { + #- combiners/diacritics just map them away here - but for consistency we need to combine unicode grapheme clusters too. + #todo - joiners 200d? zwnbsp + set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + + #we want length to return number of glyphs + normal controls such as newline.. not screen width. Has to be consistent with index function + return [string length [regsub -all $re_diacritics $plaintext ""]] + } + + #This is the count of visible graphemes + non-ansi control chars. Not equal to column width or to the Tcl string length of the ansistripped string!!! + method count {} { + if {$o_count eq ""} { + #only initial string present + if {$o_string eq ""} { + set o_count 0 + return 0 + } + my MakeSplit + #set o_count [my DoCount [join $o_ptlist ""]] + } + return $o_count + } + #this is the equivalent of Tcl string length on the ansistripped string + method length {} { + if {![llength $o_ansisplits]} { + if {[punk::ansi::ta::detect $o_string]} { + my MakeSplit + } else { + return [string length $o_string] + } + } elseif {[llength $o_ansisplits] == 1} { + #single split always means no ansi + return string length $o_string + } + return [string length [join $o_ptlist ""]] + } + method length_raw {} { + return [string length $o_string] + } + + #channels for stream in/out.. these are vaguely analogous to the input/output between a shell and a PTY Slave - but this is not intended to be a full pseudoterminal + #renderstream_to_render (private?) + # write end held by outer ansistring? read end by inner render ansistring ? + #renderstream_from_render (public?) + + method rendertypes {} { + set classes [info commands ::punk::ansi::class::renderer::class_*] + #strip off class_ + set ctypes [lmap v $classes {string range [namespace tail $v] 6 end}] + } + method rendertype {{rtype ""}} { + if {$rtype eq ""} { + return $o_rendertype + } + set rtypes [my rendertypes] + if {$rtype ni $rtypes} { + error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" + } + if {$o_renderout eq ""} { + #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? + set o_renderout [punk::ansi::class::class_ansistring new ""] + } + if {$o_renderer ne ""} { + set oinfo [info object class $o_renderer] + set tail [namespace tail $oinfo] + set currenttype [string range $tail 6 end] + if {$rtype ne $currenttype} { + puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" + $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + } else { + return $currenttype + } + } else { + puts "creating first renderer" + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + } + } + #--- progressive rendering buffer - another ansistring object + method renderwidth {{rw ""}} { + #report or set the renderwidth - may invalidate existing render progress? restart? + if {$rw eq ""} { + return $o_renderwidth + } + if {$rw eq $o_renderwidth} { + return $o_renderwidth + } + #re-render if needed? + + + set o_renderwidth $rw + } + method render_state {} { + #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary + #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. + #A single number representing the count of graphemes and individual ANSI codes (from the input ansistring) rendered might work + } + method renderbuf {} { + #get the underlying renderobj - if any + return $o_renderout ;#also class_ansistring + } + method render {} { + #full render - return buffer ansistring + } + method rendernext {} { + #render next available pt/code chunk only - not to end of available input + if {$o_renderer eq ""} { + my rendertype $o_rendertype ;#review - proper way to initialise rendering + } + $o_renderer rendernext + } + method render_cursorstate {{row_x_col ""}} { + #report /set? cursor posn + if {$o_renderer eq ""} { + error "No renderer. Call render methods first" + } + return [dict create row [$o_renderer cursor_row] column [$o_renderer cursor_column]] + } + #--- + + method get {} { + return $o_string + } + method has_ansi {} { + if {![llength $o_ansisplits]} { + #initial string - for large strings,it's faster to run detect than update the internal split-state. + return [punk::ansi::ta::detect $o_string] + } else { + #string will continue to have a single o_ansisplits element if only non-ansi appended + return [expr {[llength $o_ansisplits] != 1}] + } + } + #todo - has_ansi_movement ? + #If an arbirary ANSI string has movement/cursor restore - then the number of apparent rows in the input will potentially bear no relation to the number of lines of output. + #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows + #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column + + #analagous to Tcl string append + #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient + method append {args} { + set catstr [join $args ""] + if {$catstr eq ""} { + return $o_string + } + + if {![punk::ansi::ta::detect $catstr]} { + #ansi-free additions + #if no initial internal-split - generate it without first appending our additions - as we can more efficiently append them to the internal state + if {![llength $o_ansisplits]} { + #initialise o_count because we need to add to it. + #The count method will do this by calling Makesplit only if it needs to. (which will create ansisplits for anything except empty string) + my count + } + append o_string $catstr;# only append after updating using my count above + if {![llength $o_ptlist]} { + #If the object was initialised with empty string - we can still have empty lists for o_ptlist and o_ansisplits + #even though we can use lset to add to a list - we can't for empty + lappend o_ptlist $catstr + #assertion - if o_ptlist is empty so is o_ansisplits + lappend o_ansisplits $catstr + } else { + lset o_ptlist end [string cat [lindex $o_ptlist end] $catstr] + lset o_ansisplits end [string cat [lindex $o_ansisplits end] $catstr] + } + set last_codestack [lindex $o_sgrstacks end] + set last_gx0state [lindex $o_gx0states end] + set current_split_index [expr {[llength $o_ansisplits]-1}] ;#we are attaching to existing trailing pt - use its splitindex + foreach grapheme [punk::char::grapheme_split $catstr] { + lappend o_elements [list g $grapheme] + lappend o_sgrstacks $last_codestack + lappend o_gx0states $last_gx0state + lappend o_splitindex $current_split_index + incr o_count + } + #incr o_count [my DoCount $catstr] ;#from before we were doing grapheme split.. review + } else { + if {![llength $o_ansisplits]} { + #if we have an initial string - but no internal split-state because this is our first append and no methods have caused its generation - we can run more efficiently by combining it with the first append + append o_string $catstr ;#append before split and count on whole lot + my MakeSplit ;#update o_count + #set combined_plaintext [join $o_ptlist ""] + #set o_count [my DoCount $combined_plaintext] + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} + return $o_string + } else { + #update each element of internal state incrementally without reprocessing what is already there. + append o_string $catstr + set newsplits [punk::ansi::ta::split_codes_single $catstr] + set ptnew "" + set codestack [lindex $o_sgrstacks end] + set gx0_state [lindex $o_gx0states end] + set current_split_index [lindex $o_splitindex end] + #first pt must be merged with last element of o_ptlist + set new_pt_list [list] + foreach {pt code} $newsplits { + lappend new_pt_list $pt + append ptnew $pt + foreach grapheme [punk::char::grapheme_split $pt] { + lappend o_elements [list g $grapheme] + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + incr o_count + } + incr current_split_index ;#increment 1 of 2 within each loop + if {$code ne ""} { + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + #maintenance - dup in MakeSplit! + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list "\x1b\[m"] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::is_sgr $code]} { + #basic simplification first - remove straight dupes + set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + lappend o_elements [list sgr $code] + } else { + if {[punk::ansi::codetype::is_gx_open $code]} { + set gx0_state 1 + lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set gx0_state 0 + lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } else { + lappend o_elements [list other $code] + } + } + incr current_split_index ;#increment 2 of 2 + } + } + lset o_ptlist end [string cat [lindex $o_ptlist end] [lindex $new_pt_list 0]] + lappend o_ptlist {*}[lrange $new_pt_list 1 end] + lset o_ansisplits end [string cat [lindex $o_ansisplits end] [lindex $newsplits 0]] + lappend o_ansisplits {*}[lrange $newsplits 1 end] + + #if {$o_count eq ""} { + # #we have splits - but didn't count graphemes? + # set o_count [my DoCount [join $o_ptlist ""]] ;#o_ptlist already has ptnew parts + #} else { + # incr o_count [my DoCount $ptnew] + #} + + } + } + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} + return $o_string + } + + #we are currently assuming that the component strings have complete graphemes ie no split clusters - and therefore we don't attempt to check for and combine at the string catenation points. + #This is 'often'? likely to be true - We don't have grapheme cluster support yet anyway. review. + method appendobj {args} { + if {![llength $o_ansisplits]} { + my MakeSplit + } + foreach a $args { + set ns [info object namespace $a] + upvar ${ns}::o_ansisplits new_ansisplits + upvar ${ns}::o_count new_count + if {![llength $new_ansisplits] || $new_count eq ""} { + namespace eval $ns {my MakeSplit} + } + upvar ${ns}::o_ptlist new_ptlist + upvar ${ns}::o_string new_string + upvar ${ns}::o_elements new_elements + upvar ${ns}::o_sgrstacks new_sgrstacks + upvar ${ns}::o_gx0states new_gx0states + upvar ${ns}::o_splitindex new_splitindex + + lset o_ansisplits end [string cat [lindex $o_ansisplits end] [lindex $new_ansisplits 0]] + lappend o_ansisplits {*}[lrange $new_ansisplits 1 end] + lset o_ptlist end [string cat [lindex $o_ptlist end] [lindex $new_ptlist 0]] + lappend o_ptlist {*}[lrange $new_ptlist 1 end] + + append o_string $new_string + lappend o_elements {*}$new_elements + + #prepend the previous sgr stack to all stacks in the new list. + #This allows us to use only list operations to keep the sgr data valid - but we don't yet make it canonical/flat by examining each for resets etc. + #ie just call sgr_merge_list once now. + set laststack [lindex $o_sgrstacks end] + set mergedtail [punk::ansi::codetype::sgr_merge_list "" {*}$laststack] + foreach n $new_sgrstacks { + lappend o_sgrstacks [list $mergedtail {*}$n] + } + + + lappend o_gx0states {*}$new_gx0states + + #first and last of ansisplits splits merge + set lastidx [lindex $o_splitindex end] + set firstnewidx [lindex $new_splitindex 0] + set diffidx [expr {$lastidx - $firstnewidx}] ;#may be negative + foreach v $new_splitindex { + lappend o_splitindex [expr {$v + $diffidx}] + } + + incr o_count $new_count + } + return $o_count + } + + + #method append_and_render - append and render up to end of appended data at same time + + method view {args} { + if {$o_string eq ""} { + return "" + } + #ansistring VIEW relies only on the raw ansi input as it is essentially just a string map. + #We don't need to force an ansisplit if we happen to have an unsplit initial string + ansistring VIEW $o_string + } + method viewcodes {args} { + if {$o_string eq ""} { + return "" + } + if {![llength $o_ansisplits]} {my MakeSplit} + + set redb [a+ red bold] ;#osc/apm ? anything with potential security risks or that is unusual + set greenb [a+ green bold] ;#SGR + set cyanb [a+ cyan bold] ;#col,row movement + set blueb [a+ blue bold] ;# + set blueb_r [a+ blue bold reverse] + set whiteb [a+ white bold] ;#SGR reset (or highlight first part if leading reset) + set GX [a+ black White bold] ;#alt graphics + set unk [a+ yellow bold] ;#unknown/unhandled + set RST [a] + + set re_col_move {\x1b\[([0-9]*)(C|D|G)$} + set re_row_move {\x1b\[([0-9]*)(A|B)$} + set re_both_move {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)H$} + set re_vt_sequence {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)~$} + set re_cursor_save {\x1b\[s$} + set re_cursor_restore {\x1b\[u$} + set re_cursor_save_dec {\x1b7$} + set re_cursor_restore_dec {\x1b8$} + + set arrow_left \u2190 + set arrow_right \u2192 + set arrow_up \u2191 + set arrow_down \u2193 + set arrow_lr \u2194 + set arrow_du \u2195 + #2024 - there is no 4-arrow symbol or variations (common cursor and window icon) in unicode - despite requests and argument from the community that this has been in use for decades. + #They are probably too busy with stupid emoji additions to add this or c1 visualization glyphs. + + #don't split into lines first - \n is valid within ST sections + set output "" + #set splits [punk::ansi::ta::split_codes_single $string] + + foreach {pt code} $o_ansisplits { + append output [ansistring VIEW {*}$args $pt] + + #map DEC cursor_save/restore to CSI version + set code [string map [list \x1b7 \x1b\[s \x1b8 \x1b\[u ] $code] + + + set c1 [string index $code 0] + set c1c2 [string range $code 0 1] + #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + set leadernorm [string range [string map [list\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\] 7OSC\ + \x1b\( 7GFX\ + \x9d 8OSC\ + \x1b 7ESC\ + ] $c1c2] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars + + #we leave the tail of the code unmapped for now + switch -- $leadernorm { + 7CSI - 7OSC { + set codenorm [string cat $leadernorm [string range $code 2 end]] + } + 7ESC { + set codenorm [string cat $leadernorm [string range $code 1 end]] + } + 8CSI - 8OSC { + set codenorm [string cat $leadernorm [string range $code 1 end]] + } + default { + #we haven't made a mapping for this + set codenorm $code + } + } + + switch -- $leadernorm { + {7CSI} - {8CSI} { + set param [string range $codenorm 4 end-1] + #puts stdout "--> CSI [string index $leadernorm 0] bit param:$param" + switch -- [string index $codenorm end] { + m { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set displaycode [ansistring VIEW $code] + append output ${whiteb}$displaycode$RST + } else { + set displaycode [ansistring VIEW $code] + if {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + #highlight the esc & leftbracket in white as indication there is a leading reset + set cposn [string first ";" $displaycode] + append output ${whiteb}[string range $displaycode 0 $cposn]$RST${greenb}[string range $displaycode $cposn+1 end]$RST + } else { + append output ${greenb}$displaycode$RST + } + } + } + A - B { + #row move + set displaycode [ansistring VIEW $code] + set displaycode [string map [list A "A$arrow_up" B "B$arrow_down"] $displaycode] + append output ${cyanb}$displaycode$RST + + } + C - D - G { + #set num [string range $codenorm 4 end-1] + set displaycode [ansistring VIEW $code] + set displaycode [string map [list C "C$arrow_right" D "D$arrow_left" G "G$arrow_lr"] $displaycode] + append output ${cyanb}$displaycode$RST + } + H - f { + set params [string range $codenorm 4 end-1] + lassign [split $params {;}] row col + #lassign $matchinfo _match row col + set displaycode [ansistring VIEW $code] + if {$col eq ""} { + #row only move + set map [list H "H${arrow_lr}" f "f${arrow_lr}] + } else { + #row and col move + set map [list H "H${arrow_lr}${arrow_du}" f "${arrow_lr}${arrow_du}"] + } + set displaycode [string map $map $displaycode] + append output ${cyanb}$displaycode$RST + } + s { + append output ${blueb}[ansistring VIEW $code]$RST + } + u { + append output ${blueb_r}[ansistring VIEW $code]$RST + } + default { + append output ${unk}[ansistring VIEW -lf 1 $code]$RST + } + } + } + 7GFX { + switch -- [string index $codenorm 4] { + "0" { + append output ${GX}GX+$RST + } + "B" { + append output ${GX}GX-$RST + } + } + } + 7ESC { + append output ${unk}[ansistring VIEW -lf 1 $code]$RST + } + default { + #if the code is a PM (or other encapsulation type code e.g terminated by ST) we want to see linefeeds as visual representation character + append output ${unk}[ansistring VIEW -lf 1 $code]$RST + } + } + } + return $output + } + + method viewstyle {args} { + if {$o_string eq ""} { + return "" + } + if {![llength $o_ansisplits]} {my MakeSplit} + + #set splits [punk::ansi::ta::split_codes_single $string] + set output "" + set codestack [list] + set gx_stack [list] ;#not actually a stack + set cursor_saved "" + foreach {pt code} $o_ansisplits { + if {[llength $args]} { + set pt [ansistring VIEW {*}$args $pt] + } + append output [punk::ansi::codetype::sgr_merge_list {*}$codestack]$pt + if {$code ne ""} { + append output [a][ansistring VIEW -lf 1 $code] + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } elseif {[punk::ansi::codetype::is_sgr $code]} { + #basic simplification first.. straight dups + set dup_posns [lsearch -all -exact $codestack $code] ;#-exact because of square-bracket glob chars + #lremove not present in pre 8.7! + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } elseif {[regexp {\x1b7|\x1b\[s} $code]} { + #cursor_save + set cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$codestack] + } elseif {[regexp {\x1b8|\x1b\[u} $code]} { + #cursor_restore + set codestack [list $cursor_saved] + } else { + #leave SGR stack as is + if {[punk::ansi::codetype::is_gx_open $code]} { + set gx_stack [list gx0_on] ;#we'd better use a placeholder - or debugging will probably get into a big mess + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set gx_stack [list] + } + } + } + } + return $output + + } + } +} namespace eval punk::ansi::ansistring { #*** !doctools #[subsection {Namespace punk::ansi::ansistring}] @@ -1552,7 +3267,7 @@ namespace eval punk::ansi::ansistring { namespace path [list ::punk::ansi ::punk::ansi::ta] namespace ensemble create - namespace export length length1 trim trimleft trimright index VIEW VIEWCODES INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX + namespace export length trim trimleft trimright INDEX COUNT VIEW VIEWCODES VIEWSTYLE INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX NEW #todo - expose _splits_ methods so caller can work efficiently with the splits themselves #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single @@ -1666,6 +3381,184 @@ namespace eval punk::ansi::ansistring { # E03E 9E PM PM Symbol for Privacy Message # E03F 9F APC AP Symbol for Application Program Command + variable debug_visuals + #modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support) + + #Goal is not to map every control character? + #Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the string map directly + #ETX -ctrl-c + #EOT ctrl-d (EOF?) + #SYN ctrl-v + #SUB ctrl-z + #CAN ctrl-x + #FS ctrl-\ (SIGQUIT) + set visuals_interesting [dict create\ + NUL [list \x00 \u2400]\ + ETX [list \x03 \u2403]\ + EOT [list \x04 \u2404]\ + BEL [list \x07 \u2407]\ + SYN [list \x16 \u2416]\ + CAN [list \x18 \u2418]\ + SUB [list \x1a \u241a]\ + FS [list \x1c \u241c]\ + SOS [list \x98 \ue038]\ + CSI [list \x9b \ue03b]\ + ST [list \x9c \ue03c]\ + PM [list \x9e \ue03e]\ + APC [list \x9f \ue03f]\ + ] + #it turns out we need pretty much everything for debugging + set visuals_c0 [dict create\ + NUL [list \x00 \u2400]\ + SOH [list \x01 \u2401]\ + STX [list \x02 \u2402]\ + ETX [list \x03 \u2403]\ + EOT [list \x04 \u2404]\ + ENQ [list \x05 \u2405]\ + ACK [list \x06 \u2406]\ + BEL [list \x07 \u2407]\ + FF [list \x0c \u240c]\ + SO [list \x0e \u240e]\ + SF [list \x0f \u240f]\ + DLE [list \x10 \u2410]\ + DC1 [list \x11 \u2411]\ + DC2 [list \x12 \u2412]\ + DC3 [list \x13 \u2413]\ + DC4 [list \x14 \u2414]\ + NAK [list \x15 \u2415]\ + SYN [list \x16 \u2416]\ + ETB [list \x17 \u2417]\ + CAN [list \x18 \u2418]\ + EM [list \x19 \u2419]\ + SUB [list \x1a \u241a]\ + FS [list \x1c \u241c]\ + GS [list \x1d \u241d]\ + RS [list \x1e \u241e]\ + US [list \x1f \u241f]\ + DEL [list \x7f \u2421]\ + ] + #alternate symbols for space + # \u2422 Blank Symbol (b with forwardslash overly) + # \u2423 Open Box (square bracket facing up like a tray/box) + + # \u2424 Symbol for Newline (small "NL") + + # \u2425 Symbol for Delete Form Two (some sort of fat forward-slash thing) + + # \u2426 Symbol for Substitute Form Two (backwards question mark) + + #these are in the PUA (private use area) unicode block - seem to be rarely supported + #the unicode consortium has apparently neglected to provide separate visual representation codepoints for not only the c1 controls (some of which ARE still used e.g in sixels) but various other non-printing chars such as BOM + #The debugging/analysis usecase is an important one - surely moreso that some of the emoji stuff coming out of there. + #we'll hack in some stuff as needed - may override some of the visuals_c1 which is usually just empty/substitute glyphs + #Being repurposed - these could potentially be confused with actual characters depending on the debugging context + #To minimize potential confusion - we'll use a longer replacement sequence - which is not ideal from the perspective of terminal column layout debugging + #A single unique glyph would be better - although the bracketing for 8-bit codes is a useful visual indicator + #(review - BOM should use different brackets to c1?) + + #todo - regularly check if unicode has improved in this area - though with requests for c1 visuals dating back to at least 2011 - it's doubtful. + #for 8-bit controls - we will standardize on a fixed width of 4 bracketing with: + #\u2987 and \u2988 from Miscellaneous Mathematical Symbols-B (D or fractional-moon shaped brackets) + #\u2987 - Z Notation Left Image Bracket + #\u2988 - Z Notation Right Image Bracket + #selection of these is also based on them being seemingly reasonably widely available in fonts.. review + #my apologies if you're debugging z-notation strings! + #If only column's-worth of symbol/char needed between the brackets - pad with a space before the closing bracket + + #8-bit brackets + set ob8 \u2987; set cb8 \u2988 ;#z-notation image brackets + + #miscellaneous debug code brackets + set obm \u27e6 ;set cbm \u27e7 ;#square double brackets from Miscellaneous Mathematical Symbols-A + + #this private range so rarely supported in fonts - and visuals are unknown, so we will make up some 2-letter codes for now + #set visuals_c1 [dict create\ + # BPH [list \x82 "${ob8}\ue022 $cb8"]\ + # NBH [list \x83 "${ob8}\ue023 $cb8"]\ + # IND [list \x84 "${ob8}\ue024 $cb8"]\ + # NEL [list \x85 "${ob8}\ue025 $cb8"]\ + # SSA [list \x86 "${ob8}\ue026 $cb8"]\ + # ESA [list \x87 "${ob8}\ue027 $cb8"]\ + # HTS [list \x88 "${ob8}\ue028 $cb8"]\ + # HTJ [list \x89 "${ob8}\ue029 $cb8"]\ + # VTS [list \x8a "${ob8}\ue02a $cb8"]\ + # PLD [list \x8b "${ob8}\ue02a $cb8"]\ + # PLU [list \x8c "${ob8}\ue02c $cb8"]\ + # RI [list \x8d "${ob8}\ue02d $cb8"]\ + # SS2 [list \x8e "${ob8}\ue02e $cb8"]\ + # SS3 [list \x8f "${ob8}\ue02f $cb8"]\ + # DCS [list \x90 "${ob8}\ue030 $cb8"]\ + # PU1 [list \x91 "${ob8}\ue031 $cb8"]\ + # PU2 [list \x92 "${ob8}\ue032 $cb8"]\ + # STS [list \x93 "${ob8}\ue033 $cb8"]\ + # CCH [list \x94 "${ob8}\ue034 $cb8"]\ + # MW [list \x95 "${ob8}\ue035 $cb8"]\ + # SPA [list \x96 "${ob8}\ue036 $cb8"]\ + # EPA [list \x97 "${ob8}\ue037 $cb8"]\ + # SOS [list \x98 "${ob8}\ue038 $cb8"]\ + # SCI [list \x9a "${ob8}\ue03a $cb8"]\ + # CSI [list \x9b "${ob8}\ue03b $cb8"]\ + # ST [list \x9c "${ob8}\ue03c $cb8"]\ + # OSC [list \x9d "${ob8}\ue03d $cb8"]\ + # PM [list \x9e "${ob8}\ue03e $cb8"]\ + # APC [list \x9f "${ob8}\ue03f $cb8"]\ + #] + + #these 2 letter codes only need to disambiguate within the c1 set - they're not great. + #these sit within the Latin-1 Supplement block + set visuals_c1 [dict create\ + PAD [list \x80 "${ob8}PD$cb8"]\ + HOP [list \x81 "${ob8}HP$cb8"]\ + BPH [list \x82 "${ob8}BP$cb8"]\ + NBH [list \x83 "${ob8}NB$cb8"]\ + IND [list \x84 "${ob8}IN$cb8"]\ + NEL [list \x85 "${ob8}NE$cb8"]\ + SSA [list \x86 "${ob8}SS$cb8"]\ + ESA [list \x87 "${ob8}ES$cb8"]\ + HTS [list \x88 "${ob8}HS$cb8"]\ + HTJ [list \x89 "${ob8}HT$cb8"]\ + VTS [list \x8a "${ob8}VT$cb8"]\ + PLD [list \x8b "${ob8}PD$cb8"]\ + PLU [list \x8c "${ob8}PU$cb8"]\ + RI [list \x8d "${ob8}RI$cb8"]\ + SS2 [list \x8e "${ob8}S2$cb8"]\ + SS3 [list \x8f "${ob8}S3$cb8"]\ + DCS [list \x90 "${ob8}DC$cb8"]\ + PU1 [list \x91 "${ob8}P1$cb8"]\ + PU2 [list \x92 "${ob8}P2$cb8"]\ + STS [list \x93 "${ob8}SX$cb8"]\ + CCH [list \x94 "${ob8}CC$cb8"]\ + MW [list \x95 "${ob8}MW$cb8"]\ + SPA [list \x96 "${ob8}SP$cb8"]\ + EPA [list \x97 "${ob8}EP$cb8"]\ + SOS [list \x98 "${ob8}SO$cb8"]\ + SCI [list \x9a "${ob8}SC$cb8"]\ + CSI [list \x9b "${ob8}CS$cb8"]\ + ST [list \x9c "${ob8}ST$cb8"]\ + OSC [list \x9d "${ob8}OS$cb8"]\ + PM [list \x9e "${ob8}PM$cb8"]\ + APC [list \x9f "${ob8}AP$cb8"]\ + ] + + + set hack [dict create] + dict set hack BOM1 [list \uFEFF "${obm}\U1f4a3$cbm"] ;#byte order mark/ ZWNBSP (ZWNBSP usage generally deprecated) - a picture of a bomb(2wide glyph) + #review - other boms? Encoding dependent? + + dict set hack DCS [list \x90 "${ob8}\u2328 $cb8"] ;#keyboard from Miscellaneous Technical - 1 wide + pad. + dict set hack SOS [list \x98 "${ob8}\u2380 $cb8"] ;#Insertion Symbol from Miscellaneous Technical - 1 wide + pad + dict set hack ST [list \x9c "${ob8}\u2383 $cb8"] ;#Emphasis Symbol from Miscellaneous Technical - 1 wide + pad (graphically related to \u2380) + dict set hack CSI [list \x9b "${ob8}\u2386 $cb8"] ;#Enter Symbol from Miscellaneous Technical - 1 wide + pad + dict set hack OSC [list \x9d "${ob8}\u2b55$cb8"] ;#bright red ring from Miscellaneous Symbols and Arrows - 2 wide (OSC could be used for clipboard or other potentially security sensitive functions) + dict set hack PM [list \x9e "${ob8}PM$cb8"] + dict set hack APC [list \x9f "${ob8}\U1f534$cb8"] ;#bright red ball from Miscellaneoust Symbols and Pictographs - 2 wide (APC also noted as a potential security risk) + + set debug_visuals [dict merge $visuals_c0 $visuals_c1 $hack] + + #for repeated interaction with the same ANSI string - a mechanism to store state is more efficient + proc NEW {string} { + punk::ansi::class::class_ansistring new $string + } proc VIEW {args} { #*** !doctools #[call [fun VIEW] [arg string]] @@ -1674,6 +3567,8 @@ namespace eval punk::ansi::ansistring { #[para]Horizontal tab is mapped to \\U2409 '\U2409'. For many of the punk terminal text operations, tabs have already been mapped to the appropriate number of spaces using textutil::tabify functions #[para]As punkshell uses linefeed where possible in preference to crlf even on windows, cr is mapped to \\U240D '\U240D' - but lf is left as is. + variable debug_visuals + if {![llength $args]} { return "" } @@ -1704,92 +3599,6 @@ namespace eval punk::ansi::ansistring { # -- --- --- --- --- - #modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support) - - #Goal is not to map every control character? - #Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the string map directly - #ETX -ctrl-c - #EOT ctrl-d (EOF?) - #SYN ctrl-v - #SUB ctrl-z - #CAN ctrl-x - #FS ctrl-\ (SIGQUIT) - set visuals_interesting [dict create\ - NUL [list \x00 \u2400]\ - ETX [list \x03 \u2403]\ - EOT [list \x04 \u2404]\ - BEL [list \x07 \u2407]\ - SYN [list \x16 \u2416]\ - CAN [list \x18 \u2418]\ - SUB [list \x1a \u241a]\ - FS [list \x1c \u241c]\ - SOS [list \x98 \ue038]\ - CSI [list \x9b \ue03b]\ - ST [list \x9c \ue03c]\ - PM [list \x9e \ue03e]\ - APC [list \x9f \ue03f]\ - ] - #it turns out we need pretty much everything for debugging - set visuals_c0 [dict create\ - NUL [list \x00 \u2400]\ - SOH [list \x01 \u2401]\ - STX [list \x02 \u2402]\ - ETX [list \x03 \u2403]\ - EOT [list \x04 \u2404]\ - ENQ [list \x05 \u2405]\ - ACK [list \x06 \u2406]\ - BEL [list \x07 \u2407]\ - FF [list \x0c \u240c]\ - SO [list \x0e \u240e]\ - SF [list \x0f \u240f]\ - DLE [list \x10 \u2410]\ - DC1 [list \x11 \u2411]\ - DC2 [list \x12 \u2412]\ - DC3 [list \x13 \u2413]\ - DC4 [list \x14 \u2414]\ - NAK [list \x15 \u2415]\ - SYN [list \x16 \u2416]\ - ETB [list \x17 \u2417]\ - CAN [list \x18 \u2418]\ - EM [list \x19 \u2419]\ - SUB [list \x1a \u241a]\ - FS [list \x1c \u241c]\ - GS [list \x1d \u241d]\ - RS [list \x1e \u241e]\ - US [list \x1f \u241f]\ - DEL [list \x7f \u2421]\ - ] - set visuals_c1 [dict create\ - BPH [list \x82 \ue022]\ - NBH [list \x83 \ue023]\ - IND [list \x84 \ue024]\ - NEL [list \x85 \ue025]\ - SSA [list \x86 \ue026]\ - ESA [list \x87 \ue027]\ - HTS [list \x88 \ue028]\ - HTJ [list \x89 \ue029]\ - VTS [list \x8a \ue02a]\ - PLD [list \x8b \ue02a]\ - PLU [list \x8c \ue02c]\ - RI [list \x8d \ue02d]\ - SS2 [list \x8e \ue02e]\ - SS3 [list \x8f \ue02f]\ - DCS [list \x90 \ue030]\ - PU1 [list \x91 \ue031]\ - PU2 [list \x92 \ue032]\ - STS [list \x93 \ue033]\ - CCH [list \x94 \ue034]\ - MW [list \x95 \ue035]\ - SPA [list \x96 \ue036]\ - EPA [list \x97 \ue037]\ - SOS [list \x98 \ue038]\ - SCI [list \x9a \ue03a]\ - CSI [list \x9b \ue03b]\ - ST [list \x9c \ue03c]\ - OSC [list \x9d \ue03d]\ - PM [list \x9e \ue03e]\ - APC [list \x9f \ue03f]\ - ] set visuals_opt [dict create] if {$opt_esc} { @@ -1814,7 +3623,7 @@ namespace eval punk::ansi::ansistring { dict set visuals_opt SP [list \x20 \u2420] } - set visuals [dict merge $visuals_opt $visuals_c0 $visuals_c1] + set visuals [dict merge $visuals_opt $debug_visuals] set charmap [list] dict for {nm chars} $visuals { lappend charmap {*}$chars @@ -1825,45 +3634,51 @@ namespace eval punk::ansi::ansistring { #test of ISO2047 - 7bit - limited set, limited support, somewhat obscure glyphs #return [string map [list \033 \U2296 \007 \U237E] $string] } - proc VIEWCODES {string} { - if {![llength $string]} { + + #The implementation of viewcodes,viewstyle is more efficiently done in an object for the case where repeated calls of various methods can re-use the internal splits. + #for oneshots here - there is only minor overhead to use and destroy the object here. + proc VIEWCODES {args} { + set string [lindex $args end] + if {$string eq ""} { return "" } - set redb [a+ red bold] - set greenb [a+ green bold] - set GX [a+ black White bold] - set unk [a+ yellow bold] - set RST [a] - - #don't split into lines first - \n is valid within ST sections - set output "" - set splits [punk::ansi::ta::split_codes_single $string] - foreach {pt code} $splits { - append output "$pt" - if {[punk::ansi::codetype::is_sgr_reset $code]} { - append output ${greenb}RST$RST - } elseif {[punk::ansi::codetype::is_gx_open $code]} { - append output ${GX}GX+$RST - } elseif {[punk::ansi::codetype::is_gx_close $code]} { - append output ${GX}GX-$RST - } elseif {[punk::ansi::codetype::is_sgr $code]} { - append output ${greenb}[ansistring VIEW $code]$RST - } else { - append output ${unk}[ansistring VIEW $code]$RST - } + set arglist [lrange $args 0 end-1] + set ansistr [ansistring NEW $string] + set result [$ansistr viewcodes {*}$arglist] + $ansistr destroy + return $result + } + #an attempt to show the codes and colour/style of the *input* + #ie we aren't looking at the row/column positioning - but we do want to keep track of cursor attribute saves and restores + proc VIEWSTYLE {args} { + set string [lindex $args end] + if {$string eq ""} { + return "" } - return $output + set arglist [lrange $args 0 end-1] + set ansistr [ansistring NEW $string] + set result [$ansistr viewstyle {*}$arglist] + $ansistr destroy + return $result } - proc length {string} { + + #todo - change to COUNT to emphasize the difference between this and doing a Tcl string length on the ansistriped string! + #review. Tabs/elastic tabstops. Do we want to count a tab as one element? Probably so if we are doing so for \n etc and not counting 2W unicode. + #Consider leaving tab manipualation for a width function which determines columns occupied for all such things. + proc COUNT {string} { #*** !doctools - #[call [fun length] [arg string]] - #[para]Returns the length of the string without ansi codes + #[call [fun COUNT] [arg string]] + #[para]Returns the count of visible graphemes and non-ansi control characters + #[para]Incomplete! grapheme clustering support not yet implemented - only diacritics are currently clustered to count as one grapheme. #[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. - #[para]This is equivalent to calling string length on the result of stripansi $string - #[para]Note that this returns the number of characters in the payload (after applying combiners), and is not always the same as the width of the string as rendered on a terminal. + #[para]This is not quite equivalent to calling string length on the result of stripansi $string due to diacritics and/or grapheme combinations + #[para]Note that this returns the number of characters in the payload (after applying combiners) + #It is not always the same as the width of the string as rendered on a terminal due to 2wide Unicode characters and the usual invisible control characters such as \r and \n #[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. + #stripping diacritics only makes sense if we are counting them as combiners and also treating unicode grapheme combinations as single entities. + #as Our ansistring INDEX function returns the character with diacritics, and will ultimately return grapheme clusters as a single element - we strip theme here as not counted. #todo - combiners/diacritics? just map them away here? set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set string [regsub -all $re_diacritics $string ""] @@ -1873,15 +3688,87 @@ namespace eval punk::ansi::ansistring { } #included as a test/verification - slightly slower. #grapheme split version may end up being used once it supports unicode grapheme clusters - proc length2 {string} { - #we want length to return number of glyphs.. not screen width. Has to be consistent with index function + proc count2 {string} { + #we want count to return number of glyphs.. not screen width. Has to be consistent with index function return [llength [punk::char::grapheme_split [stripansi $string]]] } - + + proc length {string} { + string length [stripansi $string] + } + + proc _splits_trimleft {sclist} { + set intext 0 + set outlist [list] + foreach {pt ansiblock} $sclist { + if {$ansiblock ne ""} { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" $ansiblock + } else { + lappend outlist [string trimleft $pt] $ansiblock + set intext 1 + } + } else { + lappend outlist $pt $ansiblock + } + } else { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" + } else { + lappend outlist [string trimleft $pt] + set intext 1 + } + } else { + lappend outlist $pt + } + } + } + return $outlist + } + proc _splits_trimright {sclist} { + set intext 0 + set outlist [list] + #we need to account for empty ansiblock var caused by dual-var iteration over odd length list + foreach {pt ansiblock} [lreverse $sclist] { + if {$ansiblock ne ""} { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" $ansiblock + } else { + lappend outlist [string trimright $pt] $ansiblock + set intext 1 + } + } else { + lappend outlist $pt $ansiblock + } + } else { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" + } else { + lappend outlist [string trimright $pt] + set intext 1 + } + } else { + lappend outlist $pt + } + } + } + return [lreverse $outlist] + } + + proc _splits_trim {sclist} { + return [_splits_trimright [_splits_trimleft $sclist]] + } + + #Note that trim/trimleft/trimright will trim spaces at the extremities that are styled with background colour, underline etc + #that may be unexpected, but it's probably the only thing that makes sense. Plain string trim can chop off whitespace that is extraneous to the ansi entirely. proc trimleft {string args} { set intext 0 set out "" - #for split_codes only first or last pt can be empty string + #for split_codes only first or last pt can be empty string - but we can also get an empty ansiblock by using foreach with 2 vars on an odd-length list foreach {pt ansiblock} [split_codes $string] { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { @@ -1898,7 +3785,7 @@ namespace eval punk::ansi::ansistring { } proc trimright {string} { if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing - set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]] + set rtrimmed_list [_splits_trimright [split_codes $string]] return [join $rtrimmed_list ""] } proc trim {string} { @@ -1908,7 +3795,8 @@ namespace eval punk::ansi::ansistring { join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" } - proc index {string index} { + #Capitalised because it's the clustered grapheme/controlchar index - not the tcl string index + proc INDEX {string index} { #*** !doctools #[call [fun index] [arg string] [arg index]] #[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) @@ -1916,7 +3804,6 @@ namespace eval punk::ansi::ansistring { #[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. #[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST) #[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. - #[para]todo: SGR codes within ST-terminated strings not yet ignored properly #[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards. #[para]As any operation using end-+ will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index. #[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that. @@ -1999,7 +3886,8 @@ namespace eval punk::ansi::ansistring { set pt_index -2 set pt_found -1 set char "" - set codes_in_effect "" + #set grapheme_codestacks [list] ;#stack of codes per grapheme - will be flattened/coalesced + set codestack [list] #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) foreach {pt code} $ansisplits { @@ -2021,21 +3909,23 @@ namespace eval punk::ansi::ansistring { } if {[punk::ansi::codetype::is_sgr_reset $code]} { - #we can throw away previous codes_in_effect - set codes_in_effect "" + #we can throw away previous codestack + set codestack [list] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] } else { - #may have partial resets - but we don't want to track individual states of SGR features - #A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end. + #may have partial resets + #sgr_merge_list will handle at end #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. #Review - consider if any other types of code make sense to retain in the output in this context. if {[punk::ansi::codetype::is_sgr $code]} { - append codes_in_effect $code + lappend codestack $code } } } if {$pt_found >= 0} { - return $codes_in_effect$char + return [punk::ansi::codetype::sgr_merge_list {*}$codestack]$char } else { return "" } @@ -2045,6 +3935,7 @@ namespace eval punk::ansi::ansistring { #return empty string for each index that is out of range #review - this is possibly too slow to be very useful as is. # consider converting to oo and maintaining state of ansisplits so we don't repeat relatively expensive operations for same string + #see also punk::list_index_resolve / punk::list_index_get for ways to handle tcl list/string indices without parsing them. proc INDEXABSOLUTE {string args} { set payload_len -1 ;# -1 as token to indicate we haven't calculated it yet (only want to call it once at most) set testindices [list] @@ -2117,7 +4008,7 @@ namespace eval punk::ansi::ansistring { } } } - #assert - we made exactly one append to testindices if there was no error + #assertion - we made exactly one append to testindices if there was no error } #we now have numeric or empty string indices - but haven't fully checked they are within the underlying payload length @@ -2144,7 +4035,10 @@ namespace eval punk::ansi::ansistring { } - #Todo - rows! + #Todo - rows! Note that a 'row' doesn't represent an output row if the ANSI string we are working with contains movement/cursor restores etc. + #The column/row concept works for an ansistring that has been 'rendered' to some defined area. + #row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many rendered output rows. + #return pair of column extents occupied by the character index supplied. #single-width grapheme will return pair of integers of equal value #doulbe-width grapheme will return a pair of consecutive indices @@ -2226,7 +4120,7 @@ namespace eval punk::ansi::ansistring { set col2 "" foreach {pt code} $ansisplits { if {$pt ne ""} { - if {[string first \n $pt] < 0} { + if {[string last \n $pt] < 0} { set graphemes [punk::char::grapheme_split $pt] set lowindex [expr {$highindex + 1}] ;#last high set lowc [expr {$highc + 1}] @@ -2238,17 +4132,17 @@ namespace eval punk::ansi::ansistring { } elseif {$col == $highc} { return $highindex } - set index [expr {$lowindex + 1}] + set index [expr {$lowindex -1}] set str "" foreach g $graphemes { + incr index append str $g set width [punk::char::ansifreestring_width $str] - if {$lowc + $width >= $col} { + if {$lowc-1 + $width >= $col} { return $index } - incr index } - error "ansistring COLUMNINDEX '$string' $col not found" ;#assert - shouldn't happen + error "ansistring COLUMNINDEX '$string' $col not found" ;#assertion - shouldn't happen } } else { error "ansistring COLUMNINDEX multiline not implemented" @@ -2257,43 +4151,6 @@ namespace eval punk::ansi::ansistring { } } - proc _splits_trimleft {sclist} { - set intext 0 - set outlist [list] - foreach {pt ansiblock} $sclist { - if {!$intext} { - if {$pt eq "" || [regexp {^\s+$} $pt]} { - lappend outlist "" $ansiblock - } else { - lappend outlist [string trimleft $pt] $ansiblock - set intext 1 - } - } else { - lappend outlist $pt $ansiblock - } - } - return $outlist - } - proc _splits_trimright {sclist} { - set intext 0 - set outlist [list] - foreach {pt ansiblock} [lreverse $sclist] { - if {!$intext} { - if {$pt eq "" || [regexp {^\s+$} $pt]} { - lappend outlist "" $ansiblock - } else { - lappend outlist [string trimright $pt] $ansiblock - set intext 1 - } - } else { - lappend outlist $pt $ansiblock - } - } - return [lreverse $outlist] - } - proc _splits_trim {sclist} { - return [_splits_trimright [_splits_trimleft $sclist]] - } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] diff --git a/src/bootsupport/modules/punk/args-0.1.0.tm b/src/bootsupport/modules/punk/args-0.1.0.tm index f8b008f..6884662 100644 --- a/src/bootsupport/modules/punk/args-0.1.0.tm +++ b/src/bootsupport/modules/punk/args-0.1.0.tm @@ -391,7 +391,7 @@ namespace eval punk::args { } } set opts [dict merge $defaults_dict_opts $checked_args] - #assert - checked_args keys are full-length option names if -anyopts was false or if the supplied option as a shortname matched one of our defined options + #assertion - checked_args keys are full-length option names if -anyopts was false or if the supplied option as a shortname matched one of our defined options set values [dict merge $defaults_dict_values $values_dict] diff --git a/src/bootsupport/modules/punk/cap-0.1.0.tm b/src/bootsupport/modules/punk/cap-0.1.0.tm index 8eeef7f..3f3556f 100644 --- a/src/bootsupport/modules/punk/cap-0.1.0.tm +++ b/src/bootsupport/modules/punk/cap-0.1.0.tm @@ -249,7 +249,7 @@ namespace eval punk::cap { puts stderr "register_capabilityname cannot register capability:$capname with handler:$capnamespace. There is already a registered handler:$hdlr" return } - #assert: capnamespace may or may not be empty string, capname may or may not already exist in caps dict, caps $capname providers may have existing entries. + #assertion: capnamespace may or may not be empty string, capname may or may not already exist in caps dict, caps $capname providers may have existing entries. dict set caps $capname handler $capnamespace if {![dict exists $caps $capname providers]} { dict set caps $capname providers [list] diff --git a/src/bootsupport/modules/punk/cap/handlers/templates-0.1.0.tm b/src/bootsupport/modules/punk/cap/handlers/templates-0.1.0.tm index 7841c2d..f3e75ea 100644 --- a/src/bootsupport/modules/punk/cap/handlers/templates-0.1.0.tm +++ b/src/bootsupport/modules/punk/cap/handlers/templates-0.1.0.tm @@ -698,8 +698,8 @@ namespace eval punk::cap::handlers::templates { } } - #assert path is first key of itemdict {callers are allowed to rely on it being first} - #assert itemdict has keys path,basefolder,sourceinfo + #assertion path is first key of itemdict {callers are allowed to rely on it being first} + #assertion itemdict has keys path,basefolder,sourceinfo set result [dict create] set keys [lreverse [dict keys $itemdict]] foreach k $keys { diff --git a/src/bootsupport/modules/punk/char-0.1.0.tm b/src/bootsupport/modules/punk/char-0.1.0.tm index fb1ea12..700a585 100644 --- a/src/bootsupport/modules/punk/char-0.1.0.tm +++ b/src/bootsupport/modules/punk/char-0.1.0.tm @@ -57,6 +57,7 @@ package require Tcl 8.6- #dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review +package require textutil package require textutil::wcswidth #*** !doctools @@ -921,7 +922,7 @@ namespace eval punk::char { set start [dict get [lindex $ranges 0] start] set end [dict get [lindex $ranges 0] end] if {![dict exists $charset_extents_startpoints $start] || $end ni [dict get $charset_extents_startpoints $start]} { - #assert if end wasn't in startpoits list - then start won't be in endpoints list + #assertion if end wasn't in startpoits list - then start won't be in endpoints list dict lappend charset_extents_startpoints $start $end dict lappend charset_extents_endpoints $end $start } @@ -934,7 +935,7 @@ namespace eval punk::char { set start [dict get $range start] set end [dict get $range end] if {![dict exists $charset_extents_startpoints $start] || $end ni [dict get $charset_extents_startpoints $start]} { - #assert if end wasn't in startpoits list - then start won't be in endpoints list + #assertion if end wasn't in startpoits list - then start won't be in endpoints list dict lappend charset_extents_startpoints $start $end dict lappend charset_extents_endpoints $end $start } @@ -1871,7 +1872,7 @@ namespace eval punk::char { if {[punk::ansi::ta::detect $text]} { puts stderr "string_width detected ANSI!" } - if {[string first \n $text] >= 0} { + if {[string last \n $text] >= 0} { error "string_width accepts only a single line" } tailcall ansifreestring_width $text @@ -1900,7 +1901,7 @@ namespace eval punk::char { return [tcl::mathop::+ {*}$widths] } - #prerequisites - no ansi escapes - no newlines + #prerequisites - no ansi escapes - no newlines - utf8 encoding assumed #review - what about \r \t \b ? #NO processing of \b - already handled in ansi::printing_length which then calls this #this version breaks string into sequences of ascii vs unicode @@ -1935,20 +1936,38 @@ namespace eval punk::char { set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set text [regsub -all $re_diacritics $text ""] + # -- --- --- --- --- --- --- #review - #if we strip out ZWJ \u0200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis - #as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide + #if we strip out ZWJ \u200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis + #as at 2024 - textutil::wcswidth just uses the unicode east-asian width property data and doesn't seem to handle these specially - it counts this joiner and others as one wide (also BOM \uFFEF) - #ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length + #TODO - once we have proper grapheme cluster splitting - work out which of these characters should be left in and/or when exactly their length-effects apply + # + #for now - strip them out - #ZWSP \u0200b zero width space + #ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length + #ZWSP \u200b zero width space + #\uFFEFBOM/ ZWNBSP and others that should be zero width + #todo - work out proper way to mark/group zero width. + + set text [string map [list \u200b "" \u200c "" \u200d "" \uFFEF ""] $text] + # -- --- --- --- --- --- --- #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #todo - document that these shouldn't be present in input rather than explicitly checking here + + #c0 controls set re_ascii_c0 {[\U0000-\U001F]} set text [regsub -all $re_ascii_c0 $text ""] + #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective + #some or all of these may have a visual representation in other encodings e.g cp855 seems to have 1 width for them all + #we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here + #they should also be unlikely to be present in an ansi-free string (as is a precondition for use of this function) + set text [regsub -all {[\u0080-\u009f]+} $text ""] + + #short-circuit basic cases #support tcl pre 2023-11 - see regexp bug below #if {![regexp {[\uFF-\U10FFFF]} $text]} { diff --git a/src/bootsupport/modules/punk/console-0.1.1.tm b/src/bootsupport/modules/punk/console-0.1.1.tm index 25aa4e8..d4c1586 100644 --- a/src/bootsupport/modules/punk/console-0.1.1.tm +++ b/src/bootsupport/modules/punk/console-0.1.1.tm @@ -30,6 +30,9 @@ if {"windows" eq $::tcl_platform(platform)} { # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::console { + variable tabwidth 8 ;#default only - will attempt to detect and set to that configured in terminal + #Note that windows terminal cooked mode seems to use 8 for interactive use even if set differently + #e.g typing tab characters may still be echoed 8-spaced while writing to stdout my obey the terminal's tab stops. variable has_twapi 0 variable previous_stty_state_stdin "" variable previous_stty_state_stdout "" @@ -572,7 +575,7 @@ namespace eval punk::console { if {$waitvar($callid) ne "timedout"} { after cancel $cancel_timeout_id } else { - puts stderr "timeout in get_ansi_response_payload" + puts stderr "timeout in get_ansi_response_payload. Ansi request was:[ansistring VIEW $query]" } if {$was_raw == 0} { @@ -881,6 +884,70 @@ namespace eval punk::console { set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] return $payload } + proc get_tabstops {{inoutchannels {stdin stdout}}} { + #DECTABSR \x1b\[2\$w + #response example " ^[P2$u9/17/25/33/41/49/57/65/73/81^[\ " (where ^[ is \x1b) + #set capturingregex {(.*)(\x1b\[P2$u()\x1b\[\\)} + #set capturingregex {(.*)(\x1bP2$u((?:[0-9]+)*(?:\/[0-9]+)*)\x1b\\)$} + set capturingregex {(.*)(\x1bP2\$u(.*)\x1b\\)$} + set request "\x1b\[2\$w" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + set tabstops [split $payload "/"] + return $tabstops + } + + #a simple estimation of tab-width under assumption console is set with even spacing. + #It's known this isn't always the case - but things like textutil::untabify2 take only a single value + #on some systems test_char_width is a similar speed to get_tabstop_apparent_width - but on some test_char_width is much slower + #we will use test_char_width as a fallback + proc get_tabstop_apparent_width {} { + set tslist [get_tabstops] + if {![llength $tslist]} { + #either terminal failed to report - or none set. + set testw [test_char_width \t] + if {[string is integer -strict $testw]} { + return $testw + } + #We don't support none - default to 8 + return 8 + } + #we generally expect to see a tabstop at column 1 - but it may not be set. + if {[lindex $tslist 0] eq "1"} { + if {[llength $tslist] == 1} { + set testw [test_char_width \t] + if {[string is integer -strict $testw]} { + return $testw + } + return 8 + } else { + set next [lindex $tslist 1] + return [expr {$next - 1}] + } + } else { + #simplistic guess at width - review - do we need to consider leftmost tabstops as more likely to be non-representative and look further into the list? + if {[llength $tslist] == 1} { + return [lindex $tslist 0] + } else { + return [expr {[lindex $tslist 1] - [lindex $tslist 0]}] + } + } + } + #default to 8 just because it seems to be most common default in terminals + proc set_tabstop_width {{w 8}} { + set tsize [get_size] + set width [dict get $tsize columns] + set mod [expr {$width % $w}] + set max [expr {$width - $mod}] + set tstops "" + set c 1 + while {$c <= $max} { + append tstops [string repeat " " $w][punk::ansi::set_tabstop] + incr c $w + } + set punk::console::tabwidth $w ;#we also attempt to read terminal's tabstops and set tabwidth to the apparent spacing of first non-1 value in tabstops list. + catch {textutil::tabify::untabify2 "" $w} ;#textutil tabify can end up uninitialised and raise errors like "can't read Spaces().." after a tabstop change This call seems to keep tabify happy - review. + puts -nonewline "[punk::ansi::clear_all_tabstops]\n[punk::ansi::set_tabstop]$tstops" + } proc get_cursor_pos_list {} { @@ -888,12 +955,14 @@ namespace eval punk::console { } proc get_size {} { if {[catch { - puts -nonewline [punk::ansi::cursor_off][punk::ansi::cursor_save][punk::ansi::move 2000 2000] + #some terminals (conemu on windows) scroll the viewport when we make a big move down like this - a move to 1 1 immediately after cursor_save doesn't seem to fix that. + #This issue also occurs when switching back from the alternate screen buffer - so perhaps that needs to be addressed elsewhere. + puts -nonewline [punk::ansi::cursor_off][punk::ansi::cursor_save_dec][punk::ansi::move 2000 2000] lassign [get_cursor_pos_list] lines cols puts -nonewline [punk::ansi::cursor_restore][punk::console::cursor_on];flush stdout set result [list columns $cols rows $lines] } errM]} { - puts -nonewline [punk::ansi::cursor_restore] + puts -nonewline [punk::ansi::cursor_restore_dec] puts -nonewline [punk::ansi::cursor_on] error "$errM" } else { @@ -912,6 +981,22 @@ namespace eval punk::console { lassign [split $payload {;}] rows cols return [list columns $cols rows $rows] } + proc get_mode_line_wrap {{inoutchannels {stdin stdout}}} { + set capturingregex {(.*)(\x1b\[\?7;([0-9]+)\$y)$} ;#must capture prefix,entire-response,response-payload + set request "\x1b\[?7\$p" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + return $payload + } + + #Terminals generally default to LNM being reset (off) ie enter key sends a lone + #Terminals tested on windows either don't respond to this query, or respond with 0 (meaning mode not understood) + #I presume from this that almost nobody is using LNM 1 (which sends both and ) + proc get_mode_LNM {{inoutchannels {stdin stdout}}} { + set capturingregex {(.*)(\x1b\[\?20;([0-9]+)\$y)$} ;#must capture prefix,entire-response,response-payload + set request "\x1b\[?20\$p" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + return $payload + } #terminals lie. This should be a reasonable (albeit relatively slow) test of actual width - but some terminals seem to miscalculate. #todo - a visual interactive test/questionnaire to ask user if things are lining up or if the terminal is telling fibs about cursor position. @@ -1131,10 +1216,10 @@ namespace eval punk::console { move $orig_row $orig_col } proc scroll_up {n} { - puts -nonewline stdout [punk::ansi::scroll_up] + puts -nonewline stdout [punk::ansi::scroll_up $n] } proc scroll_down {n} { - puts -nonewline stdout [punk::ansi::scroll_down] + puts -nonewline stdout [punk::ansi::scroll_down $n] } #review - worth the extra microseconds to inline? might be if used in for example prompt on every keypress. @@ -1149,6 +1234,18 @@ namespace eval punk::console { #[call [fun cursor_restore]] puts -nonewline \x1b\[u } + #DEC equivalents of cursor_save/cursor_restore - perhaps more widely supported? + proc cursor_save_dec {} { + #*** !doctools + #[call [fun cursor_save_dec]] + puts -nonewline \x1b7 + } + proc cursor_restore_dec {} { + #*** !doctools + #[call [fun cursor_restore_dec]] + puts -nonewline \x1b8 + } + proc insert_spaces {count} { puts -nonewline stdout \x1b\[${count}@ } @@ -1175,6 +1272,8 @@ namespace eval punk::console { namespace import ansi::move_row namespace import ansi::cursor_save namespace import ansi::cursor_restore + namespace import ansi::cursor_save_dec + namespace import ansi::cursor_restore_dec namespace import ansi::scroll_down namespace import ansi::scroll_up namespace import ansi::insert_spaces @@ -1193,7 +1292,7 @@ namespace eval punk::console { #set blanks [string repeat " " [expr {$col + $tw}]] #puts -nonewline [punk::ansi::erase_eol]$blanks;move_emit_return this $col $text #puts -nonewline [move_emit_return this $col [punk::ansi::insert_spaces 150]$text] - cursor_save + cursor_save_dec #move_emit_return this $col [punk::ansi::move_forward 50][punk::ansi::insert_spaces 150][punk::ansi::move_back 50][punk::ansi::move_forward $col]$text #puts -nonewline [punk::ansi::insert_spaces 150][punk::ansi::move_column $col]$text puts -nonewline [punk::ansi::erase_eol][punk::ansi::move_column $col]$text @@ -1216,16 +1315,16 @@ namespace eval punk::console { puts -nonewline stdout $commands return "" } - #we can be faster and more efficient if we use the consoles cursor_save command - but each savecursor overrides any previous one. + #we can be faster and more efficient if we use the consoles cursor_save_dec command - but each savecursor overrides any previous one. #leave cursor_off/cursor_on to caller who can wrap more efficiently.. proc cursorsave_move_emit_return {row col data args} { set commands "" - append commands [punk::ansi::cursor_save] + append commands [punk::ansi::cursor_save_dec] append commands [punk::ansi::move_emit $row $col $data] foreach {row col data} $args { append commands [punk::ansi::move_emit $row $col $data] } - append commands [punk::ansi::cursor_restore] + append commands [punk::ansi::cursor_restore_dec] puts -nonewline stdout $commands; flush stdout } proc move_emitblock_return {row col textblock} { @@ -1242,12 +1341,12 @@ namespace eval punk::console { } proc cursorsave_move_emitblock_return {row col textblock} { set commands "" - append commands [punk::ansi::cursor_save] + append commands [punk::ansi::cursor_save_dec] foreach ln [split $textblock \n] { append commands [punk::ansi::move_emit $row $col $ln] incr row } - append commands [punk::ansi::cursor_restore] + append commands [punk::ansi::cursor_restore_dec] puts -nonewline stdout $commands;flush stdout return } @@ -1481,7 +1580,7 @@ namespace eval punk::console { set cix 0 foreach c [split $charline {}] { if {$c} { - append output [punk::ansi::move_emit [expr {$row + $line}] [expr {$col + $charno * 8 + $cix}] "[a reverse] [a noreverse]"] + append output [punk::ansi::move_emit [expr {$row + $line}] [expr {$col + $charno * 8 + $cix}] "[a+ reverse] [a+ noreverse]"] #curses attr on reverse #curses move [expr $row + $line] [expr $col + $charno * 8 + $cix] #curses puts " " @@ -1493,6 +1592,11 @@ namespace eval punk::console { } return $output } + proc get_time {} { + overtype::left -width 70 "" [bigstr [clock format [clock seconds] -format %H:%M:%S] 1 1] + } + + proc display1 {} { #punk::console::clear punk::console::move_call_return 20 20 {punk::console::clear_above} diff --git a/src/bootsupport/modules/punk/fileline-0.1.0.tm b/src/bootsupport/modules/punk/fileline-0.1.0.tm index fdf9167..44b1f9c 100644 --- a/src/bootsupport/modules/punk/fileline-0.1.0.tm +++ b/src/bootsupport/modules/punk/fileline-0.1.0.tm @@ -745,7 +745,7 @@ namespace eval punk::fileline::class { lassign [my numeric_linerange $startidx $endidx] startidx endidx set chunkstart [dict get $o_linemap $startidx start] set chunkend [dict get $o_linemap $endidx end] - set line_list [my chunkrange_to_lineinfolist $chunkstart $chunkend] ;# assert - no need to view truncations as we've picked start and end of complete lines + set line_list [my chunkrange_to_lineinfolist $chunkstart $chunkend] ;# assertion - no need to view truncations as we've picked start and end of complete lines #verify sanity set l_start [lindex $line_list 0] if {[set idx_start [dict get $l_start lineindex]] ne $startidx} { @@ -983,9 +983,9 @@ namespace eval punk::fileline::class { lappend infolist $last } ########################### - #assert all records have is_truncated key. - #assert if is_truncated == 1 truncatedside should contain a list of either left, right or both left and right - #assert If not opt_show_truncated - then truncated records will not have truncated,truncatedleft,truncatedright keys. + #assertion all records have is_truncated key. + #assertion if is_truncated == 1 truncatedside should contain a list of either left, right or both left and right + #assertion If not opt_show_truncated - then truncated records will not have truncated,truncatedleft,truncatedright keys. return $infolist } diff --git a/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/bootsupport/modules/punk/lib-0.1.1.tm index 5de7ca5..08632b1 100644 --- a/src/bootsupport/modules/punk/lib-0.1.1.tm +++ b/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -599,7 +599,6 @@ namespace eval punk::lib { } return $prefix } - #test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var proc swapnumvars {namea nameb} { upvar $namea a $nameb b @@ -916,6 +915,11 @@ namespace eval punk::lib { set codestack [list $code] } else { if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code } ;#else gx0 or other code - we don't want to stack it with SGR codes } @@ -931,7 +935,9 @@ namespace eval punk::lib { } } - set newreplay [join $codestack ""] + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + if {$line_has_sgr && $newreplay ne $replaycodes} { #adjust if it doesn't already does a reset at start if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { @@ -1264,7 +1270,7 @@ namespace eval punk::lib { } } set opts [dict merge $defaults_dict_opts $checked_args] - #assert - checked_args keys are full-length option names if -anyopts was false or if the supplied option as a shortname matched one of our defined options + #assertion - checked_args keys are full-length option names if -anyopts was false or if the supplied option as a shortname matched one of our defined options set values [dict merge $defaults_dict_values $values_dict] @@ -1294,15 +1300,20 @@ namespace eval punk::lib { set allow_ansi 0 } if {!$allow_ansi} { - foreach e $vlist { - if {[punk::ansi::ta::detect $e]} { - error "Option $o for $caller contains ansi - but -allow_ansi is false. Received: '$e'" - } + #detect should work fine directly on whole list + if {[punk::ansi::ta::detect $vlist]} { + error "Option $o for $caller contains ansi - but -allow_ansi is false. Received: [ansistring VIEW $vlist]" } + #foreach e $vlist { + # if {[punk::ansi::ta::detect $e]} { + # error "Option $o for $caller contains ansi - but -allow_ansi is false. Received: '$e'" + # } + #} } set vlist_check [list] foreach e $vlist { + #could probably stripansi entire list safely in one go? - review if {$validate_without_ansi} { lappend vlist_check [punk::ansi::stripansi $e] } else { @@ -1529,6 +1540,9 @@ namespace eval punk::lib { return "$average +/- $sigma microseconds per iteration" } + + #test function to use with show_jump_tables + #todo - check if switch compilation to jump tables differs by Tcl version proc switch_char_test {c} { set dec [scan $c %c] foreach t [list 1 2 3] { @@ -1545,6 +1559,7 @@ namespace eval punk::lib { } } + #tcl 8.6/8.7 (at least) #curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable switch -- $c { a { diff --git a/src/bootsupport/modules/punk/mix/commandset/loadedlib-0.1.0.tm b/src/bootsupport/modules/punk/mix/commandset/loadedlib-0.1.0.tm index 8b1f40e..39e9b09 100644 --- a/src/bootsupport/modules/punk/mix/commandset/loadedlib-0.1.0.tm +++ b/src/bootsupport/modules/punk/mix/commandset/loadedlib-0.1.0.tm @@ -466,7 +466,7 @@ namespace eval punk::mix::commandset::loadedlib { puts stdout "---" set question "Proceed to create ${pkgtail}-${ver}.tm module? Y|N" set answer [punk::lib::askuser $question] ;#takes account of previous stdin state and terminal raw vs line state - if {$answer ne "y"} { + if {[string tolower $answer] ne "y"} { puts stderr "mix libcopy.asmodule aborting due to user response '$answer' (required Y|y to proceed) use -askme 0 to avoid prompts." return } @@ -486,7 +486,7 @@ namespace eval punk::mix::commandset::loadedlib { if {$opt_askme} { set question "Copy anyway? Y|N" set answer [punk::lib::askuser $question] - if {$answer ne "y"} { + if {[string tolower $answer] ne "y"} { puts stderr "mix libcopy.asmodule aborting due to user response '$answer' (required Y|y to proceed) use -askme 0 to avoid prompts." return } diff --git a/src/bootsupport/modules/punk/mix/commandset/scriptwrap-0.1.0.tm b/src/bootsupport/modules/punk/mix/commandset/scriptwrap-0.1.0.tm index 6379b28..5ed28ff 100644 --- a/src/bootsupport/modules/punk/mix/commandset/scriptwrap-0.1.0.tm +++ b/src/bootsupport/modules/punk/mix/commandset/scriptwrap-0.1.0.tm @@ -213,493 +213,493 @@ namespace eval punk::mix::commandset::scriptwrap { #if {[string match "rem *" $trimln] || [string match "@rem *" $trimln] || [string match "REM *" $trimln] || [string match "@REM *" $trimln]} {} #ignore things that look like a call that are beind a REM switch -glob -nocase -- $trimln { - "rem *" - - "@rem *" {} - default { + "rem *" - "@rem *" { - #todo - better callsite analysis. There can be data between @GOTO or @CALL and : other than just whitespace! - - #todo - allow analysis of colon-less call. May need to check list of internal commands - but what about external ones? - #foreach search_regex [list {(.*\s+|^)(@*call\s*:*)(\S.*)} {(.*\s+|^)(@*CALL\s*:*)(\S.*)} {(.*\s+|^)(@*goto\s*:*)(\S.*)} {(.*\s+|^)(@*GOTO\s*:*)(\S.*)}] {} - foreach search_regex [list {(.*\s+|^)(@*call\s*:)(\S.*)} {(.*\s+|^)(@*CALL\s*:)(\S.*)} {(.*\s+|^)(@*goto\s*:)(\S.*)} {(.*\s*|.*\s+|^)(@*GOTO\s*:)(\S.*)} {(.*\|\|.*)(@*GOTO\s*:)(\S.*)}] { - if {[regexp $search_regex $callingline_payload _m precall call labelplus]} { - #todo further checks to see if it's actually a batch script line - # - - - - work out what cmd.exe considers start of 512B boundaries when scanning from a callsite - #callposn affected by newlines? - #set callposn [expr {$file_offset + [string length $callingline_payload]}] ;#take callposn as end of line .. review - multiline statements? - set callposn [expr {$file_offset + $callingline_len}] - - #Note there are anomalies around target labels in bracketed sections such as IF blocks - #this is bad practice - as it can lead to unbalanced braces - but batch files can still work under cmd.exe with them in some cases - #e.g unbalanced trailing bracket may be ignored. - #A working script with target-labels in braces can fail due to boundary issues we don't detect (callsite for boundary counting may need to be at end of entire multiline if block??) - #For now - just make sure punk templates don't do this - but it would be nice to be able to detect. - - #set callposn $file_offset - #set callposn [expr {$file_offset + [string length $precall]}] - # - - - - - break - } } - set callsite_labelfound 0 ;#until proven - if {$callposn != -1} { - set callposn_lineindex [lindex [$objFile chunkrange_to_linerange $callposn $callposn] 0] - #the line represented by callposn may actually be beyond the calling_line_index - set labelinfo [batchlib::get_callsite_label $labelplus] - if {[dict get $labelinfo labelfound]} { - set callsite_labelfound 1 - set label [dict get $labelinfo label] - set call_label_record [list label $label line $callingline_num] - dict lappend call_labels_found $label $call_label_record - } else { - puts stderr "[a+ yellow bold]WARNING - apparent callsite $callposn but couldn't verify label[a]" - puts stderr "Line:\n$trimln" + default { + #todo - better callsite analysis. There can be data between @GOTO or @CALL and : other than just whitespace! + + #todo - allow analysis of colon-less call. May need to check list of internal commands - but what about external ones? + #foreach search_regex [list {(.*\s+|^)(@*call\s*:*)(\S.*)} {(.*\s+|^)(@*CALL\s*:*)(\S.*)} {(.*\s+|^)(@*goto\s*:*)(\S.*)} {(.*\s+|^)(@*GOTO\s*:*)(\S.*)}] {} + foreach search_regex [list {(.*\s+|^)(@*call\s*:)(\S.*)} {(.*\s+|^)(@*CALL\s*:)(\S.*)} {(.*\s+|^)(@*goto\s*:)(\S.*)} {(.*\s*|.*\s+|^)(@*GOTO\s*:)(\S.*)} {(.*\|\|.*)(@*GOTO\s*:)(\S.*)}] { + if {[regexp $search_regex $callingline_payload _m precall call labelplus]} { + #todo further checks to see if it's actually a batch script line + # - - - - work out what cmd.exe considers start of 512B boundaries when scanning from a callsite + #callposn affected by newlines? + #set callposn [expr {$file_offset + [string length $callingline_payload]}] ;#take callposn as end of line .. review - multiline statements? + set callposn [expr {$file_offset + $callingline_len}] + + #Note there are anomalies around target labels in bracketed sections such as IF blocks + #this is bad practice - as it can lead to unbalanced braces - but batch files can still work under cmd.exe with them in some cases + #e.g unbalanced trailing bracket may be ignored. + #A working script with target-labels in braces can fail due to boundary issues we don't detect (callsite for boundary counting may need to be at end of entire multiline if block??) + #For now - just make sure punk templates don't do this - but it would be nice to be able to detect. + + #set callposn $file_offset + #set callposn [expr {$file_offset + [string length $precall]}] + # - - - - + break + } } - } - - #todo - multiple calls on one line. - also - determine what cmd considers the starting point for forward scanning when call is in a structure such as an if statement. - if {$callsite_labelfound} { - puts stdout "[a+ bold cyan]CALLSITE on line $callingline_num ending at byte $callposn[a]" - set callsummary [string range "${call}${labelplus}" 0 100] - if {[string length $callsummary] < [string length ${call}${labelplus}]} { - puts stdout " CALLSITE: $callsummary (truncated to 100 bytes)" - } else { - puts stdout " CALLSITE: '${call}${labelplus}'" + set callsite_labelfound 0 ;#until proven + if {$callposn != -1} { + set callposn_lineindex [lindex [$objFile chunkrange_to_linerange $callposn $callposn] 0] + #the line represented by callposn may actually be beyond the calling_line_index + set labelinfo [batchlib::get_callsite_label $labelplus] + if {[dict get $labelinfo labelfound]} { + set callsite_labelfound 1 + set label [dict get $labelinfo label] + set call_label_record [list label $label line $callingline_num] + dict lappend call_labels_found $label $call_label_record + } else { + puts stderr "[a+ yellow bold]WARNING - apparent callsite $callposn but couldn't verify label[a]" + puts stderr "Line:\n$trimln" + } } - puts stdout " [a+ cyan]FULLINE: $callingline_payload[a]" + + #todo - multiple calls on one line. - also - determine what cmd considers the starting point for forward scanning when call is in a structure such as an if statement. + if {$callsite_labelfound} { + puts stdout "[a+ bold cyan]CALLSITE on line $callingline_num ending at byte $callposn[a]" + set callsummary [string range "${call}${labelplus}" 0 100] + if {[string length $callsummary] < [string length ${call}${labelplus}]} { + puts stdout " CALLSITE: $callsummary (truncated to 100 bytes)" + } else { + puts stdout " CALLSITE: '${call}${labelplus}'" + } + puts stdout " [a+ cyan]FULLINE: $callingline_payload[a]" - ################################## - #set labelpluswords [regexp -inline -all {\S+} $labelplus] ;#don't assume labelplus can be treated as Tcl list - use regexp to split - #NOTE it is invalid to assume label always terminated by space - pair of % characters (variable substitution) can contain a space without terminating label - #set word1 [lindex $labelpluswords 0] + ################################## + #set labelpluswords [regexp -inline -all {\S+} $labelplus] ;#don't assume labelplus can be treated as Tcl list - use regexp to split + #NOTE it is invalid to assume label always terminated by space - pair of % characters (variable substitution) can contain a space without terminating label + #set word1 [lindex $labelpluswords 0] - ################################## + ################################## - set labelsize [string length $label] - #scan forward for labels at boundaries - set forward_chunk [$objFile chunk $callposn end] - set forward_chunk_base $callposn ;#name for clarity + set labelsize [string length $label] + #scan forward for labels at boundaries + set forward_chunk [$objFile chunk $callposn end] + set forward_chunk_base $callposn ;#name for clarity - incr callid - set callvar "call-${callid}_fromline-${callingline_num}" - upvar 0 $callvar objForwardScan - set objForwardScan [fileline::textinfo new $forward_chunk] + incr callid + set callvar "call-${callid}_fromline-${callingline_num}" + upvar 0 $callvar objForwardScan + set objForwardScan [fileline::textinfo new $forward_chunk] - ################################################################################################################################## - #Forward scan 1 - check at normal line boundaries - and see if collides with a chunk boundary - and if the label is obscured or ok - set dsize [$objForwardScan chunklen] - set num_boundaries [expr {$dsize / 512} ] - puts "scanning $dsize forward bytes in file starting at $forward_chunk_base for label '$label' - num_boundaries: $num_boundaries" - set total_offset $file_offset - set found_forward_label 0 - foreach scanlineinfo [$objForwardScan lineinfolist 0 end] { - set scanline_start [dict get $scanlineinfo start] - set scanline_bytes [dict get $scanlineinfo linelen] - set scanline [dict get $scanlineinfo payload] + ################################################################################################################################## + #Forward scan 1 - check at normal line boundaries - and see if collides with a chunk boundary - and if the label is obscured or ok + set dsize [$objForwardScan chunklen] + set num_boundaries [expr {$dsize / 512} ] + puts "scanning $dsize forward bytes in file starting at $forward_chunk_base for label '$label' - num_boundaries: $num_boundaries" + set total_offset $file_offset + set found_forward_label 0 + foreach scanlineinfo [$objForwardScan lineinfolist 0 end] { + set scanline_start [dict get $scanlineinfo start] + set scanline_bytes [dict get $scanlineinfo linelen] + set scanline [dict get $scanlineinfo payload] - set line_start_global [expr {$forward_chunk_base + $scanline_start}] - set line_index_global [lindex [$objFile chunkrange_to_linerange $line_start_global $line_start_global] 0] - set line_num_global [expr {$line_index_global + 1}] + set line_start_global [expr {$forward_chunk_base + $scanline_start}] + set line_index_global [lindex [$objFile chunkrange_to_linerange $line_start_global $line_start_global] 0] + set line_num_global [expr {$line_index_global + 1}] - set trimscanline [string trim $scanline] + set trimscanline [string trim $scanline] - set found_targetlabel_at_line 0 ;# until disproven - if {[string first : $scanline] >= 0} { - set labelinfo [batchlib::get_target_label_from_line $scanline] - if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { - #add to target_labels_found record below - set scan_target_label_record [list label $label line $line_num_global] - set found_targetlabel_at_line 1 - } - } - - if {$found_targetlabel_at_line} { - set scan_target_label_same_line_seen false - if {[dict exists $target_labels_found $label]} { - set thislabel_records [dict get $target_labels_found $label] - foreach previous $thislabel_records { - if {[dict get $previous line] eq $line_num_global} { - set scan_target_label_same_line_seen true - } + set found_targetlabel_at_line 0 ;# until disproven + if {[string first : $scanline] >= 0} { + set labelinfo [batchlib::get_target_label_from_line $scanline] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + #add to target_labels_found record below + set scan_target_label_record [list label $label line $line_num_global] + set found_targetlabel_at_line 1 } - } - incr found_forward_label - if {!$scan_target_label_same_line_seen} { - set label_posn_in_line [string first : $scanline] - set labelposn [expr {$scanline_start + $label_posn_in_line}] - #we only really care about exactly landing on a boundary or else the next 512 byte boundaries above the labelposn - if {($labelposn % 512) == 0} { - set ubound [expr {($labelposn / 512) * 512}] - } else { - set ubound [expr {(($labelposn / 512)+1) * 512}] + } + + if {$found_targetlabel_at_line} { + set scan_target_label_same_line_seen false + if {[dict exists $target_labels_found $label]} { + set thislabel_records [dict get $target_labels_found $label] + foreach previous $thislabel_records { + if {[dict get $previous line] eq $line_num_global} { + set scan_target_label_same_line_seen true + } + } } - set lbound [expr {$ubound - $labelsize}] - if {($labelposn >= $lbound) && ($labelposn <= $ubound)} { - dict set scan_target_label_record error linestart_and_call_offset_bytes - lappend error_labels [list label $label linestart_and_call_offset_bytes $labelposn callsite [list call ${call}${labelplus} call_linenum $callingline_num] bad_target_line $line_num_global] - puts stdout "[a+ bold red]ERROR: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" - puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $ubound[a] [a+ yellow bold]from callsite.[a]" - puts [$objForwardScan chunk_boundary_display [dict get $scanlineinfo start] [dict get $scanlineinfo end] 512 -linebase $callposn_lineindex+1 -limit 1] ;#+1 on callposn_linindex to do editor-style linenums + incr found_forward_label + if {!$scan_target_label_same_line_seen} { + set label_posn_in_line [string first : $scanline] + set labelposn [expr {$scanline_start + $label_posn_in_line}] + #we only really care about exactly landing on a boundary or else the next 512 byte boundaries above the labelposn + if {($labelposn % 512) == 0} { + set ubound [expr {($labelposn / 512) * 512}] + } else { + set ubound [expr {(($labelposn / 512)+1) * 512}] + } + set lbound [expr {$ubound - $labelsize}] + if {($labelposn >= $lbound) && ($labelposn <= $ubound)} { + dict set scan_target_label_record error linestart_and_call_offset_bytes + lappend error_labels [list label $label linestart_and_call_offset_bytes $labelposn callsite [list call ${call}${labelplus} call_linenum $callingline_num] bad_target_line $line_num_global] + puts stdout "[a+ bold red]ERROR: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" + puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $ubound[a] [a+ yellow bold]from callsite.[a]" + puts [$objForwardScan chunk_boundary_display [dict get $scanlineinfo start] [dict get $scanlineinfo end] 512 -linebase $callposn_lineindex+1 -limit 1] ;#+1 on callposn_linindex to do editor-style linenums + } else { + dict set scan_target_label_record ok 1 + puts stdout "[a+ bold green]OK: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" + } + dict lappend target_labels_found $label $scan_target_label_record } else { - dict set scan_target_label_record ok 1 - puts stdout "[a+ bold green]OK: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" + puts stdout "OK - seen label $label on $line_num_global before" } - dict lappend target_labels_found $label $scan_target_label_record - } else { - puts stdout "OK - seen label $label on $line_num_global before" - } - } - incr total_offset $scanline_bytes - } - ################################################################################################################################## - - - #todo - #forward scan 2 - check any boundaries missed above because the label isn't at the begining of a line - #these are potentially hidden labels that could activate without requiring the label be at the beginning of a line - #check boundary spans relative to start of this objForwardScan chunk - - #adjust boundary-search by resetting counter each time crlf encountered - set forward_lines [$objForwardScan chunkrange_to_lineinfolist 0 end] - set boundary_positions [list 0] - set scanner_offset 0 - set scanner_position 0 - foreach forwardbline_info $forward_lines { - #review - do we need to check the payload in case we have configured the textinfo object to split the file only on lf - (not true by default) - set forwardbline_len [dict get $forwardbline_info linelen] - set forwardbline_spaninfo [fileline::range_spans_chunk_boundaries [expr {$scanner_position + $scanner_offset}] [expr {$scanner_position + $scanner_offset + $forwardbline_len}] 512] - set forwardbline_boundaries [dict get $forwardbline_spaninfo boundaries] - - foreach b $forwardbline_boundaries { - set relb [expr $b + $scanner_offset] - if {$relb <= [dict get $forwardbline_info end]} { - lappend boundary_positions $relb - } else { - #leave it for the next line - as we may need to adjust offset anyway - break } + incr total_offset $scanline_bytes } - if {[dict get $forwardbline_info le] eq "crlf"} { - set scanner_offset [expr {[dict get $forwardbline_info end] - [lindex $boundary_positions end]}] ;#reset on crlf - #puts "+++++ set scanner_offset $scanner_offset" - } - set scanner_position [dict get $forwardbline_info end] - } - set boundary_positions [lsearch -all -not -inline $boundary_positions 0] - if {[llength $boundary_positions]} { - puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, boundaries (possibly with offsets) to check $boundary_positions[a]" - } else { - puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, No boundaries to check (generally expected for files with crlf line endings and no extremely long lines)[a]" - } + ################################################################################################################################## - if {[llength $boundary_positions]} { - puts stdout "line $callingline_num scan from call label $label ending at position $callposn. Next Callsite-relative boundary [lindex $boundary_positions 0]" - - for {set i 0} {$i < [llength $boundary_positions]} {incr i} { - set b [lindex $boundary_positions $i] - if {$i < [llength $boundary_positions]-1} { - set nextb [lindex $boundary_positions $i+1] - set top $nextb - } else { - set top end - } + #todo + #forward scan 2 - check any boundaries missed above because the label isn't at the begining of a line + #these are potentially hidden labels that could activate without requiring the label be at the beginning of a line + #check boundary spans relative to start of this objForwardScan chunk - set forwardbline_infolist [$objForwardScan chunkrange_to_lineinfolist $b $top -show_truncated 1] - set forwardbline_info [lindex $forwardbline_infolist 0] - if {[dict get $forwardbline_info is_truncated]} { - set payload_from_boundary [dict get $forwardbline_info truncated] - } else { - set payload_from_boundary [dict get $forwardbline_info payload] - } + #adjust boundary-search by resetting counter each time crlf encountered + set forward_lines [$objForwardScan chunkrange_to_lineinfolist 0 end] + set boundary_positions [list 0] + set scanner_offset 0 + set scanner_position 0 + foreach forwardbline_info $forward_lines { + #review - do we need to check the payload in case we have configured the textinfo object to split the file only on lf - (not true by default) set forwardbline_len [dict get $forwardbline_info linelen] - set forwardbline_index [dict get $forwardbline_info lineindex] - set forwardbline_start [dict get $forwardbline_info start] - set forwardbline_start_global [expr {$forward_chunk_base + $forwardbline_start}] - set forwardbline_index_global [lindex [$objFile chunkrange_to_linerange $forwardbline_start_global $forwardbline_start_global] 0] - set forwardbline_num_global [expr {$forwardbline_index_global + 1}] - - set found_targetlabel_at_boundary 0 - if {[string first : $payload_from_boundary] >= 0} { - #puts stdout "Possible label at boundary $b - testing" - set labelinfo [batchlib::get_target_label_from_line $payload_from_boundary] - if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { - incr found_forward_label - set found_targetlabel_at_boundary 1 - } elseif {[dict get $labelinfo labelfound]} { - set unsearched_label [dict get $labelinfo label] - puts stderr "[a+ cyan]Line $forwardbline_num_global: Found an item that cmd may interpret as a target label because of its location at a boundary $b - but it doesn't seem to be the one we are looking for. Looking for '$label' Found: '[dict get $labelinfo label]'[a]" - puts stderr "[a+ yellow]Warning - if the label '$unsearched_label' on line $forwardbline_num_global isn't meant to be a target - it may be safest to make sure batch script isn't using CALL or GOTO with target :$unsearched_label" - puts stdout "linedata:\n" - #puts stdout "'$payload_from_boundary'" - puts [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] - - #dubious value to check call_labels_found - as we didn't run through and find all call labels first! - if {$unsearched_label in [dict keys $call_labels_found]} { - set boundary_target_label_record [list label $unsearched_label line $forwardbline_num_global error found_via_boundary_check_on_a_different_call_label] - dict lappend warning_target_labels_found $unsearched_label $boundary_target_label_record - } else { - set possible_target_label_record [list label $unsearched_label line $forwardbline_num_global] - dict lappend possible_target_labels_found $unsearched_label $possible_target_label_record - } + set forwardbline_spaninfo [fileline::range_spans_chunk_boundaries [expr {$scanner_position + $scanner_offset}] [expr {$scanner_position + $scanner_offset + $forwardbline_len}] 512] + set forwardbline_boundaries [dict get $forwardbline_spaninfo boundaries] + + foreach b $forwardbline_boundaries { + set relb [expr $b + $scanner_offset] + if {$relb <= [dict get $forwardbline_info end]} { + lappend boundary_positions $relb } else { - set note "" - if {[dict exists $labelinfo note]} { - set note [dict get $labelinfo note] - } - if {$note ne "prefix_fail"} { - puts stdout "no label detected at boundary $b - probably ok. Note from target-label scanner: $note" - } - } - if {$found_targetlabel_at_boundary} { - set target_label_record [list label $label line $forwardbline_num_global error call_offset_bytes] - dict lappend target_labels_found $label $target_label_record - set note "possibly unreliable or dangerous target-label at line $forwardbline_num_global may execute line [expr {$forwardbline_num_global +1}].\n" - append note "Target label not at line start but was found by scanning 512byte chunks from callsite with count resets at any crlf encountered\n" - append note "Adjust spacing between line $callingline_num and $forwardbline_num_global to avoid the 512 boundary - and re-test for other boundary problems" - lappend error_labels [list label $label call_offset_bytes $b callsite [list call ${call}${labelplus} call_linenum $callingline_num] note $note] - puts stdout "[a+ bold red]ERROR: line $forwardbline_num_global target-label [dict get $labelinfo rawlabel] found at boundary and with byte offset from callsite: $b [a]" - puts stdout "[a+ bold red] This target-label appears to fall at or just after the 512byte boundary at byte $b[a] [a+ yellow bold]from callsite.[a]" - puts stdout "[a+ bold yellow]Code may execute at line [expr {$forwardbline_num_global + 1}] (or at next 512Byte boundary in some circumstances)[a]" - puts stdout "[a+ bold yellow]Recommend adjusting spacing between line $callingline_num and $forwardbline_num_global[a]" - puts stdout [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] - } - #if found any label - peek at next boundary - if {[dict get $labelinfo labelfound] && $i+1 < [llength $boundary_positions]} { - set next_lineinfolist [$objForwardScan chunkrange_to_lineinfolist $nextb end -show_truncated 1] - set next_lineinfo [lindex $next_lineinfolist 0] - puts "peek next boundary data - line [expr {$forwardbline_num_global + 1}]:" - #if {[dict get $next_lineinfo is_truncated]} { - # puts [dict get $next_lineinfo truncated] - #} else { - # puts [dict get $next_lineinfo payload] - #} - puts [$objForwardScan chunk_boundary_display [dict get $next_lineinfo start] [dict get $next_lineinfo end] 0 -boundaries $nextb -linebase $callposn_lineindex+1 -limit 1] + #leave it for the next line - as we may need to adjust offset anyway + break } } + if {[dict get $forwardbline_info le] eq "crlf"} { + set scanner_offset [expr {[dict get $forwardbline_info end] - [lindex $boundary_positions end]}] ;#reset on crlf + #puts "+++++ set scanner_offset $scanner_offset" + } + set scanner_position [dict get $forwardbline_info end] } - } - $objForwardScan destroy - - #scan behind for labels at boundaries - using offset from start of file - #we do a backward scan even if a forward label has been found, so that we can warn of duplicate labels. - - set prior_start 0 - set prior_end $callingline_index ;#only scan from file start to call-site - - set pline_begin 0 - set found_backward_label 0 - set p_linenum 0 - for {set pidx $prior_start} {$pidx <= $prior_end} {incr pidx} { - set plineinfo [$objFile lineinfo $pidx] - set pline [dict get $plineinfo payload] - incr p_linenum - set pline_bytes [dict get $plineinfo linelen] ;#includes lf or crlf ending bytes - set pline_start $pline_begin - if {$pline_start != [dict get $plineinfo start]} { - error "checkfile error: line $p_linenum - calculated start $pline_start not equal to stored start [dict get $plineinfo start]" - } - set pline_end [expr {$pline_begin + $pline_bytes -1}] - if {$pline_end != [dict get $plineinfo end]} { - error "checkfile error: line $p_linenum - calculated end $pline_end not equal to stored end [dict get $plineinfo end]" + set boundary_positions [lsearch -all -not -inline $boundary_positions 0] + if {[llength $boundary_positions]} { + puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, boundaries (possibly with offsets) to check $boundary_positions[a]" + } else { + puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, No boundaries to check (generally expected for files with crlf line endings and no extremely long lines)[a]" } - set trimpline [string trim $pline] - #todo - process leading part of line before : - #e.g the following are valid (leading # is not part of the examples) - # ====== : label - # also - #%=== == : label - # also - #%= ,,,, ;;; = : label - - #these token delimiters (; , = 0x0B ox0C 0xFF ) - #can also occur after the colon e.g - #: ;label - - #the following is a valid target for @GOTO :#something - #: ;#something - - #It is possible for closing bracket ) to also be invisible if there is no open ( active - #This only seems to work for a single ) at beggining of the line multiple ) even separated by spaces or ; etc seem to stop the target being found. - #The lone unbalanced ) can act like a comment in other contexts - and can appear multiple times, but only if first ) on the line is followed by a delimiter - #Essentially all characters following the first ) are ignored - but if the first is something like )) then cmd tries to interpret that as a command and fails - # e.g - #) ignored - #);)))) ignored - #)) causes error as cmd tries to run "))" as a command. - #This is a reason why *target* labels shouldn't appear in bracketed blocks - as code jumps to a point where ( ) will be unbalanced - - #target labels are literal with regards to % ie not subject to % expansion - but ^ must still be processed - if {[string first : $pline] >= 0} { - #space (and some other chars) allowed between colon and label at target - (but not at callsite) - set labelinfo [batchlib::get_target_label_from_line $pline] - if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { - set target_label_record [list label $label line $p_linenum] - puts stdout "$labelinfo" - incr found_backward_label - set prior_label_posn_in_line [string first : $pline] - set prior_label_posn [expr {$pline_begin + $prior_label_posn_in_line}] - if {($prior_label_posn % 512) == 0} { - set p_ubound [expr {($prior_label_posn / 512) * 512}] + if {[llength $boundary_positions]} { + puts stdout "line $callingline_num scan from call label $label ending at position $callposn. Next Callsite-relative boundary [lindex $boundary_positions 0]" + + for {set i 0} {$i < [llength $boundary_positions]} {incr i} { + set b [lindex $boundary_positions $i] + if {$i < [llength $boundary_positions]-1} { + set nextb [lindex $boundary_positions $i+1] + set top $nextb } else { - set p_ubound [expr {(($prior_label_posn /512) +1) * 512}] + set top end } - set p_lbound [expr {$p_ubound - $labelsize}] - if {($prior_label_posn >= $p_lbound) && ($prior_label_posn <= $p_ubound)} { - dict set target_label_record error linestart_and_overlap - lappend error_labels [list label $label linestart_and_overlap $prior_label_posn callsite [list call ${call}${labelplus} call_linenum $callingline_num]] - puts stdout "[a+ bold red]ERROR: target-label '$trimpline' at line $p_linenum and offset from file start: $prior_label_posn line start: $pline_begin[a]" - puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $p_ubound[a] [a+ yellow bold]from file start[a]" - puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 512 -linebase 1 -limit 1] + + set forwardbline_infolist [$objForwardScan chunkrange_to_lineinfolist $b $top -show_truncated 1] + set forwardbline_info [lindex $forwardbline_infolist 0] + if {[dict get $forwardbline_info is_truncated]} { + set payload_from_boundary [dict get $forwardbline_info truncated] } else { - dict set target_label_record ok 1 - puts stdout "[a+ bold green]OK: file line: $p_linenum target-label '$trimpline' before call from line $callingline_num. Target is at offset from file start: $prior_label_posn line start: $pline_begin[a]" + set payload_from_boundary [dict get $forwardbline_info payload] + } + set forwardbline_len [dict get $forwardbline_info linelen] + set forwardbline_index [dict get $forwardbline_info lineindex] + set forwardbline_start [dict get $forwardbline_info start] + set forwardbline_start_global [expr {$forward_chunk_base + $forwardbline_start}] + set forwardbline_index_global [lindex [$objFile chunkrange_to_linerange $forwardbline_start_global $forwardbline_start_global] 0] + set forwardbline_num_global [expr {$forwardbline_index_global + 1}] + + set found_targetlabel_at_boundary 0 + if {[string first : $payload_from_boundary] >= 0} { + #puts stdout "Possible label at boundary $b - testing" + set labelinfo [batchlib::get_target_label_from_line $payload_from_boundary] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + incr found_forward_label + set found_targetlabel_at_boundary 1 + } elseif {[dict get $labelinfo labelfound]} { + set unsearched_label [dict get $labelinfo label] + puts stderr "[a+ cyan]Line $forwardbline_num_global: Found an item that cmd may interpret as a target label because of its location at a boundary $b - but it doesn't seem to be the one we are looking for. Looking for '$label' Found: '[dict get $labelinfo label]'[a]" + puts stderr "[a+ yellow]Warning - if the label '$unsearched_label' on line $forwardbline_num_global isn't meant to be a target - it may be safest to make sure batch script isn't using CALL or GOTO with target :$unsearched_label" + puts stdout "linedata:\n" + #puts stdout "'$payload_from_boundary'" + puts [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] + + #dubious value to check call_labels_found - as we didn't run through and find all call labels first! + if {$unsearched_label in [dict keys $call_labels_found]} { + set boundary_target_label_record [list label $unsearched_label line $forwardbline_num_global error found_via_boundary_check_on_a_different_call_label] + dict lappend warning_target_labels_found $unsearched_label $boundary_target_label_record + } else { + set possible_target_label_record [list label $unsearched_label line $forwardbline_num_global] + dict lappend possible_target_labels_found $unsearched_label $possible_target_label_record + } + } else { + set note "" + if {[dict exists $labelinfo note]} { + set note [dict get $labelinfo note] + } + if {$note ne "prefix_fail"} { + puts stdout "no label detected at boundary $b - probably ok. Note from target-label scanner: $note" + } + } + if {$found_targetlabel_at_boundary} { + set target_label_record [list label $label line $forwardbline_num_global error call_offset_bytes] + dict lappend target_labels_found $label $target_label_record + set note "possibly unreliable or dangerous target-label at line $forwardbline_num_global may execute line [expr {$forwardbline_num_global +1}].\n" + append note "Target label not at line start but was found by scanning 512byte chunks from callsite with count resets at any crlf encountered\n" + append note "Adjust spacing between line $callingline_num and $forwardbline_num_global to avoid the 512 boundary - and re-test for other boundary problems" + lappend error_labels [list label $label call_offset_bytes $b callsite [list call ${call}${labelplus} call_linenum $callingline_num] note $note] + puts stdout "[a+ bold red]ERROR: line $forwardbline_num_global target-label [dict get $labelinfo rawlabel] found at boundary and with byte offset from callsite: $b [a]" + puts stdout "[a+ bold red] This target-label appears to fall at or just after the 512byte boundary at byte $b[a] [a+ yellow bold]from callsite.[a]" + puts stdout "[a+ bold yellow]Code may execute at line [expr {$forwardbline_num_global + 1}] (or at next 512Byte boundary in some circumstances)[a]" + puts stdout "[a+ bold yellow]Recommend adjusting spacing between line $callingline_num and $forwardbline_num_global[a]" + puts stdout [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] + } + #if found any label - peek at next boundary + if {[dict get $labelinfo labelfound] && $i+1 < [llength $boundary_positions]} { + set next_lineinfolist [$objForwardScan chunkrange_to_lineinfolist $nextb end -show_truncated 1] + set next_lineinfo [lindex $next_lineinfolist 0] + puts "peek next boundary data - line [expr {$forwardbline_num_global + 1}]:" + #if {[dict get $next_lineinfo is_truncated]} { + # puts [dict get $next_lineinfo truncated] + #} else { + # puts [dict get $next_lineinfo payload] + #} + puts [$objForwardScan chunk_boundary_display [dict get $next_lineinfo start] [dict get $next_lineinfo end] 0 -boundaries $nextb -linebase $callposn_lineindex+1 -limit 1] + } } - dict lappend call_labels_found $label $target_label_record } - #else - label we weren't searching for - even if at file boundary it should be picked up when actually searched? review } - set spaninfo [fileline::range_spans_chunk_boundaries $pline_start $pline_end 512] - if {[dict get $spaninfo is_span]} { - #puts stdout "boundary spanning line $p_linenum byte range $pline_start -> $pline_end [a+ bold purple]$spaninfo[a]" - #check boundaries within the line - set boundaries [dict get $spaninfo boundaries] - foreach b $boundaries { - if {$b == 0} { - #skip - beginning of line already handled (review?) - continue - } - #overlap test is just a warning - we have a label-like thing overlapping the boundary - #todo - take account of fact that target label can be ": labelname" - so using just labelsize won't detect all overlaps - #The label could even be at the end of a long line that appears at first to be a comment e.g something like - # : whatever : sneakylabel - # or - #@REM ============================================================================================================================================================ : sneakylabel - - #The fact that it overlaps - means it's probably not being found with lf line-endings - and only the label :whatever should be found with crlf endings - #- but we won't always catch that something's fishy - #review - set overlaptail [string range $pline [expr {$b - $labelsize}] [expr {($b + $labelsize) -1}]] ;#subtracting labelsize gives earliest possible overlap - if {[string match "*:$label *" $overlaptail] } { - lappend warning_labels [list label $label warning label_spanning callsite [list call ${call}${labelplus} call_linenum $callingline_num]] - puts stdout "[a+ bold yellow] WARNING: possible label $label spans boundary $b from start of file" - } + $objForwardScan destroy + + #scan behind for labels at boundaries - using offset from start of file + #we do a backward scan even if a forward label has been found, so that we can warn of duplicate labels. + + set prior_start 0 + set prior_end $callingline_index ;#only scan from file start to call-site + + set pline_begin 0 + set found_backward_label 0 + set p_linenum 0 + for {set pidx $prior_start} {$pidx <= $prior_end} {incr pidx} { + set plineinfo [$objFile lineinfo $pidx] + set pline [dict get $plineinfo payload] + incr p_linenum + set pline_bytes [dict get $plineinfo linelen] ;#includes lf or crlf ending bytes + set pline_start $pline_begin + if {$pline_start != [dict get $plineinfo start]} { + error "checkfile error: line $p_linenum - calculated start $pline_start not equal to stored start [dict get $plineinfo start]" + } + set pline_end [expr {$pline_begin + $pline_bytes -1}] + if {$pline_end != [dict get $plineinfo end]} { + error "checkfile error: line $p_linenum - calculated end $pline_end not equal to stored end [dict get $plineinfo end]" + } - set pline_tail [string range $pline $b end] - if {[string first : $pline_tail] >= 0} { - set labelinfo [batchlib::get_target_label_from_line $pline_tail] - set labelfound 0 - if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { - set labelfound 1 - } elseif {[dict get $labelinfo labelfound]} { - puts stdout "Note: detected target label [dict get $labelinfo label] at file offset $b at boundary with no preceeding newline - but it's not the one we're currently scanning for" + set trimpline [string trim $pline] + #todo - process leading part of line before : + #e.g the following are valid (leading # is not part of the examples) + # ====== : label + # also + #%=== == : label + # also + #%= ,,,, ;;; = : label + + #these token delimiters (; , = 0x0B ox0C 0xFF ) + #can also occur after the colon e.g + #: ;label + + #the following is a valid target for @GOTO :#something + #: ;#something + + #It is possible for closing bracket ) to also be invisible if there is no open ( active + #This only seems to work for a single ) at beggining of the line multiple ) even separated by spaces or ; etc seem to stop the target being found. + #The lone unbalanced ) can act like a comment in other contexts - and can appear multiple times, but only if first ) on the line is followed by a delimiter + #Essentially all characters following the first ) are ignored - but if the first is something like )) then cmd tries to interpret that as a command and fails + # e.g + #) ignored + #);)))) ignored + #)) causes error as cmd tries to run "))" as a command. + #This is a reason why *target* labels shouldn't appear in bracketed blocks - as code jumps to a point where ( ) will be unbalanced + + #target labels are literal with regards to % ie not subject to % expansion - but ^ must still be processed + if {[string first : $pline] >= 0} { + #space (and some other chars) allowed between colon and label at target - (but not at callsite) + set labelinfo [batchlib::get_target_label_from_line $pline] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + set target_label_record [list label $label line $p_linenum] + puts stdout "$labelinfo" + incr found_backward_label + set prior_label_posn_in_line [string first : $pline] + set prior_label_posn [expr {$pline_begin + $prior_label_posn_in_line}] + if {($prior_label_posn % 512) == 0} { + set p_ubound [expr {($prior_label_posn / 512) * 512}] + } else { + set p_ubound [expr {(($prior_label_posn /512) +1) * 512}] + } + set p_lbound [expr {$p_ubound - $labelsize}] + if {($prior_label_posn >= $p_lbound) && ($prior_label_posn <= $p_ubound)} { + dict set target_label_record error linestart_and_overlap + lappend error_labels [list label $label linestart_and_overlap $prior_label_posn callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold red]ERROR: target-label '$trimpline' at line $p_linenum and offset from file start: $prior_label_posn line start: $pline_begin[a]" + puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $p_ubound[a] [a+ yellow bold]from file start[a]" + puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 512 -linebase 1 -limit 1] + } else { + dict set target_label_record ok 1 + puts stdout "[a+ bold green]OK: file line: $p_linenum target-label '$trimpline' before call from line $callingline_num. Target is at offset from file start: $prior_label_posn line start: $pline_begin[a]" + } + dict lappend call_labels_found $label $target_label_record + } + #else - label we weren't searching for - even if at file boundary it should be picked up when actually searched? review + } + set spaninfo [fileline::range_spans_chunk_boundaries $pline_start $pline_end 512] + if {[dict get $spaninfo is_span]} { + #puts stdout "boundary spanning line $p_linenum byte range $pline_start -> $pline_end [a+ bold purple]$spaninfo[a]" + #check boundaries within the line + set boundaries [dict get $spaninfo boundaries] + foreach b $boundaries { + if {$b == 0} { + #skip - beginning of line already handled (review?) + continue + } + #overlap test is just a warning - we have a label-like thing overlapping the boundary + #todo - take account of fact that target label can be ": labelname" - so using just labelsize won't detect all overlaps + #The label could even be at the end of a long line that appears at first to be a comment e.g something like + # : whatever : sneakylabel + # or + #@REM ============================================================================================================================================================ : sneakylabel + + #The fact that it overlaps - means it's probably not being found with lf line-endings - and only the label :whatever should be found with crlf endings + #- but we won't always catch that something's fishy + #review + set overlaptail [string range $pline [expr {$b - $labelsize}] [expr {($b + $labelsize) -1}]] ;#subtracting labelsize gives earliest possible overlap + if {[string match "*:$label *" $overlaptail] } { + lappend warning_labels [list label $label warning label_spanning callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold yellow] WARNING: possible label $label spans boundary $b from start of file" } - if {$labelfound} { - set label_found_name [dict get $labelinfo label] - incr found_backward_label - - lappend error_labels [list label $label_found_name file_offset_bytes $b note "label at boundary but no preceeding newline - cmd may interpret as label and execute following line or code at next boundary" callsite [list call ${call}${labelplus} call_linenum $callingline_num]] - puts stdout "[a+ bold red]ERROR: *possible* label '$label_found_name' at line $p_linenum and offset from file start: $b line start: $pline_begin[a]" - puts stdout "[a+ bold red] This label with no preceeding newline appears to span the 512byte boundary at byte $b[a] [a+ yellow bold]from file start[a]" - puts stdout "[a+ bold red] cmd.exe may find this label - but it probably shouldn't be relied upon[a]" - puts stdout "[a+ bold yellow] label starting at $b : $pline_tail[a]" + set pline_tail [string range $pline $b end] - set target_label_record [list label $label_found_name line $p_linenum] - if {$label_found_name in [dict keys $call_labels_found]} { - dict set target_label_record error "called_label_at_file_offset_boundary" - dict lappend target_labels_found $label_found_name $target_label_record - } else { - #review - we need to get better at finding all calls! - dict set target_label_record error "uncalled_label_at_file_offset_boundary" - dict lappend possible_target_labels_found $label_found_name $target_label_record + if {[string first : $pline_tail] >= 0} { + set labelinfo [batchlib::get_target_label_from_line $pline_tail] + set labelfound 0 + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + set labelfound 1 + } elseif {[dict get $labelinfo labelfound]} { + puts stdout "Note: detected target label [dict get $labelinfo label] at file offset $b at boundary with no preceeding newline - but it's not the one we're currently scanning for" } + if {$labelfound} { + set label_found_name [dict get $labelinfo label] + incr found_backward_label + + lappend error_labels [list label $label_found_name file_offset_bytes $b note "label at boundary but no preceeding newline - cmd may interpret as label and execute following line or code at next boundary" callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold red]ERROR: *possible* label '$label_found_name' at line $p_linenum and offset from file start: $b line start: $pline_begin[a]" + puts stdout "[a+ bold red] This label with no preceeding newline appears to span the 512byte boundary at byte $b[a] [a+ yellow bold]from file start[a]" + puts stdout "[a+ bold red] cmd.exe may find this label - but it probably shouldn't be relied upon[a]" + puts stdout "[a+ bold yellow] label starting at $b : $pline_tail[a]" - set tail_start $b - set tail_end [expr {$b + [string length $pline_tail]}] - set tail_spaninfo [fileline::range_spans_chunk_boundaries $tail_start $tail_end 512] - if {[dict get $tail_spaninfo is_span]} { - set tail_boundaries [dict get $tail_spaninfo boundaries] - set extra_tail_boundaries [lsearch -all -inline -not $tail_boundaries $b] - if {[llength $extra_tail_boundaries]} { - puts "Line $p_linenum also spans additional boundaries: $extra_tail_boundaries" - set next_boundary [lindex $extra_tail_boundaries 0] - #boundary doesn't reset if no crlf - we are still within the line - so can calc from line beginning - set next_boundary_data [string range $pline [expr {$pline_begin + $next_boundary}] end] - puts "Line $p_linenum data at next boundary: [a+ yellow bold]$next_boundary_data[a]" - puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 0 -boundaries $next_boundary -linebase 1 -limit 1] - - puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + set target_label_record [list label $label_found_name line $p_linenum] + if {$label_found_name in [dict keys $call_labels_found]} { + dict set target_label_record error "called_label_at_file_offset_boundary" + dict lappend target_labels_found $label_found_name $target_label_record + } else { + #review - we need to get better at finding all calls! + dict set target_label_record error "uncalled_label_at_file_offset_boundary" + dict lappend possible_target_labels_found $label_found_name $target_label_record } - } else { - if {$pidx+1 < [$objFile linecount]} { - set nextlineinfo [$objFile lineinfo $pidx+1] - set nextpayload [dict get $nextlineinfo payload] - puts "Line $p_linenum + 1 has data: [a+ yellow bold]$nextpayload[a]" - puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + + + set tail_start $b + set tail_end [expr {$b + [string length $pline_tail]}] + set tail_spaninfo [fileline::range_spans_chunk_boundaries $tail_start $tail_end 512] + if {[dict get $tail_spaninfo is_span]} { + set tail_boundaries [dict get $tail_spaninfo boundaries] + set extra_tail_boundaries [lsearch -all -inline -not $tail_boundaries $b] + if {[llength $extra_tail_boundaries]} { + puts "Line $p_linenum also spans additional boundaries: $extra_tail_boundaries" + set next_boundary [lindex $extra_tail_boundaries 0] + #boundary doesn't reset if no crlf - we are still within the line - so can calc from line beginning + set next_boundary_data [string range $pline [expr {$pline_begin + $next_boundary}] end] + puts "Line $p_linenum data at next boundary: [a+ yellow bold]$next_boundary_data[a]" + puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 0 -boundaries $next_boundary -linebase 1 -limit 1] + + puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + } } else { - #EOF reached + if {$pidx+1 < [$objFile linecount]} { + set nextlineinfo [$objFile lineinfo $pidx+1] + set nextpayload [dict get $nextlineinfo payload] + puts "Line $p_linenum + 1 has data: [a+ yellow bold]$nextpayload[a]" + puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + } else { + #EOF reached + } } } } - } - } + } - } - incr pline_begin $pline_bytes - } + } + incr pline_begin $pline_bytes + } - if {$found_forward_label == 0} { - if {[string toupper $label] eq "EOF"} { - #EOF/eof label is special - it doesn't have to exist - but if it does - it probably shouldn't be spanning a boundary - puts stdout "[a+ bold green]OK: label :$label doesn't exist - but it's usually not meant to. callsite: [list call ${call}${labelplus} call_linenum $callingline_num] [a]" - } else { - if {$found_backward_label == 0} { - lappend warning_labels [list label $label warning label_not_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] - puts stdout "[a+ bold yellow]WARNING: label :$label not found (in forward or backward scan)[a]" + if {$found_forward_label == 0} { + if {[string toupper $label] eq "EOF"} { + #EOF/eof label is special - it doesn't have to exist - but if it does - it probably shouldn't be spanning a boundary + puts stdout "[a+ bold green]OK: label :$label doesn't exist - but it's usually not meant to. callsite: [list call ${call}${labelplus} call_linenum $callingline_num] [a]" + } else { + if {$found_backward_label == 0} { + lappend warning_labels [list label $label warning label_not_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold yellow]WARNING: label :$label not found (in forward or backward scan)[a]" + } } } - } - if {($found_forward_label + $found_backward_label) > 1} { - #puts "target_labels_found: $target_labels_found" - dict for {targetkey targethits} $target_labels_found { - set targetlines [list] - foreach record $targethits { - lappend targetlines [dict get $record line] - } - set remaining [list] - set previous "" ; - foreach lnum [lsort -integer -increasing $targetlines] { - if {$previous eq ""} { - lappend remaining $lnum - } else { - if {$lnum-1 == $previous} { - puts stdout "[a+ green bold]OK[a] - target-label $targetkey appears on immediately adjacent lines $previous and $lnum - assuming it is a boundary-avoidance tactic rather than an inadvertent duplicate" - set remaining [lrange $remaining 0 end-1];#retain latest - we will allow a run of targets on subsequent lines + if {($found_forward_label + $found_backward_label) > 1} { + #puts "target_labels_found: $target_labels_found" + dict for {targetkey targethits} $target_labels_found { + set targetlines [list] + foreach record $targethits { + lappend targetlines [dict get $record line] + } + set remaining [list] + set previous "" ; + foreach lnum [lsort -integer -increasing $targetlines] { + if {$previous eq ""} { + lappend remaining $lnum + } else { + if {$lnum-1 == $previous} { + puts stdout "[a+ green bold]OK[a] - target-label $targetkey appears on immediately adjacent lines $previous and $lnum - assuming it is a boundary-avoidance tactic rather than an inadvertent duplicate" + set remaining [lrange $remaining 0 end-1];#retain latest - we will allow a run of targets on subsequent lines + } + lappend remaining $lnum } - lappend remaining $lnum + set previous [lindex $remaining end] + } + if {[llength $remaining] > 1} { + lappend warning_labels [list label $label warning multiple_target_labels_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold yellow]WARNING: label :$label seems to appear multiple times[a]" } - set previous [lindex $remaining end] - } - if {[llength $remaining] > 1} { - lappend warning_labels [list label $label warning multiple_target_labels_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] - puts stdout "[a+ bold yellow]WARNING: label :$label seems to appear multiple times[a]" } } } } - } ;# end default switch case on trimln } ;# end switch incr file_offset $callingline_len ;#including per-line stored line-ending } @@ -892,7 +892,7 @@ namespace eval punk::mix::commandset::scriptwrap { return false } } - #assert - customwrapper_folder var exists - but might be empty + #assertion - customwrapper_folder var exists - but might be empty if {[string length $ext]} { @@ -1261,44 +1261,48 @@ namespace eval punk::mix::commandset::scriptwrap { set tp [dict get $taginfo type] ;# type singular - related to just one line #set raw [dict get $taginfo raw] #equivalent to $ln if {[dict exists $tags $nm]} { - #already seen tag name - #tags dict has types key *plural* - need to track whether we have type open and type close (or openclose for self-closing tags) - if {[dict get $tags $nm types] ne "open"} { + #already seen tag name + #tags dict has types key *plural* - need to track whether we have type open and type close (or openclose for self-closing tags) + if {[dict get $tags $nm types] ne "open"} { + lappend errors "line: $linenum tag $nm encountered type $tp after existing type [dict get $tags $nm types]" + dict incr errortags $nm + } else { + #we already have open - expect only close + if {$tp ne "close"} { lappend errors "line: $linenum tag $nm encountered type $tp after existing type [dict get $tags $nm types]" - dict incr errortags $nm + dict incr errortags $nm } else { - #we already have open - expect only close - if {$tp ne "close"} { - lappend errors "line: $linenum tag $nm encountered type $tp after existing type [dict get $tags $nm types]" - dict incr errortags $nm - } else { - #close after open - dict set tags $nm types [list open close] - dict set tags $nm end $linenum - set taglines [dict get $tags $nm taglines] - if {[llength $taglines] != 1} { - error "Unexpected result when closing tag $nm. Existing taglines length not 1." - } - dict set tags $nm taglines [concat $taglines $ln] + #close after open + dict set tags $nm types [list open close] + dict set tags $nm end $linenum + set taglines [dict get $tags $nm taglines] + if {[llength $taglines] != 1} { + error "Unexpected result when closing tag $nm. Existing taglines length not 1." } + dict set tags $nm taglines [concat $taglines $ln] } + } } else { - #first seen of tag name - if {$tp eq "close"} { - lappend errors "line: $linenum tag $nm encountered type $p close first" - dict incr errortags $nm - } else { - dict set tags $nm types $tp + #first seen of tag name + switch -- $tp { + close { + lappend errors "line: $linenum tag $nm encountered type $p close first" + dict incr errortags $nm + } + open { + dict set tags $nm types open dict set tags $nm indent [dict get $taginfo indent] - if {$tp eq "open"} { - dict set tags $nm start $linenum - dict set tags $nm taglines [list $ln] ;#first entry - another will be added on encountering matching closing tag - } elseif {$tp eq "openclose"} { - dict set tags $nm start $linenum - dict set tags $nm end $linenum - dict set tags $nm taglines [list $ln] ;#single entry is final result for self-closing tag - } + dict set tags $nm start $linenum + dict set tags $nm taglines [list $ln] ;#first entry - another will be added on encountering matching closing tag + } + openclose { + dict set tags $nm types openclose + dict set tags $nm indent [dict get $taginfo indent] + dict set tags $nm start $linenum + dict set tags $nm end $linenum + dict set tags $nm taglines [list $ln] ;#single entry is final result for self-closing tag } + } } } } else { @@ -1422,29 +1426,61 @@ namespace eval punk::mix::commandset::scriptwrap { } } else { #in var - don't do anything with carets(?) - if {$c eq "%" && $percentrun == 1} { - #double percent - rather than just an empty var - emit one % - append labelout % - set invar 0 - set percentrun 0 - } elseif {$c eq "%"} { - #presume percentrun is 0 - set invar 0 - lappend varsfound $varname; set varname "" - } elseif {$c in $varterminals} { - set invar 0 - lappend varsfound $varname; set varname "" - } else { - if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { - #review - seems to terminate var - and substitute? - #this branch untested - case where we have %i and further % - what if it was %1var% ? does %1 get substituted ? or %1var% - test + switch -- $c { + % { + if {$percentrun == 1} { + #double percent - rather than just an empty var - emit one % + append labelout % + set invar 0 + set percentrun 0 + } else { + #presume percentrun is 0 + set invar 0 + lappend varsfound $varname; set varname "" + } + } + : { + #$varterminals set invar 0 - append varname $c - } else { - append varname $c + lappend varsfound $varname; set varname "" + } + default { + if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { + #review - seems to terminate var - and substitute? + #this branch untested - case where we have %i and further % - what if it was %1var% ? does %1 get substituted ? or %1var% - test + set invar 0 + append varname $c + } else { + append varname $c + } + set percentrun 0 } - set percentrun 0 } + + + #if {$c eq "%" && $percentrun == 1} { + # #double percent - rather than just an empty var - emit one % + # append labelout % + # set invar 0 + # set percentrun 0 + #} elseif {$c eq "%"} { + # #presume percentrun is 0 + # set invar 0 + # lappend varsfound $varname; set varname "" + #} elseif {$c in $varterminals} { + # set invar 0 + # lappend varsfound $varname; set varname "" + #} else { + # if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { + # #review - seems to terminate var - and substitute? + # #this branch untested - case where we have %i and further % - what if it was %1var% ? does %1 get substituted ? or %1var% - test + # set invar 0 + # append varname $c + # } else { + # append varname $c + # } + # set percentrun 0 + #} } incr inputconsumed } diff --git a/src/bootsupport/modules/punk/mix/templates/layouts/project/src/build.tcl b/src/bootsupport/modules/punk/mix/templates/layouts/project/src/build.tcl new file mode 100644 index 0000000..734ccb8 --- /dev/null +++ b/src/bootsupport/modules/punk/mix/templates/layouts/project/src/build.tcl @@ -0,0 +1,6 @@ +#!/bin/sh +# -*- tcl -*- \ +# 'build.tcl' name as required by kettle +# Can be run directly - but also using `deck Kettle ...` or `deck KettleShell ...`\ +exec ./kettle -f "$0" "${1+$@}" +kettle doc diff --git a/src/bootsupport/modules/punk/mix/templates/layouts/project/src/make.tcl b/src/bootsupport/modules/punk/mix/templates/layouts/project/src/make.tcl new file mode 100644 index 0000000..1bb0a46 --- /dev/null +++ b/src/bootsupport/modules/punk/mix/templates/layouts/project/src/make.tcl @@ -0,0 +1,995 @@ +# tcl +# +#make any tclkits and modules in src and place them and associated data files/scripts in the parent folder of src. +#e.g in 'bin' and 'modules' folders at same level as 'src' folder. + +set hashline "# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###" +puts $hashline +puts " punkshell make script " +puts $hashline\n +namespace eval ::punkmake { + variable scriptfolder [file normalize [file dirname [info script]]] + variable foldername [file tail $scriptfolder] + variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] + variable non_help_flags [list -k] + variable help_flags [list -help --help /?] + variable known_commands [list project get-project-info shell bootsupport] +} +if {"::try" ni [info commands ::try]} { + puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" + exit 1 +} + +#------------------------------------------------------------------------------ +#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#------------------------------------------------------------------------------ +#If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files +# - then it will attempt to preference these modules +# This allows a source update via 'fossil update' 'git pull' etc to pull in a minimal set of support modules for the make script +# and load these in preference to ones that may have been in the interp's tcl::tm::list or auto_path due to environment variables +set startdir [pwd] +if {[file exists [file join $startdir src bootsupport]]} { + set bootsupport_mod [file join $startdir src bootsupport modules] + set bootsupport_lib [file join $startdir src bootsupport lib] +} else { + set bootsupport_mod [file join $startdir bootsupport modules] + set bootsupport_lib [file join $startdir bootsupport lib] +} +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + + set original_tm_list [tcl::tm::list] + tcl::tm::remove {*}$original_tm_list + set original_auto_path $::auto_path + set ::auto_path [list $bootsupport_lib] + + set support_modules [glob -nocomplain -dir $bootsupport_mod -type f -tail *.tm] + set tcl_core_packages [list tcl::zlib zlib tcl::oo TclOO tcl::tommath tcl::zipfs Tcl Tk] ;#packages we + if {[llength $support_modules] || [llength [glob -nocomplain -dir $bootsupport_lib -tail *]]} { + #only forget all *unloaded* package names + foreach pkg [package names] { + if {$pkg in $tcl_core_packages} { + continue + } + if {![llength [package versions $pkg]]} { + #puts stderr "Got no versions for pkg $pkg" + continue + } + if {![string length [package provide $pkg]]} { + #no returned version indicates it wasn't loaded - so we can forget its index + package forget $pkg + } + } + tcl::tm::add $bootsupport_mod + } + + + if {[file exists [pwd]/modules]} { + tcl::tm::add [pwd]/modules + } + + #package require Thread + # - the full repl requires Threading and punk,shellfilter,shellrun to call and display properly. + + + # tm list already indexed - need 'package forget' to find modules based on current tcl::tm::list + #These are strong dependencies + package forget punk::mix + package require punk::mix + package forget punk::repo + package require punk::repo + package forget punkcheck + package require punkcheck + + + + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] + #------------------------------------------------------------------------------ +} + +# ** *** *** *** *** *** *** *** *** *** *** *** +#*temporarily* hijack package command +# ** *** *** *** *** *** *** *** *** *** *** *** +try { + rename ::package ::punkmake::package_temp_aside + proc ::package {args} { + if {[lindex $args 0] eq "require"} { + lappend ::punkmake::pkg_requirements [lindex $args 1] + } + } + package require punk::mix + package require punk::repo +} finally { + catch {rename ::package ""} + catch {rename ::punkmake::package_temp_aside ::package} +} +# ** *** *** *** *** *** *** *** *** *** *** *** +foreach pkg $::punkmake::pkg_requirements { + if {[catch {package require $pkg} errM]} { + puts stderr "missing pkg: $pkg" + lappend ::punkmake::pkg_missing $pkg + } else { + lappend ::punkmake::pkg_loaded $pkg + } +} + + + + + +proc punkmake_gethelp {args} { + set scriptname [file tail [info script]] + append h "Usage:" \n + append h "" \n + append h " $scriptname -help or $scriptname --help or $scriptname /? or just $scriptname" \n + append h " - This help." \n \n + append h " $scriptname project ?-k?" \n + append h " - this is the literal word project - and confirms you want to run the project build" \n + append h " - the optional -k flag will terminate processes running as the executable being built (if applicable)" \n \n + append h " $scriptname bootsupport" \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " $scriptname get-project-info" \n + append h " - show the name and base folder of the project to be built" \n + append h "" \n + if {[llength $::punkmake::pkg_missing]} { + append h "* ** NOTE ** ***" \n + append h " punkmake has detected that the following packages could not be loaded:" \n + append h " " [join $::punkmake::pkg_missing "\n "] \n + append h "* ** *** *** ***" \n + append h " These packages are required for punk make to function" \n \n + append h "* ** *** *** ***" \n\n + append h "Successfully Loaded packages:" \n + append h " " [join $::punkmake::pkg_loaded "\n "] \n + } + return $h +} +set scriptargs $::argv +set do_help 0 +if {![llength $scriptargs]} { + set do_help 1 +} else { + foreach h $::punkmake::help_flags { + if {[lsearch $scriptargs $h] >= 0} { + set do_help 1 + break + } + } +} +set commands_found [list] +foreach a $scriptargs { + if {![string match -* $a]} { + lappend commands_found $a + } else { + if {$a ni $::punkmake::non_help_flags} { + set do_help 1 + } + } +} +if {[llength $commands_found] != 1 } { + set do_help 1 +} elseif {[lindex $commands_found 0] ni $::punkmake::known_commands} { + puts stderr "Unknown command: [lindex $commands_found 0]\n\n" + set do_help 1 +} +if {$do_help} { + puts stderr [punkmake_gethelp] + exit 0 +} + +set ::punkmake::command [lindex $commands_found 0] + + +if {[lsearch $::argv -k] >= 0} { + set forcekill 1 +} else { + set forcekill 0 +} +#puts stdout "::argv $::argv" +# ---------------------------------------- + +set scriptfolder $::punkmake::scriptfolder + + + +#first look for a project root (something under fossil or git revision control AND matches punk project folder structure) +#If that fails - just look for a 'project shaped folder' ie meets minimum requirements of /src /src/lib /src/modules /lib /modules +if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} { + if {![string length [set projectroot [punk::repo::find_candidate $scriptfolder]]]} { + puts stderr "punkmake script unable to determine an approprite project root at or above the path '$scriptfolder' ensure the make script is within a project folder structure" + puts stderr " -aborted- " + exit 2 + #todo? + #ask user for a project name and create basic structure? + #call punk::mix::cli::new $projectname on parent folder? + } else { + puts stderr "WARNING punkmake script operating in project space that is not under version control" + } +} else { + +} + +set sourcefolder $projectroot/src + +if {$::punkmake::command eq "get-project-info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- -- get-project-info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- projectroot : $projectroot" + if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { + set vc "fossil" + set rev [punk::repo::fossil_revision $scriptfolder] + set rem [punk::repo::fossil_remote $scriptfolder] + } elseif {[punk::repo::find_git $scriptfolder] eq $projectroot} { + set vc "git" + set rev [punk::repo::git_revision $scriptfolder] + set rem [punk::repo::git_remote $scriptfolder] + } else { + set vc " - none found -" + set rev "n/a" + set remotes "n/a" + } + puts stdout "- version control : $vc" + puts stdout "- revision : $rev" + puts stdout "- remote : $rem" + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + + exit 0 +} + +if {$::punkmake::command eq "shell"} { + package require punk + package require punk::repl + puts stderr "make shell not fully implemented - dropping into ordinary punk shell" + repl::start stdin + + exit 1 +} + +if {$::punkmake::command eq "bootsupport"} { + puts "projectroot: $projectroot" + puts "script: [info script]" + #puts "-- [tcl::tm::list] --" + puts stdout "Updating bootsupport from local files" + + proc bootsupport_localupdate {projectroot} { + set bootsupport_modules [list] + set bootsupport_module_folders [list] + set bootsupport_config $projectroot/src/bootsupport/include_modules.config ;# + if {[file exists $bootsupport_config]} { + set targetroot $projectroot/src/bootsupport/modules + source $bootsupport_config ;#populate $bootsupport_modules with project-specific list + if {![llength $bootsupport_modules]} { + puts stderr "No local bootsupport modules configured for updating" + } else { + + if {[catch { + #---------- + set boot_installer [punkcheck::installtrack new make.tcl $projectroot/src/bootsupport/.punkcheck] + $boot_installer set_source_target $projectroot $projectroot/src/bootsupport + set boot_event [$boot_installer start_event {-make_step bootsupport}] + #---------- + } errM]} { + puts stderr "Unable to use punkcheck for bootsupport error: $errM" + set boot_event "" + } + + foreach {relpath module} $bootsupport_modules { + set module [string trim $module :] + set module_subpath [string map [list :: /] [namespace qualifiers $module]] + set srclocation [file join $projectroot $relpath $module_subpath] + #puts stdout "$relpath $module $module_subpath $srclocation" + set pkgmatches [glob -nocomplain -dir $srclocation -tail [namespace tail $module]-*] + #lsort won't sort version numbers properly e.g with -dictionary 0.1.1 comes before 0.1 + if {![llength $pkgmatches]} { + puts stderr "Missing source for bootsupport module $module - not found in $srclocation" + continue + } + set latestfile [lindex $pkgmatches 0] + set latestver [lindex [split [file rootname $latestfile] -] 1] + foreach m $pkgmatches { + lassign [split [file rootname $m] -] _pkg ver + #puts "comparing $ver vs $latestver" + if {[package vcompare $ver $latestver] == 1} { + set latestver $ver + set latestfile $m + } + } + set srcfile [file join $srclocation $latestfile] + set tgtfile [file join $targetroot $module_subpath $latestfile] + if {$boot_event ne ""} { + #---------- + $boot_event targetset_init INSTALL $tgtfile + $boot_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$boot_event targetset_source_changes] changed]]\ + || [llength [$boot_event get_targets_exist]] < [llength [$boot_event get_targets]]\ + } { + file mkdir [file dirname $tgtfile] ;#ensure containing folder for target exists + $boot_event targetset_started + # -- --- --- --- --- --- + puts "BOOTSUPPORT update: $srcfile -> $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $boot_event targetset_end FAILED + } else { + $boot_event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts -nonewline stderr "." + $boot_event targetset_end SKIPPED + } + $boot_event end + } else { + file copy -force $srcfile $tgtfile + } + } + if {$boot_event ne ""} { + puts \n + $boot_event destroy + $boot_installer destroy + } + } + + if {[llength $bootsupport_module_folders] % 2 != 0} { + #todo - change include_modules.config structure to be line based? we have no way of verifying paired entries because we accept a flat list + puts stderr "WARNING - Skipping bootsupport_module_folders - list should be a list of base subpath pairs" + } else { + foreach {base subfolder} $bootsupport_module_folders { + #user should be careful not to include recursive/cyclic structures e.g module that has a folder which contains other modules from this project + #It will probably work somewhat.. but may make updates confusing.. or worse - start making deeper and deeper copies + set src [file join $projectroot $base $subfolder] + if {![file isdirectory $src]} { + puts stderr "bootsupport folder not found: $src" + continue + } + + #subfolder is the common relative path - so don't include the base in the target path + set tgt [file join $targetroot $subfolder] + file mkdir $tgt + + puts stdout "BOOTSUPPORT non_tm_files $src - copying to $tgt (if source file changed)" + set overwrite "installedsourcechanged-targets" + set resultdict [punkcheck::install_non_tm_files $src $tgt -installer make.tcl -overwrite $overwrite -punkcheck_folder $projectroot/src/bootsupport] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + } + } + + } + } + + bootsupport_localupdate $projectroot + + #/modules/punk/mix/templates/layouts only applies if the project has it's own copy of the punk/mix modules. Generally this should only apply to the punkshell project itself. + set layout_bases [list\ + $sourcefolder/project_layouts/custom/_project\ + ] + foreach project_layout_base $layout_bases { + if {[file exists $project_layout_base]} { + set project_layouts [glob -nocomplain -dir $project_layout_base -type d -tail *] + foreach layoutname $project_layouts { + #don't auto-create src/bootsupport - just update it if it exists + if {[file exists [file join $project_layout_base $layoutname/src/bootsupport]]} { + set antipaths [list\ + README.md\ + ] + set sourcemodules $projectroot/src/bootsupport/modules + set targetroot [file join $project_layout_base $layoutname/src/bootsupport/modules] + file mkdir $targetroot + + puts stdout "BOOTSUPPORT layouts/$layoutname: copying from $sourcemodules to $targetroot (if source file changed)" + set resultdict [punkcheck::install $sourcemodules $targetroot -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + flush stdout + } + } + } else { + puts stderr "No layout base at $project_layout_base" + } + } + puts stdout " bootsupport done " + flush stderr + flush stdout + #punk86 can hang if calling make.tcl via 'run' without this 'after' delay. punk87 unaffected. cause unknown. + #after 500 + ::exit 0 +} + + + +if {$::punkmake::command ne "project"} { + puts stderr "Command $::punkmake::command not implemented - aborting." + flush stderr + after 100 + exit 1 +} + + + +#only a single consolidated /modules folder used for target +set target_modules_base $projectroot/modules +file mkdir $target_modules_base + +#external libs and modules first - and any supporting files - no 'building' required +if {[file exists $sourcefolder/vendorlib]} { + #exclude README.md from source folder - but only the root one + #-antiglob_paths takes relative patterns e.g + # */test.txt will only match test.txt exactly one level deep. + # */*/*.foo will match any path ending in .foo that is exactly 2 levels deep. + # **/test.txt will match at any level below the root (but not in the root) + set antipaths [list\ + README.md\ + ] + + puts stdout "VENDORLIB: copying from $sourcefolder/vendorlib to $projectroot/lib (if source file changed)" + set resultdict [punkcheck::install $sourcefolder/vendorlib $projectroot/lib -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + +} else { + puts stderr "VENDORLIB: No src/vendorlib folder found." +} + +if {[file exists $sourcefolder/vendormodules]} { + #install .tm *and other files* + puts stdout "VENDORMODULES: copying from $sourcefolder/vendormodules to $target_modules_base (if source file changed)" + set resultdict [punkcheck::install $sourcefolder/vendormodules $target_modules_base -installer make.tcl -overwrite installedsourcechanged-targets -antiglob_paths {README.md}] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] +} else { + puts stderr "VENDORMODULES: No src/vendormodules folder found." +} + +######################################################## +#templates +#e.g The default project layout is mainly folder structure and readme files - but has some scripts developed under the main src that we want to sync +#src to src/modules/punk/mix/templates/layouts/project/src + +set old_layout_update_list [list\ + [list project $sourcefolder/modules/punk/mix/templates]\ + [list basic $sourcefolder/mixtemplates]\ + ] +set layout_bases [list\ + $sourcefolder/project_layouts/custom/_project\ + ] + +foreach layoutbase $layout_bases { + if {![file exists $layoutbase]} { + continue + } + set project_layouts [glob -nocomplain -dir $layoutbase -type d -tail *] + foreach layoutname $project_layouts { + set config [dict create\ + -make-step sync_layouts\ + ] + #---------- + set tpl_installer [punkcheck::installtrack new make.tcl $layoutbase/.punkcheck] + $tpl_installer set_source_target $sourcefolder $layoutbase + set tpl_event [$tpl_installer start_event $config] + #---------- + set pairs [list] + set pairs [list\ + [list $sourcefolder/build.tcl $layoutbase/$layoutname/src/build.tcl]\ + [list $sourcefolder/make.tcl $layoutbase/$layoutname/src/make.tcl]\ + ] + + foreach filepair $pairs { + lassign $filepair srcfile tgtfile + + file mkdir [file dirname $tgtfile] + #---------- + $tpl_event targetset_init INSTALL $tgtfile + $tpl_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$tpl_event targetset_source_changes] changed]]\ + || [llength [$tpl_event get_targets_exist]] < [llength [$tpl_event get_targets]]\ + } { + $tpl_event targetset_started + # -- --- --- --- --- --- + puts stdout "PROJECT LAYOUT update - layoutname: $layoutname Copying from $srcfile to $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $tpl_event targetset_end FAILED -note "layout:$layoutname copy failed with err: $errM" + } else { + $tpl_event targetset_end OK -note "layout:$layoutname" + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $tpl_event targetset_end SKIPPED + } + } + + $tpl_event end + $tpl_event destroy + $tpl_installer destroy + } +} +######################################################## + + +#default source module folder is at projectroot/src/modules +#There may be multiple other src module folders at same level (e.g folder not being other special-purpose folder and not matching name vendor* that contains at least one .tm file in its root) +set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] +foreach src_module_dir $source_module_folderlist { + puts stderr "Processing source module dir: $src_module_dir" + set dirtail [file tail $src_module_dir] + #modules and associated files belonging to this package/app + set copied [punk::mix::cli::lib::build_modules_from_source_to_base $src_module_dir $target_modules_base -glob *.tm] ;#will only accept a glob ending in .tm + #set copied [list] + puts stdout "--------------------------" + puts stderr "Copied [llength $copied] tm modules from src/$dirtail to $target_modules_base " + puts stdout "--------------------------" + + set overwrite "installedsourcechanged-targets" + #set overwrite "ALL-TARGETS" + puts stdout "MODULEFOLDER non_tm_files $src_module_dir - copying to $target_modules_base (if source file changed)" + set resultdict [punkcheck::install_non_tm_files $src_module_dir $target_modules_base -installer make.tcl -overwrite $overwrite -antiglob_paths {README.md}] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] +} + +set installername "make.tcl" + +# ---------------------------------------- +if {[punk::repo::is_fossil_root $projectroot]} { + set config [dict create\ + -make-step configure_fossil\ + ] + #---------- + set installer [punkcheck::installtrack new $installername $projectroot/.punkcheck] + $installer set_source_target $projectroot $projectroot + + set event [$installer start_event $config] + $event targetset_init VIRTUAL fossil_settings_mainmenu ;#VIRTUAL - since there is no actual target file + set menufile $projectroot/.fossil-custom/mainmenu + $event targetset_addsource $menufile + #---------- + + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "Configuring fossil setting: mainmenu from: $menufile" + if {[catch { + set fd [open $menufile r] + fconfigure $fd -translation binary + set data [read $fd] + close $fd + exec fossil settings mainmenu $data + } errM]} { + $event targetset_end FAILED -note "fossil update failed: $errM" + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $event targetset_end SKIPPED + } + $event end + $event destroy + $installer destroy +} + +set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] +if {$buildfolder ne "$sourcefolder/_build"} { + puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" + puts stdout " -aborted- " + exit 2 +} + + +#find runtimes +set rtfolder $sourcefolder/runtime +set runtimes [glob -nocomplain -dir $rtfolder -types {f x} -tail *] +if {![llength $runtimes]} { + puts stderr "No executable runtimes found in $rtfolder - unable to build any .vfs folders into executables." + puts stderr "Add runtimes to $sourcefolder/runtime if required" + exit 0 +} + +if {[catch {exec sdx help} errM]} { + puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" + puts stderr "err: $errM" + exit 1 +} + +# -- --- --- --- --- --- --- --- --- --- +#load mapvfs.config file (if any) in runtime folder to map runtimes to vfs folders. +#build a dict keyed on runtime executable name. +#If no mapfile (or no mapfile entry for that runtime) - the runtime will be paired with a matching .vfs folder in src folder. e.g punk.exe to src/punk.vfs +#If vfs folders or runtime executables which are explicitly listed in the mapfile don't exist - warn on stderr - but continue. if such nonexistants found; prompt user for whether to continue or abort. +set mapfile $rtfolder/mapvfs.config +set runtime_vfs_map [dict create] +set vfs_runtime_map [dict create] +if {[file exists $mapfile]} { + set fdmap [open $mapfile r] + fconfigure $fdmap -translation binary + set mapdata [read $fdmap] + close $fdmap + set mapdata [string map [list \r\n \n] $mapdata] + set missing [list] + foreach ln [split $mapdata \n] { + set ln [string trim $ln] + if {$ln eq "" || [string match #* $ln]} { + continue + } + set vfspaths [lassign $ln runtime] + if {[string match *.exe $runtime]} { + #.exe is superfluous but allowed + #drop windows .exe suffix so same config can work cross platform - extension will be re-added if necessary later + set runtime [string range $runtime 0 end-4] + } + if {$runtime ne "-"} { + set runtime_test $runtime + if {"windows" eq $::tcl_platform(platform)} { + set runtime_test $runtime.exe + } + if {![file exists [file join $rtfolder $runtime_test]]} { + puts stderr "WARNING: Missing runtime file $rtfolder/$runtime_test (line in mapvfs.config: $ln)" + lappend missing $runtime + } + } + foreach vfs $vfspaths { + if {![file isdirectory [file join $sourcefolder $vfs]]} { + puts stderr "WARNNING: Missing vfs folder [file join $sourcefolder $vfs] specified in mapvfs.config for runtime $runtime" + lappend missing $vfs + } + dict lappend vfs_runtime_map $vfs $runtime + } + if {[dict exists $runtime_vfs_map $runtime]} { + puts stderr "CONFIG FILE ERROR. runtime: $runtime was specified more than once in $mapfile." + exit 3 + } + dict set runtime_vfs_map $runtime $vfspaths + } + if {[llength $missing]} { + puts stderr "WARNING [llength $missing] missing items from $mapfile. (TODO - prompt user to continue/abort)" + foreach m $missing { + puts stderr " $m" + } + puts stderr "continuing..." + } +} +# -- --- --- --- --- --- --- --- --- --- + +set vfs_folders [glob -nocomplain -dir $sourcefolder -types d -tail *.vfs] +#add any extra .vfs folders found in runtime/mapvfs.config file (e.g myotherruntimes/something.vfs) +dict for {vfs -} $vfs_runtime_map { + if {$vfs ni $vfs_folders} { + lappend vfs_folders $vfs + } +} +if {![llength $vfs_folders]} { + puts stdout "No .vfs folders found at '$sourcefolder' - no kits to build" + puts stdout " -done- " + exit 0 +} + +set vfs_folder_changes [dict create] ;#cache whether each .vfs folder has changes so we don't re-run tests if building from same .vfs with multiple runtime executables + +# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- +#set runtimefile [lindex $runtimes 0] +foreach runtimefile $runtimes { + #runtimefile e.g tclkit86bi.exe on windows tclkit86bi on other platforms + + #sdx *may* be pointed to use the runtime we use to build the kit, or the user may manually use this runtime if they don't have tclsh + #sdx will complain if the same runtime is used for the shell as is used in the -runtime argument - so we make a copy (REVIEW) + #if {![file exists $buildfolder/buildruntime.exe]} { + # file copy $rtfolder/$runtimefile $buildfolder/buildruntime.exe + #} + + set basedir $buildfolder + set config [dict create\ + -make-step copy_runtime\ + ] + #---------- + set installer [punkcheck::installtrack new $installername $basedir/.punkcheck] + $installer set_source_target $rtfolder $buildfolder + set event [$installer start_event $config] + $event targetset_init INSTALL $buildfolder/build_$runtimefile + $event targetset_addsource $rtfolder/$runtimefile + #---------- + + #set changed_unchanged [punkcheck::recordlist::file_install_record_source_changes [lindex [dict get $file_record body] end]] + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "Copying runtime from $rtfolder/$runtimefile to $buildfolder/build_$runtimefile" + if {[catch { + file copy -force $rtfolder/$runtimefile $buildfolder/build_$runtimefile + } errM]} { + $event targetset_end FAILED + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $event targetset_end SKIPPED + } + $event end + +} + +# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + +# +# loop over vfs_folders and for each one, loop over configured (or matching) runtimes - build with sdx if source .vfs or source runtime exe has changed. +# we are using punkcheck to install result to buildfolder so we create a .punkcheck file at the target folder to store metadata. +# punkcheck allows us to not rely purely on timestamps (which may be unreliable) +# +set startdir [pwd] +puts stdout "Found [llength $vfs_folders] .vfs folders - building executable for each..." +cd [file dirname $buildfolder] +#root folder mtime is insufficient for change detection. Tree mtime of folders only is a barely passable mechanism for vfs change detection in some circumstances - e.g if files added/removed but never edited in place +#a hash of full tree file & dir mtime may be more reasonable - but it remains to be seen if just tar & checksum is any/much slower. +#Simply rebuilding all the time may be close the speed of detecting change anyway - and almost certainly much faster when there is a change. +#Using first mtime encountered that is later than target is another option - but likely to be highly variable in speed. Last file in the tree could happen to be the latest, and this mechanism doesn't handle build on reversion to older source. +set exe_names_seen [list] +foreach vfs $vfs_folders { + + set vfsname [file rootname $vfs] + puts stdout " Processing vfs $sourcefolder/$vfs" + puts stdout " ------------------------------------" + set skipped_vfs_build 0 + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set basedir $buildfolder + set config [dict create\ + -make-step build_vfs\ + ] + + set runtimes [list] + if {[dict exists $vfs_runtime_map $vfs]} { + set runtimes [dict get $vfs_runtime_map $vfs] ;#map dict is unsuffixed (.exe stripped or was not present) + if {"windows" eq $::tcl_platform(platform)} { + set runtimes_raw $runtimes + set runtimes [list] + foreach rt $runtimes_raw { + if {![string match *.exe $rt] && $rt ne "-"} { + set rt $rt.exe + } + lappend runtimes $rt + } + } + } else { + #only match this vfs to a correspondingly named runtime if there was no explicit entry for that runtime + set matchrt [file rootname [file tail $vfs]] ;#e.g project.vfs -> project + if {![dict exists $runtime_vfs_map $matchrt]} { + if {"windows" eq $::tcl_platform(platform)} { + if {[file exists $rtfolder/$matchrt.exe]} { + lappend runtimes $matchrt.exe + } + } else { + lappend runtimes $matchrt + } + } + } + #assertion $runtimes is a list of executable names suffixed with .exe if on windows - whether or not specified with .exe in the mapvfs.config + + + #todo - non kit based - zipkit? + # $runtimes may now include a dash entry "-" (from mapvfs.config file) + foreach rtname $runtimes { + #rtname of "-" indicates build a kit without a runtime + + #first configured runtime will be the one to use the same name as .vfs folder for output executable. Additional runtimes on this .vfs will need to suffix the runtime name to disambiguate. + #review: This mechanism may not be great for multiplatform builds ? We may be better off consistently combining vfsname and rtname and letting a later platform-specific step choose ones to install in bin with simpler names. + if {$rtname eq "-"} { + set targetkit $vfsname.kit + } else { + if {$::tcl_platform(platform) eq "windows"} { + set targetkit ${vfsname}.exe + } else { + set targetkit $vfsname + } + if {$targetkit in $exe_names_seen} { + #more than one runtime for this .vfs + set targetkit ${vfsname}_$rtname + } + } + lappend exe_names_seen $targetkit + # -- ---------- + set vfs_installer [punkcheck::installtrack new $installername $basedir/.punkcheck] + $vfs_installer set_source_target $sourcefolder $buildfolder + set vfs_event [$vfs_installer start_event {-make-step build_vfs}] + $vfs_event targetset_init INSTALL $buildfolder/$targetkit + $vfs_event targetset_addsource $sourcefolder/$vfs + if {$rtname ne "-"} { + $vfs_event targetset_addsource $buildfolder/build_$rtname + } + # -- ---------- + + set changed_unchanged [$vfs_event targetset_source_changes] + + if {[llength [dict get $changed_unchanged changed]] || [llength [$vfs_event get_targets_exist]] < [llength [$vfs_event get_targets]]} { + #source .vfs folder has changes + $vfs_event targetset_started + # -- --- --- --- --- --- + + #use + if {[file exists $buildfolder/$vfsname.new]} { + puts stderr "deleting existing $buildfolder/$vfsname.new" + file delete $buildfolder/$vfsname.new + } + + puts stdout "building $vfsname with sdx.. vfsdir:$vfs cwd: [pwd]" + + + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$rtname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose + } + } result]} { + if {$rtname ne "-"} { + puts stderr "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$rtname -verbose failed with msg: $result" + } else { + puts stderr "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose failed with msg: $result" + } + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } + + if {![file exists $buildfolder/$vfsname.new]} { + puts stderr "|err> make.tcl build didn't seem to produce output at $sourcefolder/_build/$vfsname.new" + $vfs_event targetset_end FAILED + exit 2 + } + + # -- --- --- + if {$::tcl_platform(platform) eq "windows"} { + set pscmd "tasklist" + } else { + set pscmd "ps" + } + + #killing process doesn't apply to .kit build + if {$rtname ne "-"} { + if {![catch { + exec $pscmd | grep $vfsname + } still_running]} { + + puts stdout "found $vfsname instances still running\n" + set count_killed 0 + foreach ln [split $still_running \n] { + puts stdout " $ln" + + if {$::tcl_platform(platform) eq "windows"} { + set pid [lindex $ln 1] + if {$forcekill} { + set killcmd [list taskkill /F /PID $pid] + } else { + set killcmd [list taskkill /PID $pid] + } + } else { + set pid [lindex $ln 0] + #review! + if {$forcekill} { + set killcmd [list kill -9 $pid] + } else { + set killcmd [list kill $pid] + } + } + puts stdout " pid: $pid (attempting to kill now using '$killcmd')" + if {[catch { + exec {*}$killcmd + } errMsg]} { + puts stderr "$killcmd returned an error:" + puts stderr $errMsg + if {!$forcekill} { + puts stderr "(try '[info script] -k' option to force kill)" + } + #avoid exiting if the kill failure was because the task has already exited + #review - *no running instance* works with windows taskkill - "*No such process*" works with kill -9 on FreeBSD and linux - other platforms? + if {![string match "*no running instance*" $errMsg] && ![string match "*No such process*" $errMsg]} { + exit 4 + } + } else { + puts stderr "$killcmd ran without error" + incr count_killed + } + } + if {$count_killed > 0} { + puts stderr "\nKilled $count_killed processes. Waiting a short time before attempting to delete executable" + after 1000 + } + } else { + puts stderr "Ok.. no running '$vfsname' processes found" + } + } + + if {[file exists $buildfolder/$targetkit]} { + puts stderr "deleting existing $buildfolder/$targetkit" + if {[catch { + file delete $buildfolder/$targetkit + } msg]} { + puts stderr "Failed to delete $buildfolder/$targetkit" + exit 4 + } + } + #WINDOWS filesystem 'tunneling' (file replacement within 15secs) could cause targetkit to copy ctime & shortname metadata from previous file! + #This is probably harmless - but worth being aware of. + file rename $buildfolder/$vfsname.new $buildfolder/$targetkit + # -- --- --- --- --- --- + $vfs_event targetset_end OK + + + after 200 + set deployment_folder [file dirname $sourcefolder]/bin + file mkdir $deployment_folder + + # -- ---------- + set bin_installer [punkcheck::installtrack new "make.tcl" $deployment_folder/.punkcheck] + $bin_installer set_source_target $buildfolder $deployment_folder + set bin_event [$bin_installer start_event {-make-step final_kit_install}] + $bin_event targetset_init INSTALL $deployment_folder/$targetkit + #todo - move final deployment step outside of the build vfs loop? (final deployment can fail and then isn't rerun even though _build and deployed versions differ, unless .vfs modified again) + #set last_completion [$bin_event targetset_last_complete] + + $bin_event targetset_addsource $buildfolder/$targetkit + $bin_event targetset_started + # -- ---------- + + + set delete_failed 0 + if {[file exists $deployment_folder/$targetkit]} { + puts stderr "deleting existing deployed at $deployment_folder/$targetkit" + if {[catch { + file delete $deployment_folder/$targetkit + } errMsg]} { + puts stderr "deletion of deployed version at $deployment_folder/$targetkit failed: $errMsg" + set delete_failed 1 + } + } + if {!$delete_failed} { + puts stdout "copying.." + puts stdout "$buildfolder/$targetkit" + puts stdout "to:" + puts stdout "$deployment_folder/$targetkit" + after 300 + file copy $buildfolder/$targetkit $deployment_folder/$targetkit + # -- ---------- + $bin_event targetset_end OK + # -- ---------- + } else { + $bin_event targetset_end FAILED -note "could not delete" + exit 5 + } + $bin_event destroy + $bin_installer destroy + + } else { + set skipped_vfs_build 1 + puts stderr "." + puts stdout "Skipping build for vfs $vfs with runtime $rtname - no change detected" + $vfs_event targetset_end SKIPPED + } + $vfs_event destroy + $vfs_installer destroy + } ;#end foreach rtname in runtimes + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- +} +cd $startdir + +puts stdout "done" +exit 0 + + diff --git a/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/bootsupport/modules/punk/ns-0.1.0.tm index aeb8d89..93e5f44 100644 --- a/src/bootsupport/modules/punk/ns-0.1.0.tm +++ b/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -215,7 +215,7 @@ namespace eval punk::ns { } proc nschildren {fqns} { if {![string match ::* $fqns]} { - error "nschildren only accespts a fully qualified namespace" + error "nschildren only accepts a fully qualified namespace" } set parent [nsprefix $fqns] set tail [nstail $fqns] @@ -225,6 +225,9 @@ namespace eval punk::ns { return [lsort $nslist] } + #Note nsjoin,nsjoinall,nsprefix,nstail are string functions that don't care about namespaces in existence. + #Some functions in punk::ns are + proc nsjoin {prefix name} { if {[string match ::* $name]} { if {"$prefix" ne ""} { @@ -265,6 +268,17 @@ namespace eval punk::ns { } return [join $nonempty_segments ::] } + + + #REVIEW - the combination of nsprefix & nstail are designed to *almost* always be able to reassemble the input, and to be independent of what namespaces actually exist + #The main difference being collapsing (or ignoring) repeated double-colons + #we need to distinguish unprefixed from prefixed ie ::x vs x + #There is an apparent inconsistency with nstail ::a:::x being able to return :x + #whereas nsprefix :::a will return just a + #This is because :x (or even just : ) can in theory be the name of a command and we may need to see it (although it is not a good idea) + #and a namespace can exist with leading colon - but is even worse - as default Tcl commands will misreport e.g namespace current within namespace eval + #The view is taken that a namespace with leading/trailing colons is so error-prone that even introspection is unreliable so we will rule that out. + # proc nsprefix {{nspath ""}} { #normalize the common case of :::: set nspath [string map [list :::: ::] $nspath] @@ -281,8 +295,8 @@ namespace eval punk::ns { } } - #namespace tail which handles :::cmd ::x:::y ::x:::/y etc - #todo - raise error for unexpected sequences such as :::: or more than 2 colons together. + #namespace tail which handles :::cmd ::x:::y ::x:::/y etc in a specific manner for string processing + #review - consider making -strict raise an error for unexpected sequences such as :::: or any situation with more than 2 colons together. proc nstail {nspath args} { #normalize the common case of :::: set nspath [string map [list :::: ::] $nspath] @@ -301,7 +315,7 @@ namespace eval punk::ns { } } - #e.g ::x::y:::z should return ":z" + #e.g ::x::y:::z should return ":z" despite it being a bad idea for a command name. return [lindex $parts end] } @@ -792,7 +806,7 @@ namespace eval punk::ns { } if {$cmd in $aliases && $cmd in $seencmds} { #masked commandless-alias - #assert member of masked - but we use seencmds instead to detect. + #assertion member of masked - but we use seencmds instead to detect. set c [a+ yellow bold] set prefix "${a}als " set prefix [overtype::right $prefix "-R"] @@ -880,12 +894,14 @@ namespace eval punk::ns { foreach nsdict $with_results { dict set opts -nsdict $nsdict set block [get_nslist {*}$opts] - if {[string first \n $block] < 0} { - #single line - set width [textblock::width [list $block]] - } else { - set width [textblock::width $block] - } + #if {[string first \n $block] < 0} { + # #single line + # set width [textblock::width [list $block]] + #} else { + # set width [textblock::width $block] + #} + set width [textblock::width $block] + #if multiple results or if there is only 1 result - but we are path-globbing - then we need to show location if {$count_with_results > 1 || [regexp {[*?]} [nsprefix $ns_absolute]]} { append output \n [dict get $nsdict location] @@ -1356,7 +1372,14 @@ namespace eval punk::ns { proc corp {path} { #thanks to Richard Suchenwirth for the original - wiki.tcl-lang.org/page/corp #Note: modified here to support aliases and relative/absolute name (with respect to namespace .ie ::name vs name) - set indent " " ;#review + if {[info exists punk::console::tabwidth]} { + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + set indent [string repeat " " $tw] ;#match + #set indent [string repeat " " $tw] ;#A more sensible default for code - review + if {[info exists ::auto_index($path)]} { set body "\n${indent}#corp# auto_index $::auto_index($path)" } else { @@ -1415,7 +1438,7 @@ namespace eval punk::ns { if {![catch {package require textutil::tabify} errpkg]} { set bodytext [info body $origin] #punk::lib::indent preserves trailing empty lines - unlike textutil version - set bodytext [punk::lib::undent [textutil::untabify2 $bodytext]] + set bodytext [punk::lib::undent [textutil::untabify2 $bodytext $tw]] append body [punk::lib::indent $bodytext $indent] } else { append body [info body $origin] @@ -1522,20 +1545,23 @@ namespace eval punk::ns { #todo support leading solo flags such as -capture to control whether we do a static capture of local vars in the ns - if {[string tolower $pkg_or_existing_ns] in [list :: global]} { - set ns :: - set ver "";# tcl version? - } else { - if {[string match ::* $pkg_or_existing_ns]} { - if {![namespace exists $pkg_or_existing_ns]} { - set ver [package require [string range $pkg_or_existing_ns 2 end]] + switch -- [string tolower $pkg_or_existing_ns] { + "::" - global { + set ns :: + set ver "";# tcl version? + } + default { + if {[string match ::* $pkg_or_existing_ns]} { + if {![namespace exists $pkg_or_existing_ns]} { + set ver [package require [string range $pkg_or_existing_ns 2 end]] + } else { + set ver "" + } + set ns $pkg_or_existing_ns } else { - set ver "" + set ver [package require $pkg_or_existing_ns] + set ns ::$pkg_or_existing_ns } - set ns $pkg_or_existing_ns - } else { - set ver [package require $pkg_or_existing_ns] - set ns ::$pkg_or_existing_ns } } if {[namespace exists $ns]} { diff --git a/src/bootsupport/modules/punkcheck-0.1.0.tm b/src/bootsupport/modules/punkcheck-0.1.0.tm index 2a8aedf..86b174a 100644 --- a/src/bootsupport/modules/punkcheck-0.1.0.tm +++ b/src/bootsupport/modules/punkcheck-0.1.0.tm @@ -1493,53 +1493,57 @@ namespace eval punkcheck { lappend files_copied $current_source_dir/$m incr filecount_new } else { - if {$overwrite_what eq "installedsourcechanged-targets"} { - if {[llength $changed]} { - #An unrecorded installation is considered a source change (from unknown/unrecorded source to recorded) - file copy -force $current_source_dir/$m $current_target_dir - set new_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m] - lappend files_copied $current_source_dir/$m - } else { - set is_skip 1 - lappend files_skipped $current_source_dir/$m - } - } elseif {$overwrite_what eq "synced-targets"} { - if {[llength $changed]} { - #only overwrite if the target checksum equals the last installed checksum (ie target is in sync with source and so hasn't been customized) - set existing_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m] - set is_target_unmodified_since_install 0 - set target_cksum_compare "unknown" - set latest_install_record [punkcheck::recordlist::file_record_latest_installrecord $filerec] ;#may be no such record - in which case we get an empty list - if {[dict exists $latest_install_record -targets_cksums]} { - set last_install_cksum [dict get $latest_install_record -targets_cksums] ;#in this case we know there is only one as 'install' always uses targetset size of 1. (FILEINFO record per file in source folder) - if {[dict get $existing_tgt_cksum_info cksum] eq $last_install_cksum} { - set is_target_unmodified_since_install 1 - set target_cksum_compare "match" - } else { - set target_cksum_compare "nomatch" - } - } else { - set target_cksum_compare "norecord" - } - if {$is_target_unmodified_since_install} { + switch -- $overwrite_what { + installedsourcechanged-targets { + if {[llength $changed]} { + #An unrecorded installation is considered a source change (from unknown/unrecorded source to recorded) file copy -force $current_source_dir/$m $current_target_dir set new_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m] lappend files_copied $current_source_dir/$m } else { - #either cksum is different or we were unable to verify the record. Either way we can't know if the target is in sync so we must skip it set is_skip 1 - puts stderr "Skipping file copy $m target $current_target_dir/$m - require synced_target to overwrite - current target cksum compared to previous install: $target_cksum_compare" lappend files_skipped $current_source_dir/$m } - } else { + } + synced-targets { + if {[llength $changed]} { + #only overwrite if the target checksum equals the last installed checksum (ie target is in sync with source and so hasn't been customized) + set existing_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m] + set is_target_unmodified_since_install 0 + set target_cksum_compare "unknown" + set latest_install_record [punkcheck::recordlist::file_record_latest_installrecord $filerec] ;#may be no such record - in which case we get an empty list + if {[dict exists $latest_install_record -targets_cksums]} { + set last_install_cksum [dict get $latest_install_record -targets_cksums] ;#in this case we know there is only one as 'install' always uses targetset size of 1. (FILEINFO record per file in source folder) + if {[dict get $existing_tgt_cksum_info cksum] eq $last_install_cksum} { + set is_target_unmodified_since_install 1 + set target_cksum_compare "match" + } else { + set target_cksum_compare "nomatch" + } + } else { + set target_cksum_compare "norecord" + } + if {$is_target_unmodified_since_install} { + file copy -force $current_source_dir/$m $current_target_dir + set new_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m] + lappend files_copied $current_source_dir/$m + } else { + #either cksum is different or we were unable to verify the record. Either way we can't know if the target is in sync so we must skip it + set is_skip 1 + puts stderr "Skipping file copy $m target $current_target_dir/$m - require synced_target to overwrite - current target cksum compared to previous install: $target_cksum_compare" + lappend files_skipped $current_source_dir/$m + } + } else { + set is_skip 1 + lappend files_skipped $current_source_dir/$m + } + } + default { set is_skip 1 + puts stderr "Skipping file copy $m target $current_target_dir/$m already exists (use -overwrite all-targets to overwrite)" + #TODO! implement newer-targets older-targets? (note ctimes/mtimes are unreliable - may not be worth implementing) lappend files_skipped $current_source_dir/$m } - } else { - set is_skip 1 - puts stderr "Skipping file copy $m target $current_target_dir/$m already exists (use -overwrite all-targets to overwrite)" - #TODO! implement newer-targets older-targets? (note ctimes/mtimes are unreliable - may not be worth implementing) - lappend files_skipped $current_source_dir/$m } } } @@ -1584,11 +1588,15 @@ namespace eval punkcheck { set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *] set hiddensubdirs [glob -nocomplain -dir $current_source_dir -type {hidden d} -tail *] foreach h $hiddensubdirs { - if {$h in [list "." ".."]} { - continue - } - if {$h ni $subdirs} { - lappend subdirs $h + switch -- $h { + "." - ".." { + continue + } + default { + if {$h ni $subdirs} { + lappend subdirs $h + } + } } } } @@ -1736,8 +1744,24 @@ namespace eval punkcheck { } proc file_install_record_source_changes {install_record} { #reject INSTALLFAILED items ? - if {[dict get $install_record tag] ni [list "QUERY-INPROGRESS" "INSTALL-RECORD" "INSTALL-SKIPPED" "INSTALL-INPROGRESS" "MODIFY-INPROGRESS" "MODIFY-RECORD" "MODIFY-SKIPPED" "VIRTUAL-INPROGRESS" "VIRTUAL-RECORD" "VIRTUAL-SKIPPED" "DELETE-RECORD" "DELETE-INPROGRESS" "DELETE-SKIPPED"]} { - error "file_install_record_source_changes bad install record: tag '[dict get $install_record tag]' not INSTALL-RECORD|SKIPPED|INSTALL-INPROGRESS|MODIFY-RECORD|MODIFY-INPROGRESS|VIRTUAL-RECORD|VIRTUAL-INPROGRESS|DELETE-RECORD|DELETE-INPROGRESS" + switch -- [dict get $install_record tag] { + "QUERY-INPROGRESS" - + "INSTALL-RECORD" - + "INSTALL-SKIPPED" - + "INSTALL-INPROGRESS" - + "MODIFY-INPROGRESS" - + "MODIFY-RECORD" - + "MODIFY-SKIPPED" - + "VIRTUAL-INPROGRESS" - + "VIRTUAL-RECORD" - + "VIRTUAL-SKIPPED" - + "DELETE-RECORD" - + "DELETE-INPROGRESS" - + "DELETE-SKIPPED" { + } + default { + error "file_install_record_source_changes bad install record: tag '[dict get $install_record tag]' not INSTALL-RECORD|SKIPPED|INSTALL-INPROGRESS|MODIFY-RECORD|MODIFY-INPROGRESS|VIRTUAL-RECORD|VIRTUAL-INPROGRESS|DELETE-RECORD|DELETE-INPROGRESS" + } } set source_list [dict_getwithdefault $install_record body [list]] set changed [list] diff --git a/src/bootsupport/modules/uuid-1.0.7.tm b/src/bootsupport/modules/uuid-1.0.7.tm new file mode 100644 index 0000000..fbd43f3 --- /dev/null +++ b/src/bootsupport/modules/uuid-1.0.7.tm @@ -0,0 +1,245 @@ +# uuid.tcl - Copyright (C) 2004 Pat Thoyts +# +# UUIDs are 128 bit values that attempt to be unique in time and space. +# +# Reference: +# http://www.opengroup.org/dce/info/draft-leach-uuids-guids-01.txt +# +# uuid: scheme: +# http://www.globecom.net/ietf/draft/draft-kindel-uuid-uri-00.html +# +# Usage: uuid::uuid generate +# uuid::uuid equal $idA $idB + +package require Tcl 8.5 + +namespace eval uuid { + variable accel + array set accel {critcl 0} + + namespace export uuid + + variable uid + if {![info exists uid]} { + set uid 1 + } + + proc K {a b} {set a} +} + +### +# Optimization +# Caches machine info after the first pass +### + +proc ::uuid::generate_tcl_machinfo {} { + variable machinfo + if {[info exists machinfo]} { + return $machinfo + } + lappend machinfo [clock seconds]; # timestamp + lappend machinfo [clock clicks]; # system incrementing counter + lappend machinfo [info hostname]; # spatial unique id (poor) + lappend machinfo [pid]; # additional entropy + lappend machinfo [array get ::tcl_platform] + + ### + # If we have /dev/urandom just stream 128 bits from that + ### + if {[file exists /dev/urandom]} { + set fin [open /dev/urandom r] + binary scan [read $fin 128] H* machinfo + close $fin + } elseif {[catch {package require nettool}]} { + # More spatial information -- better than hostname. + # bug 1150714: opening a server socket may raise a warning messagebox + # with WinXP firewall, using ipconfig will return all IP addresses + # including ipv6 ones if available. ipconfig is OK on win98+ + if {[string equal $::tcl_platform(platform) "windows"]} { + catch {exec ipconfig} config + lappend machinfo $config + } else { + catch { + set s [socket -server void -myaddr [info hostname] 0] + K [fconfigure $s -sockname] [close $s] + } r + lappend machinfo $r + } + + if {[package provide Tk] != {}} { + lappend machinfo [winfo pointerxy .] + lappend machinfo [winfo id .] + } + } else { + ### + # If the nettool package works on this platform + # use the stream of hardware ids from it + ### + lappend machinfo {*}[::nettool::hwid_list] + } + return $machinfo +} + +# Generates a binary UUID as per the draft spec. We generate a pseudo-random +# type uuid (type 4). See section 3.4 +# +proc ::uuid::generate_tcl {} { + package require md5 2 + variable uid + + set tok [md5::MD5Init] + md5::MD5Update $tok [incr uid]; # package incrementing counter + foreach string [generate_tcl_machinfo] { + md5::MD5Update $tok $string + } + set r [md5::MD5Final $tok] + binary scan $r c* r + + # 3.4: set uuid versioning fields + lset r 8 [expr {([lindex $r 8] & 0x3F) | 0x80}] + lset r 6 [expr {([lindex $r 6] & 0x0F) | 0x40}] + + return [binary format c* $r] +} + +if {[string equal $tcl_platform(platform) "windows"] + && [package provide critcl] != {}} { + namespace eval uuid { + critcl::ccode { + #define WIN32_LEAN_AND_MEAN + #define STRICT + #include + #include + typedef long (__stdcall *LPFNUUIDCREATE)(UUID *); + typedef const unsigned char cu_char; + } + critcl::cproc generate_c {Tcl_Interp* interp} ok { + HRESULT hr = S_OK; + int r = TCL_OK; + UUID uuid = {0}; + HMODULE hLib; + LPFNUUIDCREATE lpfnUuidCreate = NULL; + hLib = LoadLibraryA(("rpcrt4.dll")); + if (hLib) + lpfnUuidCreate = (LPFNUUIDCREATE) + GetProcAddress(hLib, "UuidCreate"); + if (lpfnUuidCreate) { + Tcl_Obj *obj; + lpfnUuidCreate(&uuid); + obj = Tcl_NewByteArrayObj((cu_char *)&uuid, sizeof(uuid)); + Tcl_SetObjResult(interp, obj); + } else { + Tcl_SetResult(interp, "error: failed to create a guid", + TCL_STATIC); + r = TCL_ERROR; + } + return r; + } + } +} + +# Convert a binary uuid into its string representation. +# +proc ::uuid::tostring {uuid} { + binary scan $uuid H* s + foreach {a b} {0 7 8 11 12 15 16 19 20 end} { + append r [string range $s $a $b] - + } + return [string tolower [string trimright $r -]] +} + +# Convert a string representation of a uuid into its binary format. +# +proc ::uuid::fromstring {uuid} { + return [binary format H* [string map {- {}} $uuid]] +} + +# Compare two uuids for equality. +# +proc ::uuid::equal {left right} { + set l [fromstring $left] + set r [fromstring $right] + return [string equal $l $r] +} + +# Call our generate uuid implementation +proc ::uuid::generate {} { + variable accel + if {$accel(critcl)} { + return [generate_c] + } else { + return [generate_tcl] + } +} + +# uuid generate -> string rep of a new uuid +# uuid equal uuid1 uuid2 +# +proc uuid::uuid {cmd args} { + switch -exact -- $cmd { + generate { + if {[llength $args] != 0} { + return -code error "wrong # args:\ + should be \"uuid generate\"" + } + return [tostring [generate]] + } + equal { + if {[llength $args] != 2} { + return -code error "wrong \# args:\ + should be \"uuid equal uuid1 uuid2\"" + } + return [eval [linsert $args 0 equal]] + } + default { + return -code error "bad option \"$cmd\":\ + must be generate or equal" + } + } +} + +# ------------------------------------------------------------------------- + +# LoadAccelerator -- +# +# This package can make use of a number of compiled extensions to +# accelerate the digest computation. This procedure manages the +# use of these extensions within the package. During normal usage +# this should not be called, but the test package manipulates the +# list of enabled accelerators. +# +proc ::uuid::LoadAccelerator {name} { + variable accel + set r 0 + switch -exact -- $name { + critcl { + if {![catch {package require tcllibc}]} { + set r [expr {[info commands ::uuid::generate_c] != {}}] + } + } + default { + return -code error "invalid accelerator package:\ + must be one of [join [array names accel] {, }]" + } + } + set accel($name) $r +} + +# ------------------------------------------------------------------------- + +# Try and load a compiled extension to help. +namespace eval ::uuid { + variable e {} + foreach e {critcl} { + if {[LoadAccelerator $e]} break + } + unset e +} + +package provide uuid 1.0.7 + +# ------------------------------------------------------------------------- +# Local variables: +# mode: tcl +# indent-tabs-mode: nil +# End: