diff --git a/src/modules/punk/ansi-999999.0a1.0.tm b/src/modules/punk/ansi-999999.0a1.0.tm index 933f55ac..41fc34ee 100644 --- a/src/modules/punk/ansi-999999.0a1.0.tm +++ b/src/modules/punk/ansi-999999.0a1.0.tm @@ -218,7 +218,9 @@ namespace eval punk::ansi { #Layout for cp437 won't be right if you don't at least set width of control-chars to 1 - but also some images specifically use these glyphs #most fonts don't seem to supply graphics for these control characters even when cp437 is in use - the c1 control glyphs appear to be more widely available - but we could add them here too #by mapping these we can display regardless. - #nul char - no cp437 image. (which is good - because we use nul as a filler to mean empty column in overtype rendering) + #nul char - no cp437 image but commonly used as space in ansi graphics. + #(This is a potential conflict because we use nul as a filler to mean empty column in overtype rendering) REVIEW + dict set cp437_map \u0000 " " ;#space dict set cp437_map \u0001 \u263A ;#smiley - dict set cp437_map \u0003 \u263B ;#smiley-filled + dict set cp437_map \u0002 \u263B ;#smiley-filled dict set cp437_map \u0003 \u2665 ;#heart @@ -1597,10 +1599,23 @@ namespace eval punk::ansi { set codestate_initial $codestate_empty ;#keep a copy for resets. set did_reset 0 + #we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result? + #There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes? + #we will output 7bit merge of the SGRs even if some or all were 8bit CSI + #As at 2024 - 7bit are widely supported; 8bit seem to be often ignored by pseudoterminals + #auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism. 
+ #review - consider a higher-level option for always emitting 8bit or always 7bit + #either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream. + + #We still output any non SGR codes in the list as they came in - preserving their CSI + foreach c $args { - switch -- [string index $c 1][string index $c end] { + #normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes + #.. but preserve original c + set cnorm [string map [list \x9b {8[} ] $c] + switch -- [string index $cnorm 1][string index $cnorm end] { {[m} { - set params [string range $c 2 end-1] ;#strip leading esc lb and trailing m + set params [string range $cnorm 2 end-1] ;#strip leading esc lb and trailing m #some systems use colon for 256 colors or RGB or nonstandard subparameters #- it is therefore probably not ok to map to semicolon within SGR codes and treat the same. @@ -2185,6 +2200,15 @@ namespace eval punk::ansi::class { variable o_from_ansistring o_to_ansistring variable o_ns_from o_ns_to ;#some dirty encapsulation violation as a 'friend' of ansistring objects - direct record of namespaces as they are frequently accessed constructor {args} { + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking chained imports) + set nspath [namespace path] + if {"::punk::ansi::class" ni $nspath} { + lappend nspath ::punk::ansi::class + } + namespace path $nspath + #-- -- if {[llength $args] < 2} { error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? 
from_ansistring to_ansistring} } @@ -2251,14 +2275,72 @@ namespace eval punk::ansi::class { } method rendernext {} { upvar ${o_ns_from}::o_ansisplits from_ansisplits - upvar ${o_ns_from}::o_elements elements + upvar ${o_ns_from}::o_elements from_elements + upvar ${o_ns_from}::o_splitindex from_splitindex + + #if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend' + if {![llength $from_ansisplits]} { + namespace eval $o_ns_from {my MakeSplit} + } + + set eidx [llength $o_rendereditems] + + #compare what we've rendered so far to our source to confirm they're still in sync + if {[lrange $o_rendereditems 0 $eidx-1] ne [lrange $from_elements 0 $eidx-1]} { + puts stdout "rendereditems 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $o_rendereditems 0 $eidx-1]]" + puts stdout "from_elements 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $from_elements 0 $eidx-1]]" + error "rendernext error - rendering state is out of sync. rendereditems list not-equal to corresponding part of ansistring $o_from_ansistring" + } + if {$eidx == [llength $from_elements]} { + #nothing new available + return [dict create type "" rendercount 0 start_count_unrendered 0 end_count_unrendered 0] + } - if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend' - set elements_unrendered [expr {[llength $elements] - [llength $o_rendereditems]}] + set start_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] #we need to render in pt code chunks - not each grapheme element individually - #translate from element index to ansisplits index? 
+ #translate from element index to ansisplits index + set process_splitindex [lindex $from_splitindex $eidx] ;#which from_ansisplits index the first unrendered element belongs to + + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo type_rendered item + #we don't expect type to change should be all graphemes (type 'g') or a single code (type 'sgr','other' etc) + #review - we may want to store more info for graphemes e.g g0 g1 g2 for zero-wide 1-wide 2-wide ? + #if so - we should report a list of the grapheme types that were rendered in a pt block + #as a counterpoint however - we don't currently retrieve grapheme width during split (performance impact at wrong time?) - and width may depend on the rendering method anyway + #e.g c0 controls are normally zero printing width - but are (often) 1-wide glyphs in a cp437 rendering operation. + + #we want to render all the elements in this splitindex - for pt this may be multiple, for code it will be a single element + + set newtext "" + set rendercount 0 + if {$type_rendered eq "g"} { + + set e_splitindex $process_splitindex + while {$e_splitindex == $process_splitindex && $eidx < [llength $from_elements]} { + append newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + + incr eidx + set e_splitindex [lindex $from_splitindex $eidx] + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo _type item + } + } else { + set newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + } - return [dict create count_unrendered $elements_unrendered] + set end_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] + set count_rendered [expr {$start_elements_unrendered - $end_elements_unrendered}] + assert {$rendercount == $count_rendered} + + #todo - renderline equivalent? 
+ + $o_to_ansistring append $newtext + + return [dict create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] } } @@ -2274,42 +2356,58 @@ namespace eval punk::ansi::class { #As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string. #oo names beginning with uppercase are private - so we can't use capitalisation as a hint to distinguish those which differ from Tcl semantics oo::class create class_ansistring { - variable o_cksum_command - variable o_string - variable o_count + variable o_cksum_command o_string o_count #this is the main state we keep of the split apart string #we use the punk::ansi::ta::split_codes_single function which produces a list with zero, or an odd number elements always beginning and ending with plaintext - variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes - variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split + variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes + variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split #State regarding output renderstring (if any) - variable o_renderout ;#another class_ansistring instance - variable o_renderer ;# punk::ansi::class::renderer::class_ instance + variable o_renderout ;#another class_ansistring instance + variable o_renderer ;# punk::ansi::class::renderer::class_ instance variable o_renderwidth variable o_rendertype - variable o_elements o_sgrstacks ;#elements contains entry for each grapheme/control + each ansi code, stacks has list of ansi sgr codes - variable o_gx0states + # -- per element lookups -- + # llengths should all be the same + # we maintain 4 lookups per 
entry rather than a single nested list + # it is estimated that separate lists will be more efficient for certain operations - but that is open to review/testing. + variable o_elements ;#elements contains entry for each grapheme/control + each ansi code + variable o_sgrstacks ;#list of ansi sgr codes that will be merged later. Entries deliberately repeat if no change from previous entry. Later scans look for difference between n and n-1 when deciding where to apply codes. + variable o_gx0states ;#0|1 for alternate graphics gx0 + variable o_splitindex ;#entry for each element indicating the index of the split it belongs to. + # -- -- constructor {string} { set o_string $string + + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking imports) set nspath [namespace path] if {"::punk::ansi::class" ni $nspath} { lappend nspath ::punk::ansi::class } namespace path $nspath + #-- -- #we choose not to generate an internal split-state for the initial string - which may potentially be large. #there are a few methods such as get, has_ansi, show_state,checksum that can run efficiently on the initial string without generating it. #The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string) + set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc. 
set o_ptlist [list] - #o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the contructor, or no methods have yet triggered the initial string to have it's internal state built. + #o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the constructor, or no methods have yet triggered the initial string to have its internal state built. + set o_elements [list] + set o_sgrstacks [list] + set o_gx0states [list] + set o_splitindex [list] + set o_cksum_command [list sha1::sha1 -hex] @@ -2394,16 +2492,22 @@ namespace eval punk::ansi::class { set o_ptlist [list] set codestack [list] set gx0_state 0 ;#default off + set current_split_index 0 ;#incremented for each pt block, incremented for each code foreach {pt code} $o_ansisplits { lappend o_ptlist $pt foreach grapheme [punk::char::grapheme_split $pt] { lappend o_elements [list g $grapheme] lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index } + #after handling the pt block - incr the current_split_index + incr current_split_index ;#increment for each pt block - whether empty string or not. Indices corresponding to empty PT blocks will therefore not be present in o_splitindex as there were no elements in that ansisplit entry + #we will only get an empty code at the very end of ansisplits (ansisplits is length 0 or odd length - always with pt at start and pt at end) if {$code ne ""} { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index #maintenance warning - dup in append! 
if {[punk::ansi::codetype::is_sgr_reset $code]} { @@ -2429,12 +2533,14 @@ namespace eval punk::ansi::class { lappend o_elements [list other $code] } } + #after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index + incr current_split_index } - #assertion every grapheme and every individual code has been added to o_elements - #every element has an entry in o_sgrstacks - #every element has an entry in o_gx0states - assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]} } + #assertion every grapheme and every individual code has been added to o_elements + #every element has an entry in o_sgrstacks + #every element has an entry in o_gx0states + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} } method convert_altg {} { #do we need a method to retrieve without converting in the object? 
@@ -2617,10 +2723,12 @@ namespace eval punk::ansi::class { } set last_codestack [lindex $o_sgrstacks end] set last_gx0state [lindex $o_gx0states end] + set current_split_index [expr {[llength $o_ansisplits]-1}] ;#we are attaching to existing trailing pt - use its splitindex foreach grapheme [punk::char::grapheme_split $catstr] { lappend o_elements [list g $grapheme] lappend o_sgrstacks $last_codestack lappend o_gx0states $last_gx0state + lappend o_splitindex $current_split_index } incr o_count [my DoCount $catstr] } else { @@ -2630,7 +2738,7 @@ namespace eval punk::ansi::class { my MakeSplit set combined_plaintext [join $o_ptlist ""] set o_count [my DoCount $combined_plaintext] - assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]} + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} return $o_string } else { #update each element of internal state incrementally without reprocessing what is already there. @@ -2639,6 +2747,7 @@ namespace eval punk::ansi::class { set ptnew "" set codestack [lindex $o_sgrstacks end] set gx0_state [lindex $o_gx0states end] + set current_split_index [expr {[llength $o_ansisplits]-1}] ;#the first new pt is merged into the existing trailing pt - continue numbering from its splitindex rather than restarting at 0 foreach {pt code} $newsplits { lappend o_ptlist $pt append ptnew $pt @@ -2646,10 +2755,13 @@ namespace eval punk::ansi::class { lappend o_elements [list g $grapheme] lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index } + incr current_split_index ;#increment 1 of 2 within each loop if {$code ne ""} { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index #maintenance - dup in MakeSplit! 
if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list] @@ -2674,6 +2786,7 @@ namespace eval punk::ansi::class { lappend o_elements [list other $code] } } + incr current_split_index ;#increment 2 of 2 } } lset o_ansisplits end [string cat [lindex $o_ansisplits end] [lindex $newsplits 0]] @@ -2681,7 +2794,7 @@ namespace eval punk::ansi::class { incr o_count [my DoCount $ptnew] } } - assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]} + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} return $o_string } #method append_and_render - append and render up to end of appended data at same time @@ -3556,7 +3669,7 @@ namespace eval punk::ansi::ansistring { #Todo - rows! Note that a 'row' doesn't represent an output row if the ANSI string we are working with contains movement/cursor restores etc. #The column/row concept works for an ansistring that has been 'rendered' to some defined area. - #row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many. + #row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many rendered output rows. #return pair of column extents occupied by the character index supplied. #single-width grapheme will return pair of integers of equal value diff --git a/src/testansi/67_Calendar_2020_06_June.ans b/src/testansi/67_Calendar_2020_06_June.ans new file mode 100644 index 00000000..49e0b785 Binary files /dev/null and b/src/testansi/67_Calendar_2020_06_June.ans differ