From a1e9865b80300da5e73835dd57fb16bdaeaae038 Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Fri, 15 Mar 2024 00:41:59 +1100 Subject: [PATCH] ansi art cp437 nul char fix + ansistring work --- src/modules/punk/ansi-999999.0a1.0.tm | 163 ++++++++++++++++++---- src/testansi/67_Calendar_2020_06_June.ans | Bin 0 -> 9339 bytes 2 files changed, 138 insertions(+), 25 deletions(-) create mode 100644 src/testansi/67_Calendar_2020_06_June.ans diff --git a/src/modules/punk/ansi-999999.0a1.0.tm b/src/modules/punk/ansi-999999.0a1.0.tm index 933f55a..41fc34e 100644 --- a/src/modules/punk/ansi-999999.0a1.0.tm +++ b/src/modules/punk/ansi-999999.0a1.0.tm @@ -218,7 +218,9 @@ namespace eval punk::ansi { #Layout for cp437 won't be right if you don't at least set width of control-chars to 1 - but also some images specifically use these glyphs #most fonts don't seem to supply graphics for these control characters even when cp437 is in use - the c1 control glyphs appear to be more widely available - but we could add them here too #by mapping these we can display regardless. - #nul char - no cp437 image. (which is good - because we use nul as a filler to mean empty column in overtype rendering) + #nul char - no cp437 image but commonly used as space in ansi graphics. + #(This is a potential conflict because we use nul as a filler to mean empty column in overtype rendering) REVIEW + dict set cp437_map \u0000 " " ;#space dict set cp437_map \u0001 \u263A ;#smiley - dict set cp437_map \u0003 \u263B ;#smiley-filled + dict set cp437_map \u0002 \u263B ;#smiley-filled - cp437 0x02 (was keyed \u0003, so it was overwritten by the heart entry and 0x02 stayed unmapped) dict set cp437_map \u0003 \u2665 ;#heart @@ -1597,10 +1599,23 @@ namespace eval punk::ansi { set codestate_initial $codestate_empty ;#keep a copy for resets. set did_reset 0 + #we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result? + #There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes? 
+ #we will output 7bit merge of the SGRs even if some or all were 8bit CSI + #As at 2024 - 7bit are widely supported; 8bit seem to be often ignored by pseudoterminals + #auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism. + #review - consider a higher-level option for always emitting 8bit or always 7bit + #either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream. + + #We still output any non SGR codes in the list as they came in - preserving their CSI + foreach c $args { - switch -- [string index $c 1][string index $c end] { + #normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes + #.. but preserve original c + set cnorm [string map [list \x9b {8[} ] $c] + switch -- [string index $cnorm 1][string index $cnorm end] { {[m} { - set params [string range $c 2 end-1] ;#strip leading esc lb and trailing m + set params [string range $cnorm 2 end-1] ;#strip leading esc lb and trailing m #some systems use colon for 256 colors or RGB or nonstandard subparameters #- it is therefore probably not ok to map to semicolon within SGR codes and treat the same. 
@@ -2185,6 +2200,15 @@ namespace eval punk::ansi::class { variable o_from_ansistring o_to_ansistring variable o_ns_from o_ns_to ;#some dirty encapsulation violation as a 'friend' of ansistring objects - direct record of namespaces as they are frequently accessed constructor {args} { + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking chained imports) + set nspath [namespace path] + if {"::punk::ansi::class" ni $nspath} { + lappend nspath ::punk::ansi::class + } + namespace path $nspath + #-- -- if {[llength $args] < 2} { error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} } @@ -2251,14 +2275,72 @@ namespace eval punk::ansi::class { } method rendernext {} { upvar ${o_ns_from}::o_ansisplits from_ansisplits - upvar ${o_ns_from}::o_elements elements + upvar ${o_ns_from}::o_elements from_elements + upvar ${o_ns_from}::o_splitindex from_splitindex + + #if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend' + if {![llength $from_ansisplits]} { + namespace eval $o_ns_from {my MakeSplit} + } + + set eidx [llength $o_rendereditems] + + #compare what we've rendered so far to our source to confirm they're still in sync + if {[lrange $o_rendereditems 0 $eidx-1] ne [lrange $from_elements 0 $eidx-1]} { + puts stdout "rendereditems 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $o_rendereditems 0 $eidx-1]]" + puts stdout "from_elements 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $from_elements 0 $eidx-1]]" + error "rendernext error - rendering state is out of sync. 
rendereditems list not-equal to corresponding part of ansistring $o_from_ansistring" + } + if {$eidx == [llength $from_elements]} { + #nothing new available + return [dict create type "" rendercount 0 start_count_unrendered 0 end_count_unrendered 0] + } - if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend' - set elements_unrendered [expr {[llength $elements] - [llength $o_rendereditems]}] + set start_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] #we need to render in pt code chunks - not each grapheme element individually - #translate from element index to ansisplits index? + #translate from element index to ansisplits index + set process_splitindex [lindex $from_splitindex $eidx] ;#which from_ansisplits index the first unrendered element belongs to + + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo type_rendered item + #we don't expect type to change should be all graphemes (type 'g') or a single code (type 'sgr','other' etc) + #review - we may want to store more info for graphemes e.g g0 g1 g2 for zero-wide 1-wide 2-wide ? + #if so - we should report a list of the grapheme types that were rendered in a pt block + #as a counterpoint however - we don't currently retrieve grapheme width during split (performance impact at wrong time?) - and width may depend on the rendering method anyway + #e.g c0 controls are normally zero printing width - but are (often) 1-wide glyphs in a cp437 rendering operation. 
+ + #we want to render all the elements in this splitindex - for pt this may be multiple, for code it will be a single element + + set newtext "" + set rendercount 0 + if {$type_rendered eq "g"} { + + set e_splitindex $process_splitindex + while {$e_splitindex == $process_splitindex && $eidx < [llength $from_elements]} { + append newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + + incr eidx + set e_splitindex [lindex $from_splitindex $eidx] + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo _type item + } + } else { + set newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + } - return [dict create count_unrendered $elements_unrendered] + set end_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] + set count_rendered [expr {$start_elements_unrendered - $end_elements_unrendered}] + assert {$rendercount == $count_rendered} + + #todo - renderline equivalent? + + $o_to_ansistring append $newtext + + return [dict create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] } } @@ -2274,42 +2356,58 @@ namespace eval punk::ansi::class { #As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string. 
#oo names beginning with uppercase are private - so we can't use capitalisation as a hint to distinguish those which differ from Tcl semantics oo::class create class_ansistring { - variable o_cksum_command - variable o_string - variable o_count + variable o_cksum_command o_string o_count #this is the main state we keep of the split apart string #we use the punk::ansi::ta::split_codes_single function which produces a list with zero, or an odd number elements always beginning and ending with plaintext - variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes - variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split + variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes + variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split #State regarding output renderstring (if any) - variable o_renderout ;#another class_ansistring instance - variable o_renderer ;# punk::ansi::class::renderer::class_ instance + variable o_renderout ;#another class_ansistring instance + variable o_renderer ;# punk::ansi::class::renderer::class_ instance variable o_renderwidth variable o_rendertype - variable o_elements o_sgrstacks ;#elements contains entry for each grapheme/control + each ansi code, stacks has list of ansi sgr codes - variable o_gx0states + # -- per element lookups -- + # llengths should all be the same + # we maintain 4 lookups per entry rather than a single nested list + # it is estimated that separate lists will be more efficient for certain operations - but that is open to review/testing. + variable o_elements ;#elements contains entry for each grapheme/control + each ansi code + variable o_sgrstacks ;#list of ansi sgr codes that will be merged later. Entries deliberately repeat if no change from previous entry. 
Later scans look for difference between n and n-1 when deciding where to apply codes. + variable o_gx0states ;#0|1 for alternate graphics gx0 + variable o_splitindex ;#entry for each element indicating the index of the split it belongs to. + # -- -- constructor {string} { set o_string $string + + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking imports) set nspath [namespace path] if {"::punk::ansi::class" ni $nspath} { lappend nspath ::punk::ansi::class } namespace path $nspath + #-- -- #we choose not to generate an internal split-state for the initial string - which may potentially be large. #there are a few methods such as get, has_ansi, show_state,checksum that can run efficiently on the initial string without generating it. #The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string) + set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc. set o_ptlist [list] #o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the contructor, or no methods have yet triggered the initial string to have it's internal state built. 
+ set o_elements [list] + set o_sgrstacks [list] + set o_gx0states [list] + set o_splitindex [list] + set o_cksum_command [list sha1::sha1 -hex] @@ -2394,16 +2492,22 @@ namespace eval punk::ansi::class { set o_ptlist [list] set codestack [list] set gx0_state 0 ;#default off + set current_split_index 0 ;#incremented for each pt block, incremented for each code foreach {pt code} $o_ansisplits { lappend o_ptlist $pt foreach grapheme [punk::char::grapheme_split $pt] { lappend o_elements [list g $grapheme] lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index } + #after handling the pt block - incr the current_split_index + incr current_split_index ;#increment for each pt block - whether empty string or not. Indices corresponding to empty PT blocks will therefore not be present in o_splitindex as there were no elements in that ansisplit entry + #we will only get an empty code at the very end of ansisplits (ansisplits is length 0 or odd length - always with pt at start and pt at end) if {$code ne ""} { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index #maintenance warning - dup in append! 
if {[punk::ansi::codetype::is_sgr_reset $code]} { @@ -2429,12 +2533,14 @@ namespace eval punk::ansi::class { lappend o_elements [list other $code] } } + #after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index + incr current_split_index } - #assertion every grapheme and every individual code has been added to o_elements - #every element has an entry in o_sgrstacks - #every element has an entry in o_gx0states - assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]} } + #assertion every grapheme and every individual code has been added to o_elements + #every element has an entry in o_sgrstacks + #every element has an entry in o_gx0states + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} } method convert_altg {} { #do we need a method to retrieve without converting in the object? 
@@ -2617,10 +2723,12 @@ namespace eval punk::ansi::class { } set last_codestack [lindex $o_sgrstacks end] set last_gx0state [lindex $o_gx0states end] + set current_split_index [expr {[llength $o_ansisplits]-1}] ;#we are attaching to existing trailing pt - use its splitindex foreach grapheme [punk::char::grapheme_split $catstr] { lappend o_elements [list g $grapheme] lappend o_sgrstacks $last_codestack lappend o_gx0states $last_gx0state + lappend o_splitindex $current_split_index } incr o_count [my DoCount $catstr] } else { @@ -2630,7 +2738,7 @@ namespace eval punk::ansi::class { my MakeSplit set combined_plaintext [join $o_ptlist ""] set o_count [my DoCount $combined_plaintext] - assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]} + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} return $o_string } else { #update each element of internal state incrementally without reprocessing what is already there. @@ -2639,6 +2747,7 @@ namespace eval punk::ansi::class { set ptnew "" set codestack [lindex $o_sgrstacks end] set gx0_state [lindex $o_gx0states end] + set current_split_index [expr {[llength $o_ansisplits]-1}] ;#first new pt is merged into the existing trailing pt (see lset o_ansisplits end) - continue from its splitindex rather than restarting at 0, so indices keep matching positions in o_ansisplits as in MakeSplit foreach {pt code} $newsplits { lappend o_ptlist $pt append ptnew $pt @@ -2646,10 +2755,13 @@ namespace eval punk::ansi::class { lappend o_elements [list g $grapheme] lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index } + incr current_split_index ;#increment 1 of 2 within each loop if {$code ne ""} { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index #maintenance - dup in MakeSplit! 
if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list] @@ -2674,6 +2786,7 @@ namespace eval punk::ansi::class { lappend o_elements [list other $code] } } + incr current_split_index ;#increment 2 of 2 } } lset o_ansisplits end [string cat [lindex $o_ansisplits end] [lindex $newsplits 0]] @@ -2681,7 +2794,7 @@ namespace eval punk::ansi::class { incr o_count [my DoCount $ptnew] } } - assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]} + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} return $o_string } #method append_and_render - append and render up to end of appended data at same time @@ -3556,7 +3669,7 @@ namespace eval punk::ansi::ansistring { #Todo - rows! Note that a 'row' doesn't represent an output row if the ANSI string we are working with contains movement/cursor restores etc. #The column/row concept works for an ansistring that has been 'rendered' to some defined area. - #row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many. + #row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many rendered output rows. #return pair of column extents occupied by the character index supplied. 
#single-width grapheme will return pair of integers of equal value diff --git a/src/testansi/67_Calendar_2020_06_June.ans b/src/testansi/67_Calendar_2020_06_June.ans new file mode 100644 index 0000000000000000000000000000000000000000..49e0b78574ad24c68757b40c353928a1428af192 GIT binary patch literal 9339 zcmbW6&5k5T6@|-NBzCNrO-d{gQvVFyLcKB;LM)Jw5Z=tn3|NeC*^+G*yj4}cPGwcT z2a6WpIrrX($m$*&5sT`|sED}docj|I*>B$6JiWbnx_{W8KHk=7F9y7Mcl-4A;pzT< ze|W<>xk&FlwqEry?9@8=R&$p(@7~@F=HI-#f6T`=oFUkG+%N0u;bH6b-Uf4Bz0}0# z?S5L^^zgWU^X}K5@#SH^u5EM&^#73&z1D2y1j*K0R0{LuEZSQhm-FkrePkLaO0s~A z*7jL+S@1SsEbF`=6=WxkhH3+sv#uQXLoT`q!3%ngoz1ZPOOBUSoHkAq4B{y%uyPT& zZ14x#WiBU9MWbByn!kGATv&o^bAy*nyoo=a^5GQ@6JUSLFBxgZc)M4mP3z#B18lo% zOzBReKx4>vN)AM^%=9xFx2500ah+PvDo++Q<~{78ym1n}PNZO#4|HAy-MTEcyA8$b z-ijm0CFYkwr*+Dihe)E^{VZujc2+CCJR8WJurudw|6I>CBqT{iLJ(GpIPPK$xvI{O zi=kyLhrXl)`WkfwsVf;H@H{x(84$g=D0WABXz~-gq%yfgDP33<>XLv-aE+Dh`s&I# zZAd;VbqR@}&)97idyw-tGgCbF0-y-+gFeTUHJZlTW1i6CAV+oTAE8aH+OVF>tEc0! z`?OaHmV|B05^c2-XI*F2aM)S&y#iswu9F-26uI!3r5yPgAe)%Di&t)0lj=;+1C4^1 z&)fZ2b*`x$H3S!}hXd3aZe?L}Qj$@)9cB@*cave(;nCgm!VEF4D3<`6%G^ndLOC&l zYReNx3sktW=qggX7X|wy6Ks26ZaK-I=0;qYM5#B<{;}wA6 zvw>XsCc6ls8kDn^<#Lv#&eY$hm|&h)60o_D;7@D z;EbZgUk93M17ql{km2vP!qpO!h9R& zz~UE2Y(_|z5J!6|PP)&dQuuzJb+%+y`92rD+h^#k(=ttKu|CeTWKra}l8pf`r#i&b zGh*njxN<4f{CJL}C_>DMC7Vg`JnS3g#=K&DdxNVW*)U8J^v&xb>sopa_nJVn=6YwO zBv5!%6<>7^wH%N`oI5L31(l{wB_;T96r}?QrN|-D8Wlfdixmn*b$w-cR0F&S&AXDIyp8J>)^m5 zv#qT|D*-yVr~m>^x#*Z&I8NsSDb+hi5{X_uh$?eUXBZH{iP&eAnRk}5obOPH0ALLu(hQ#4??kve0zJOYD+t9z z72V4fFY9|6c)`;4HGqt9q_hjIRPe9)N`=G`F0F|tGtLlEmA8t};V2iVYPb?lhj@&o z3LMn5weVA`L#%3J{i^JvOcmJ*hag}EX6if=AqYU$R;6#VFW|&f#)Z@b<8H56mOZovkdE=KA-Ac4QV>*_bku3ZoPm;co~c#> zA5yB0@m;WK1L$+I!gazx1O-D3Vl392_ zhF(S+aq6;LC<~Z!>KavG8)c}Pp9@)=qLK`(RmDO>KVT;c;O@wP8p%PY4LpnrM%BW;%8U^5qv}_f%{h$?i--+!sB*^t4mRm6wNIyW3zt_Q=bFph z>Id?*s--mp!j_g)*rubXXn|dyj!)5K91cP7T&8-X$@y=Hl z#&BYIK3TMqfRXH4p8K1oQ`oyofDpO4h@JWD49MupKVH#Jr%qg(W6u~tm&WP?@^eB3 
zveBy451seLeTOd^(9(%J1z~a!9&|XMH9%I!@G9}==@tFoY%zd z;~OrOTh+@ZL(i~8eBqd_o0>7#+0H3KU*%EPRd9(aj&?WuufO>4Rg~I1n4Sq5)2oni znNCCN>?>VV&koh^WlPsMtX{ms%2G+s-_&$IW~mE*W&PXvwpF-((~Mpeb6KA*IvWW{4Qr$5PW>G?ib5434kW4&1!e!?yDvX{KSF5*WZIFly*T(- z4u9FwOTqGO#^sWe%Y8s6ONOvQ1rS(rE(8$OE1(bXI@6ajJRwm8EF zKd88x+PG5fQPDTIkjv%U(3sCB`)j3DSnC<+!T5>u1e(fWI@ZXp&9X~DjMC+Z7z~pi z=PSb}j~UViIV;Y+SnW+byheC1dtqs*rF7%ZnHr{s!}8@)?ed3(JTTt6hHe~9F?7Bt zyd6{UWr|;ON@$yWY9i|(?ajffg0`&hudm<#1EVjR z_J-+~Q`Eow^{-!j^ZuXTfBVhXAKrgg6Yg$qe{*|x^Z1`%>~^1g@~7RM?Cy3y`@?Vl NIDP*6Pvfb5{ug*KKzslI literal 0 HcmV?d00001