diff --git a/src/bootsupport/modules/punk/char-0.1.0.tm b/src/bootsupport/modules/punk/char-0.1.0.tm index 675f42b0..f8123b94 100644 --- a/src/bootsupport/modules/punk/char-0.1.0.tm +++ b/src/bootsupport/modules/punk/char-0.1.0.tm @@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char { if {[tcl::string::last \n $text] >= 0} { error "string_width accepts only a single line" } - tailcall ansifreestring_width $text + #tailcall ansifreestring_width $text + ansifreestring_width $text } #todo - consider disallowing/erroring out when \r \n in string? @@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach c $codes { - if {$c <= 255 && !($c < 31 || $c == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$c <= 255} { + if {$c == 9 || ($c >= 31 && $c != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$c < 917504 || $c > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $c] @@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char { } proc wcswidth_single {char} { scan $char %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { + if {$dec <= 255} { + if {$dec == 9} { + #tab always represented by at least one char in terminal etc. + #caller will need to process tabs themselves to determine extra width applicable to their circumstance. + return 1 + } + if {($dec < 31 || $dec == 127)} { + return 0 + } #review - non-printing ascii? why does textutil::wcswidth report 1 ?? #todo - compare with python or other lang wcwidth return 1 @@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char { set width 0 foreach c [split $string {}] { scan $c %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? 
why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {$dec == 9 || ($dec >= 31 && $dec != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint @@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach dec $codes { - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {($dec ==9 || ($dec >= 31 && $dec != 127))} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcswidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] @@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char { #review - non-printing ascii? why does textutil::wcswidth report 1 ?? 
#todo - compare with python or other lang wcwidth if {!($dec < 31 || $dec == 127)} { - incr width + incr width } } else { #TODO - various other joiners and non-printing chars @@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char { #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #todo - document that these shouldn't be present in input rather than explicitly checking here - #c0 controls - set re_ascii_c0 {[\U0000-\U001F]} + #c0 controls + del (127 7f) - tab + #set re_ascii_c0 {[\U0000-\U001F]} + set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]} set text [regsub -all $re_ascii_c0 $text ""] #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective @@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char { # return [tcl::string::length $text] #} if {![regexp "\[\uFF-\U10FFFF\]" $text]} { - #return [tcl::string::length $text] - return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii + return [tcl::string::length $text] + #punk::char::wcswidth has to split and examine dec value of each code + #By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW + #return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii } #split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first? diff --git a/src/bootsupport/modules/textblock-0.1.3.tm b/src/bootsupport/modules/textblock-0.1.3.tm index c102ca29..4a7e3c32 100644 --- a/src/bootsupport/modules/textblock-0.1.3.tm +++ b/src/bootsupport/modules/textblock-0.1.3.tm @@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock { set RST [a] proc frame_samples {} { + #@ for example 50us per frame and 16 frames - we are already at 800us + #As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up. 
+ #textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call + #frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of + #adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display. + set FRAMETYPELABELS [dict create] if {[info commands ::textblock::frame] ne ""} { foreach ft [frametypes] { @@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock { -checkargs -default 1 -type boolean\ -help "If true do extra argument checks and provide more comprehensive error info. - Set false for slight performance improvement." + As the argument parser loads around 16 default frame + samples dynamically, this can add add up as each may + take 10s of microseconds. For many-framed tables + and other applications this can add up. + Set false for performance improvement." -etabs -default 0\ -help "expanding tabs - experimental/unimplemented." -type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\ @@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock { #use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache set optnames [tcl::dict::keys $opts] set opts_ok 1 ;#default assumption + #NOTE: mis-spelling options in this list can trigger $opt_ok false + #and fallback to using punk::args to parse unnecessarily. 
- performance can degrade noticeably on tables foreach {k v} $optlist { set k2 [tcl::prefix::match -error "" $optnames $k] switch -- $k2 { - -etabs - -type - -boxlimits - -boxmap - -join + -etabs - -type - -boxlimits - -boxmap - -joins - -title - -titlealign - -subtitle - -subtitlealign - -width - -height - -ansiborder - -ansibase - -blockalign - -textalign - -ellipsis @@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock { #only use punk::args if check_args is true or our basic checks failed #never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame if {[llength $args] != 1 && (!$opts_ok || $check_args)} { + #as frame is called a lot within table building - checking args can have a *big* impact on final performance. set argd [punk::args::get_by_id ::textblock::frame $args] set opts [dict get $argd opts] set contents [dict get $argd values contents] diff --git a/src/modules/punk/char-999999.0a1.0.tm b/src/modules/punk/char-999999.0a1.0.tm index 197a30a9..a021bf42 100644 --- a/src/modules/punk/char-999999.0a1.0.tm +++ b/src/modules/punk/char-999999.0a1.0.tm @@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char { if {[tcl::string::last \n $text] >= 0} { error "string_width accepts only a single line" } - tailcall ansifreestring_width $text + #tailcall ansifreestring_width $text + ansifreestring_width $text } #todo - consider disallowing/erroring out when \r \n in string? @@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach c $codes { - if {$c <= 255 && !($c < 31 || $c == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$c <= 255} { + if {$c == 9 || ($c >= 31 && $c != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ??
+ #todo - compare with python or other lang wcwidth + incr width + } } elseif {$c < 917504 || $c > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $c] @@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char { } proc wcswidth_single {char} { scan $char %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { + if {$dec <= 255} { + if {$dec == 9} { + #tab always represented by at least one char in terminal etc. + #caller will need to process tabs themselves to determine extra width applicable to their circumstance. + return 1 + } + if {($dec < 31 || $dec == 127)} { + return 0 + } #review - non-printing ascii? why does textutil::wcswidth report 1 ?? #todo - compare with python or other lang wcwidth return 1 @@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char { set width 0 foreach c [split $string {}] { scan $c %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {$dec == 9 || ($dec >= 31 && $dec != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint @@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach dec $codes { - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {($dec ==9 || ($dec >= 31 && $dec != 127))} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? 
+ #todo - compare with python or other lang wcswidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] @@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char { #review - non-printing ascii? why does textutil::wcswidth report 1 ?? #todo - compare with python or other lang wcwidth if {!($dec < 31 || $dec == 127)} { - incr width + incr width } } else { #TODO - various other joiners and non-printing chars @@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char { #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #todo - document that these shouldn't be present in input rather than explicitly checking here - #c0 controls - set re_ascii_c0 {[\U0000-\U001F]} + #c0 controls + del (127 7f) - tab + #set re_ascii_c0 {[\U0000-\U001F]} + set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]} set text [regsub -all $re_ascii_c0 $text ""] #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective @@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char { # return [tcl::string::length $text] #} if {![regexp "\[\uFF-\U10FFFF\]" $text]} { - #return [tcl::string::length $text] - return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii + return [tcl::string::length $text] + #punk::char::wcswidth has to split and examine dec value of each code + #By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW + #return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii } #split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first? 
diff --git a/src/modules/textblock-999999.0a1.0.tm b/src/modules/textblock-999999.0a1.0.tm index cc3d24c6..971d7331 100644 --- a/src/modules/textblock-999999.0a1.0.tm +++ b/src/modules/textblock-999999.0a1.0.tm @@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock { set RST [a] proc frame_samples {} { + #@ for example 50us per frame and 16 frames - we are already at 800us + #As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up. + #textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call + #frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of + #adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display. + set FRAMETYPELABELS [dict create] if {[info commands ::textblock::frame] ne ""} { foreach ft [frametypes] { @@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock { -checkargs -default 1 -type boolean\ -help "If true do extra argument checks and provide more comprehensive error info. - Set false for slight performance improvement." + As the argument parser loads around 16 default frame + samples dynamically, this can add add up as each may + take 10s of microseconds. For many-framed tables + and other applications this can add up. + Set false for performance improvement." -etabs -default 0\ -help "expanding tabs - experimental/unimplemented." -type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\ @@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock { #use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache set optnames [tcl::dict::keys $opts] set opts_ok 1 ;#default assumption + #NOTE: mis-spelling options in this list can trigger $opt_ok false + #and fallback to using punk::args to parse unnecessarily. 
- performance can degrade noticeably on tables foreach {k v} $optlist { set k2 [tcl::prefix::match -error "" $optnames $k] switch -- $k2 { - -etabs - -type - -boxlimits - -boxmap - -join + -etabs - -type - -boxlimits - -boxmap - -joins - -title - -titlealign - -subtitle - -subtitlealign - -width - -height - -ansiborder - -ansibase - -blockalign - -textalign - -ellipsis @@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock { #only use punk::args if check_args is true or our basic checks failed #never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame if {[llength $args] != 1 && (!$opts_ok || $check_args)} { + #as frame is called a lot within table building - checking args can have a *big* impact on final performance. set argd [punk::args::get_by_id ::textblock::frame $args] set opts [dict get $argd opts] set contents [dict get $argd values contents] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm index 675f42b0..f8123b94 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm @@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char { if {[tcl::string::last \n $text] >= 0} { error "string_width accepts only a single line" } - tailcall ansifreestring_width $text + #tailcall ansifreestring_width $text + ansifreestring_width $text } #todo - consider disallowing/erroring out when \r \n in string? @@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach c $codes { - if {$c <= 255 && !($c < 31 || $c == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ??
- #todo - compare with python or other lang wcwidth - incr width + if {$c <= 255} { + if {$c == 9 || ($c >= 31 && $c != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$c < 917504 || $c > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $c] @@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char { } proc wcswidth_single {char} { scan $char %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { + if {$dec <= 255} { + if {$dec == 9} { + #tab always represented by at least one char in terminal etc. + #caller will need to process tabs themselves to determine extra width applicable to their circumstance. + return 1 + } + if {($dec < 31 || $dec == 127)} { + return 0 + } #review - non-printing ascii? why does textutil::wcswidth report 1 ?? #todo - compare with python or other lang wcwidth return 1 @@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char { set width 0 foreach c [split $string {}] { scan $c %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {$dec == 9 || ($dec >= 31 && $dec != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint @@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach dec $codes { - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? 
- #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {($dec ==9 || ($dec >= 31 && $dec != 127))} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcswidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] @@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char { #review - non-printing ascii? why does textutil::wcswidth report 1 ?? #todo - compare with python or other lang wcwidth if {!($dec < 31 || $dec == 127)} { - incr width + incr width } } else { #TODO - various other joiners and non-printing chars @@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char { #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #todo - document that these shouldn't be present in input rather than explicitly checking here - #c0 controls - set re_ascii_c0 {[\U0000-\U001F]} + #c0 controls + del (127 7f) - tab + #set re_ascii_c0 {[\U0000-\U001F]} + set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]} set text [regsub -all $re_ascii_c0 $text ""] #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective @@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char { # return [tcl::string::length $text] #} if {![regexp "\[\uFF-\U10FFFF\]" $text]} { - #return [tcl::string::length $text] - return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii + return [tcl::string::length $text] + #punk::char::wcswidth has to split and examine dec value of each code + #By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW + #return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii } #split just to get the standalone character widths - and then scan for other combiners (?) 
- or scan for clusters first? diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm index c102ca29..4a7e3c32 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm @@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock { set RST [a] proc frame_samples {} { + #@ for example 50us per frame and 16 frames - we are already at 800us + #As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up. + #textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call + #frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of + #adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display. + set FRAMETYPELABELS [dict create] if {[info commands ::textblock::frame] ne ""} { foreach ft [frametypes] { @@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock { -checkargs -default 1 -type boolean\ -help "If true do extra argument checks and provide more comprehensive error info. - Set false for slight performance improvement." + As the argument parser loads around 16 default frame + samples dynamically, this can add add up as each may + take 10s of microseconds. For many-framed tables + and other applications this can add up. + Set false for performance improvement." -etabs -default 0\ -help "expanding tabs - experimental/unimplemented." 
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\ @@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock { #use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache set optnames [tcl::dict::keys $opts] set opts_ok 1 ;#default assumption + #NOTE: mis-spelling options in this list can trigger $opts_ok false + #and fallback to using punk::args to parse unnecessarily. - performance can degrade noticeably on tables foreach {k v} $optlist { set k2 [tcl::prefix::match -error "" $optnames $k] switch -- $k2 { - -etabs - -type - -boxlimits - -boxmap - -join + -etabs - -type - -boxlimits - -boxmap - -joins - -title - -titlealign - -subtitle - -subtitlealign - -width - -height - -ansiborder - -ansibase - -blockalign - -textalign - -ellipsis @@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock { #only use punk::args if check_args is true or our basic checks failed #never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame if {[llength $args] != 1 && (!$opts_ok || $check_args)} { + #as frame is called a lot within table building - checking args can have a *big* impact on final performance.
set argd [punk::args::get_by_id ::textblock::frame $args] set opts [dict get $argd opts] set contents [dict get $argd values contents] diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm index 675f42b0..f8123b94 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm @@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char { if {[tcl::string::last \n $text] >= 0} { error "string_width accepts only a single line" } - tailcall ansifreestring_width $text + #tailcall ansifreestring_width $text + ansifreestring_width $text } #todo - consider disallowing/erroring out when \r \n in string? @@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach c $codes { - if {$c <= 255 && !($c < 31 || $c == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$c <= 255} { + if {$c == 9 || ($c >= 31 && $c != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$c < 917504 || $c > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $c] @@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char { } proc wcswidth_single {char} { scan $char %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { + if {$dec <= 255} { + if {$dec == 9} { + #tab always represented by at least one char in terminal etc. + #caller will need to process tabs themselves to determine extra width applicable to their circumstance. 
+ return 1 + } + if {($dec < 31 || $dec == 127)} { + return 0 + } #review - non-printing ascii? why does textutil::wcswidth report 1 ?? #todo - compare with python or other lang wcwidth return 1 @@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char { set width 0 foreach c [split $string {}] { scan $c %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {$dec == 9 || ($dec >= 31 && $dec != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint @@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach dec $codes { - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {($dec ==9 || ($dec >= 31 && $dec != 127))} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcswidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] @@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char { #review - non-printing ascii? why does textutil::wcswidth report 1 ?? 
#todo - compare with python or other lang wcwidth if {!($dec < 31 || $dec == 127)} { - incr width + incr width } } else { #TODO - various other joiners and non-printing chars @@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char { #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #todo - document that these shouldn't be present in input rather than explicitly checking here - #c0 controls - set re_ascii_c0 {[\U0000-\U001F]} + #c0 controls + del (127 7f) - tab + #set re_ascii_c0 {[\U0000-\U001F]} + set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]} set text [regsub -all $re_ascii_c0 $text ""] #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective @@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char { # return [tcl::string::length $text] #} if {![regexp "\[\uFF-\U10FFFF\]" $text]} { - #return [tcl::string::length $text] - return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii + return [tcl::string::length $text] + #punk::char::wcswidth has to split and examine dec value of each code + #By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW + #return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii } #split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first? 
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm index c102ca29..4a7e3c32 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm @@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock { set RST [a] proc frame_samples {} { + #@ for example 50us per frame and 16 frames - we are already at 800us + #As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up. + #textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call + #frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of + #adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display. + set FRAMETYPELABELS [dict create] if {[info commands ::textblock::frame] ne ""} { foreach ft [frametypes] { @@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock { -checkargs -default 1 -type boolean\ -help "If true do extra argument checks and provide more comprehensive error info. - Set false for slight performance improvement." + As the argument parser loads around 16 default frame + samples dynamically, this can add add up as each may + take 10s of microseconds. For many-framed tables + and other applications this can add up. + Set false for performance improvement." -etabs -default 0\ -help "expanding tabs - experimental/unimplemented." 
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\ @@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock { #use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache set optnames [tcl::dict::keys $opts] set opts_ok 1 ;#default assumption + #NOTE: mis-spelling options in this list can trigger $opts_ok false + #and fallback to using punk::args to parse unnecessarily. - performance can degrade noticeably on tables foreach {k v} $optlist { set k2 [tcl::prefix::match -error "" $optnames $k] switch -- $k2 { - -etabs - -type - -boxlimits - -boxmap - -join + -etabs - -type - -boxlimits - -boxmap - -joins - -title - -titlealign - -subtitle - -subtitlealign - -width - -height - -ansiborder - -ansibase - -blockalign - -textalign - -ellipsis @@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock { #only use punk::args if check_args is true or our basic checks failed #never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame if {[llength $args] != 1 && (!$opts_ok || $check_args)} { + #as frame is called a lot within table building - checking args can have a *big* impact on final performance. set argd [punk::args::get_by_id ::textblock::frame $args] set opts [dict get $argd opts] set contents [dict get $argd values contents] diff --git a/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm index 675f42b0..f8123b94 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm @@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char { if {[tcl::string::last \n $text] >= 0} { error "string_width accepts only a single line" } - tailcall ansifreestring_width $text + #tailcall ansifreestring_width $text + ansifreestring_width $text } #todo - consider disallowing/erroring out when \r \n in string?
@@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach c $codes { - if {$c <= 255 && !($c < 31 || $c == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$c <= 255} { + if {$c == 9 || ($c >= 31 && $c != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcwidth + incr width + } } elseif {$c < 917504 || $c > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $c] @@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char { } proc wcswidth_single {char} { scan $char %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { + if {$dec <= 255} { + if {$dec == 9} { + #tab always represented by at least one char in terminal etc. + #caller will need to process tabs themselves to determine extra width applicable to their circumstance. + return 1 + } + if {($dec < 31 || $dec == 127)} { + return 0 + } #review - non-printing ascii? why does textutil::wcswidth report 1 ?? #todo - compare with python or other lang wcwidth return 1 @@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char { set width 0 foreach c [split $string {}] { scan $c %c dec - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {$dec == 9 || ($dec >= 31 && $dec != 127)} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? 
+ #todo - compare with python or other lang wcwidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint @@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char { set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] foreach dec $codes { - if {$dec <= 255 && !($dec < 31 || $dec == 127)} { - #review - non-printing ascii? why does textutil::wcswidth report 1 ?? - #todo - compare with python or other lang wcwidth - incr width + if {$dec <= 255} { + if {($dec ==9 || ($dec >= 31 && $dec != 127))} { + #review - non-printing ascii? why does textutil::wcswidth report 1 ?? + #todo - compare with python or other lang wcswidth + incr width + } } elseif {$dec < 917504 || $dec > 917631} { #TODO - various other joiners and non-printing chars set w [textutil::wcswidth_char $dec] @@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char { #review - non-printing ascii? why does textutil::wcswidth report 1 ?? 
#todo - compare with python or other lang wcwidth if {!($dec < 31 || $dec == 127)} { - incr width + incr width } } else { #TODO - various other joiners and non-printing chars @@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char { #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #todo - document that these shouldn't be present in input rather than explicitly checking here - #c0 controls - set re_ascii_c0 {[\U0000-\U001F]} + #c0 controls + del (127 7f) - tab + #set re_ascii_c0 {[\U0000-\U001F]} + set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]} set text [regsub -all $re_ascii_c0 $text ""] #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective @@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char { # return [tcl::string::length $text] #} if {![regexp "\[\uFF-\U10FFFF\]" $text]} { - #return [tcl::string::length $text] - return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii + return [tcl::string::length $text] + #punk::char::wcswidth has to split and examine dec value of each code + #By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW + #return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii } #split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first? 
diff --git a/src/vfs/_vfscommon.vfs/modules/textblock-0.1.3.tm b/src/vfs/_vfscommon.vfs/modules/textblock-0.1.3.tm index c102ca29..4a7e3c32 100644 --- a/src/vfs/_vfscommon.vfs/modules/textblock-0.1.3.tm +++ b/src/vfs/_vfscommon.vfs/modules/textblock-0.1.3.tm @@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock { set RST [a] proc frame_samples {} { + #@ for example 50us per frame and 16 frames - we are already at 800us + #As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up. + #textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call + #frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of + #adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display. + set FRAMETYPELABELS [dict create] if {[info commands ::textblock::frame] ne ""} { foreach ft [frametypes] { @@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock { -checkargs -default 1 -type boolean\ -help "If true do extra argument checks and provide more comprehensive error info. - Set false for slight performance improvement." + As the argument parser loads around 16 default frame + samples dynamically, this can add add up as each may + take 10s of microseconds. For many-framed tables + and other applications this can add up. + Set false for performance improvement." -etabs -default 0\ -help "expanding tabs - experimental/unimplemented." 
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\ @@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock { #use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache set optnames [tcl::dict::keys $opts] set opts_ok 1 ;#default assumption + #NOTE: mis-spelling options in this list can trigger $opts_ok false + #and fallback to using punk::args to parse unnecessarily. - performance can degrade noticeably on tables foreach {k v} $optlist { set k2 [tcl::prefix::match -error "" $optnames $k] switch -- $k2 { - -etabs - -type - -boxlimits - -boxmap - -join + -etabs - -type - -boxlimits - -boxmap - -joins - -title - -titlealign - -subtitle - -subtitlealign - -width - -height - -ansiborder - -ansibase - -blockalign - -textalign - -ellipsis @@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock { #only use punk::args if check_args is true or our basic checks failed #never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame if {[llength $args] != 1 && (!$opts_ok || $check_args)} { + #as frame is called a lot within table building - checking args can have a *big* impact on final performance. set argd [punk::args::get_by_id ::textblock::frame $args] set opts [dict get $argd opts] set contents [dict get $argd values contents]