Browse Source

fix textblock::frame slowdown affecting tables (inadvertent arg parsing on happy path)

master
Julian Noble 1 day ago
parent
commit
220741a8f5
  1. 56
      src/bootsupport/modules/punk/char-0.1.0.tm
  2. 17
      src/bootsupport/modules/textblock-0.1.3.tm
  3. 56
      src/modules/punk/char-999999.0a1.0.tm
  4. 17
      src/modules/textblock-999999.0a1.0.tm
  5. 56
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
  6. 17
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm
  7. 56
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
  8. 17
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm
  9. 56
      src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm
  10. 17
      src/vfs/_vfscommon.vfs/modules/textblock-0.1.3.tm

56
src/bootsupport/modules/punk/char-0.1.0.tm

@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char {
if {[tcl::string::last \n $text] >= 0} {
error "string_width accepts only a single line"
}
tailcall ansifreestring_width $text
#tailcall ansifreestring_width $text
ansifreestring_width $text
}
#todo - consider disallowing/erroring out when \r \n in string?
@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach c $codes {
if {$c <= 255 && !($c < 31 || $c == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$c <= 255} {
if {$c == 9 || ($c >= 31 && $c != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$c < 917504 || $c > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $c]
@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char {
}
proc wcswidth_single {char} {
scan $char %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
if {$dec <= 255} {
if {$dec == 9} {
#tab always represented by at least one char in terminal etc.
#caller will need to process tabs themselves to determine extra width applicable to their circumstance.
return 1
}
if {($dec < 31 || $dec == 127)} {
return 0
}
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
return 1
@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char {
set width 0
foreach c [split $string {}] {
scan $c %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {$dec == 9 || ($dec >= 31 && $dec != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint
@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach dec $codes {
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {($dec ==9 || ($dec >= 31 && $dec != 127))} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcswidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec]
@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
if {!($dec < 31 || $dec == 127)} {
incr width
incr width
}
} else {
#TODO - various other joiners and non-printing chars
@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char {
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
#c0 controls
set re_ascii_c0 {[\U0000-\U001F]}
#c0 controls + del (127 7f) - tab
#set re_ascii_c0 {[\U0000-\U001F]}
set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]}
set text [regsub -all $re_ascii_c0 $text ""]
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char {
# return [tcl::string::length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
#return [tcl::string::length $text]
return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
return [tcl::string::length $text]
#punk::char::wcswidth has to split and examine dec value of each code
#By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW
#return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?

17
src/bootsupport/modules/textblock-0.1.3.tm

@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock {
set RST [a]
proc frame_samples {} {
#@ for example 50us per frame and 16 frames - we are already at 800us
#As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up.
#textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call
#frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of
#adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display.
set FRAMETYPELABELS [dict create]
if {[info commands ::textblock::frame] ne ""} {
foreach ft [frametypes] {
@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock {
-checkargs -default 1 -type boolean\
-help "If true do extra argument checks and
provide more comprehensive error info.
Set false for slight performance improvement."
As the argument parser loads around 16 default frame
samples dynamically, this can add add up as each may
take 10s of microseconds. For many-framed tables
and other applications this can add up.
Set false for performance improvement."
-etabs -default 0\
-help "expanding tabs - experimental/unimplemented."
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\
@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock {
#use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache
set optnames [tcl::dict::keys $opts]
set opts_ok 1 ;#default assumption
#NOTE: mis-spelling options in this list can trigger $opts_ok false
#and fallback to using punk::args to parse unnecessarily. - performance can degrade noticeably on tables
foreach {k v} $optlist {
set k2 [tcl::prefix::match -error "" $optnames $k]
switch -- $k2 {
-etabs - -type - -boxlimits - -boxmap - -join
-etabs - -type - -boxlimits - -boxmap - -joins
- -title - -titlealign - -subtitle - -subtitlealign - -width - -height
- -ansiborder - -ansibase
- -blockalign - -textalign - -ellipsis
@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock {
#only use punk::args if check_args is true or our basic checks failed
#never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame
if {[llength $args] != 1 && (!$opts_ok || $check_args)} {
#as frame is called a lot within table building - checking args can have a *big* impact on final performance.
set argd [punk::args::get_by_id ::textblock::frame $args]
set opts [dict get $argd opts]
set contents [dict get $argd values contents]

56
src/modules/punk/char-999999.0a1.0.tm

@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char {
if {[tcl::string::last \n $text] >= 0} {
error "string_width accepts only a single line"
}
tailcall ansifreestring_width $text
#tailcall ansifreestring_width $text
ansifreestring_width $text
}
#todo - consider disallowing/erroring out when \r \n in string?
@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach c $codes {
if {$c <= 255 && !($c < 31 || $c == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$c <= 255} {
if {$c == 9 || ($c >= 31 && $c != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$c < 917504 || $c > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $c]
@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char {
}
proc wcswidth_single {char} {
scan $char %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
if {$dec <= 255} {
if {$dec == 9} {
#tab always represented by at least one char in terminal etc.
#caller will need to process tabs themselves to determine extra width applicable to their circumstance.
return 1
}
if {($dec < 31 || $dec == 127)} {
return 0
}
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
return 1
@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char {
set width 0
foreach c [split $string {}] {
scan $c %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {$dec == 9 || ($dec >= 31 && $dec != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint
@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach dec $codes {
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {($dec ==9 || ($dec >= 31 && $dec != 127))} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcswidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec]
@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
if {!($dec < 31 || $dec == 127)} {
incr width
incr width
}
} else {
#TODO - various other joiners and non-printing chars
@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char {
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
#c0 controls
set re_ascii_c0 {[\U0000-\U001F]}
#c0 controls + del (127 7f) - tab
#set re_ascii_c0 {[\U0000-\U001F]}
set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]}
set text [regsub -all $re_ascii_c0 $text ""]
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char {
# return [tcl::string::length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
#return [tcl::string::length $text]
return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
return [tcl::string::length $text]
#punk::char::wcswidth has to split and examine dec value of each code
#By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW
#return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?

17
src/modules/textblock-999999.0a1.0.tm

@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock {
set RST [a]
proc frame_samples {} {
#@ for example 50us per frame and 16 frames - we are already at 800us
#As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up.
#textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call
#frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of
#adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display.
set FRAMETYPELABELS [dict create]
if {[info commands ::textblock::frame] ne ""} {
foreach ft [frametypes] {
@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock {
-checkargs -default 1 -type boolean\
-help "If true do extra argument checks and
provide more comprehensive error info.
Set false for slight performance improvement."
As the argument parser loads around 16 default frame
samples dynamically, this can add add up as each may
take 10s of microseconds. For many-framed tables
and other applications this can add up.
Set false for performance improvement."
-etabs -default 0\
-help "expanding tabs - experimental/unimplemented."
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\
@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock {
#use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache
set optnames [tcl::dict::keys $opts]
set opts_ok 1 ;#default assumption
#NOTE: mis-spelling options in this list can trigger $opts_ok false
#and fallback to using punk::args to parse unnecessarily. - performance can degrade noticeably on tables
foreach {k v} $optlist {
set k2 [tcl::prefix::match -error "" $optnames $k]
switch -- $k2 {
-etabs - -type - -boxlimits - -boxmap - -join
-etabs - -type - -boxlimits - -boxmap - -joins
- -title - -titlealign - -subtitle - -subtitlealign - -width - -height
- -ansiborder - -ansibase
- -blockalign - -textalign - -ellipsis
@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock {
#only use punk::args if check_args is true or our basic checks failed
#never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame
if {[llength $args] != 1 && (!$opts_ok || $check_args)} {
#as frame is called a lot within table building - checking args can have a *big* impact on final performance.
set argd [punk::args::get_by_id ::textblock::frame $args]
set opts [dict get $argd opts]
set contents [dict get $argd values contents]

56
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm

@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char {
if {[tcl::string::last \n $text] >= 0} {
error "string_width accepts only a single line"
}
tailcall ansifreestring_width $text
#tailcall ansifreestring_width $text
ansifreestring_width $text
}
#todo - consider disallowing/erroring out when \r \n in string?
@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach c $codes {
if {$c <= 255 && !($c < 31 || $c == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$c <= 255} {
if {$c == 9 || ($c >= 31 && $c != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$c < 917504 || $c > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $c]
@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char {
}
proc wcswidth_single {char} {
scan $char %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
if {$dec <= 255} {
if {$dec == 9} {
#tab always represented by at least one char in terminal etc.
#caller will need to process tabs themselves to determine extra width applicable to their circumstance.
return 1
}
if {($dec < 31 || $dec == 127)} {
return 0
}
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
return 1
@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char {
set width 0
foreach c [split $string {}] {
scan $c %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {$dec == 9 || ($dec >= 31 && $dec != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint
@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach dec $codes {
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {($dec ==9 || ($dec >= 31 && $dec != 127))} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcswidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec]
@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
if {!($dec < 31 || $dec == 127)} {
incr width
incr width
}
} else {
#TODO - various other joiners and non-printing chars
@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char {
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
#c0 controls
set re_ascii_c0 {[\U0000-\U001F]}
#c0 controls + del (127 7f) - tab
#set re_ascii_c0 {[\U0000-\U001F]}
set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]}
set text [regsub -all $re_ascii_c0 $text ""]
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char {
# return [tcl::string::length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
#return [tcl::string::length $text]
return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
return [tcl::string::length $text]
#punk::char::wcswidth has to split and examine dec value of each code
#By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW
#return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?

17
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm

@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock {
set RST [a]
proc frame_samples {} {
#@ for example 50us per frame and 16 frames - we are already at 800us
#As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up.
#textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call
#frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of
#adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display.
set FRAMETYPELABELS [dict create]
if {[info commands ::textblock::frame] ne ""} {
foreach ft [frametypes] {
@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock {
-checkargs -default 1 -type boolean\
-help "If true do extra argument checks and
provide more comprehensive error info.
Set false for slight performance improvement."
As the argument parser loads around 16 default frame
samples dynamically, this can add add up as each may
take 10s of microseconds. For many-framed tables
and other applications this can add up.
Set false for performance improvement."
-etabs -default 0\
-help "expanding tabs - experimental/unimplemented."
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\
@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock {
#use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache
set optnames [tcl::dict::keys $opts]
set opts_ok 1 ;#default assumption
#NOTE: mis-spelling options in this list can trigger $opts_ok false
#and fallback to using punk::args to parse unnecessarily. - performance can degrade noticeably on tables
foreach {k v} $optlist {
set k2 [tcl::prefix::match -error "" $optnames $k]
switch -- $k2 {
-etabs - -type - -boxlimits - -boxmap - -join
-etabs - -type - -boxlimits - -boxmap - -joins
- -title - -titlealign - -subtitle - -subtitlealign - -width - -height
- -ansiborder - -ansibase
- -blockalign - -textalign - -ellipsis
@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock {
#only use punk::args if check_args is true or our basic checks failed
#never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame
if {[llength $args] != 1 && (!$opts_ok || $check_args)} {
#as frame is called a lot within table building - checking args can have a *big* impact on final performance.
set argd [punk::args::get_by_id ::textblock::frame $args]
set opts [dict get $argd opts]
set contents [dict get $argd values contents]

56
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm

@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char {
if {[tcl::string::last \n $text] >= 0} {
error "string_width accepts only a single line"
}
tailcall ansifreestring_width $text
#tailcall ansifreestring_width $text
ansifreestring_width $text
}
#todo - consider disallowing/erroring out when \r \n in string?
@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach c $codes {
if {$c <= 255 && !($c < 31 || $c == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$c <= 255} {
if {$c == 9 || ($c >= 31 && $c != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$c < 917504 || $c > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $c]
@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char {
}
proc wcswidth_single {char} {
scan $char %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
if {$dec <= 255} {
if {$dec == 9} {
#tab always represented by at least one char in terminal etc.
#caller will need to process tabs themselves to determine extra width applicable to their circumstance.
return 1
}
if {($dec < 31 || $dec == 127)} {
return 0
}
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
return 1
@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char {
set width 0
foreach c [split $string {}] {
scan $c %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {$dec == 9 || ($dec >= 31 && $dec != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint
@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach dec $codes {
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {($dec ==9 || ($dec >= 31 && $dec != 127))} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcswidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec]
@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
if {!($dec < 31 || $dec == 127)} {
incr width
incr width
}
} else {
#TODO - various other joiners and non-printing chars
@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char {
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
#c0 controls
set re_ascii_c0 {[\U0000-\U001F]}
#c0 controls + del (127 7f) - tab
#set re_ascii_c0 {[\U0000-\U001F]}
set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]}
set text [regsub -all $re_ascii_c0 $text ""]
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char {
# return [tcl::string::length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
#return [tcl::string::length $text]
return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
return [tcl::string::length $text]
#punk::char::wcswidth has to split and examine dec value of each code
#By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW
#return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?

17
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm

@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock {
set RST [a]
proc frame_samples {} {
#@ for example 50us per frame and 16 frames - we are already at 800us
#As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up.
#textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call
#frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of
#adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display.
set FRAMETYPELABELS [dict create]
if {[info commands ::textblock::frame] ne ""} {
foreach ft [frametypes] {
@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock {
-checkargs -default 1 -type boolean\
-help "If true do extra argument checks and
provide more comprehensive error info.
Set false for slight performance improvement."
As the argument parser loads around 16 default frame
samples dynamically, this can add add up as each may
take 10s of microseconds. For many-framed tables
and other applications this can add up.
Set false for performance improvement."
-etabs -default 0\
-help "expanding tabs - experimental/unimplemented."
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\
@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock {
#use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache
set optnames [tcl::dict::keys $opts]
set opts_ok 1 ;#default assumption
#NOTE: mis-spelling options in this list can trigger $opts_ok false
#and fallback to using punk::args to parse unnecessarily. - performance can degrade noticeably on tables
foreach {k v} $optlist {
set k2 [tcl::prefix::match -error "" $optnames $k]
switch -- $k2 {
-etabs - -type - -boxlimits - -boxmap - -join
-etabs - -type - -boxlimits - -boxmap - -joins
- -title - -titlealign - -subtitle - -subtitlealign - -width - -height
- -ansiborder - -ansibase
- -blockalign - -textalign - -ellipsis
@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock {
#only use punk::args if check_args is true or our basic checks failed
#never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame
if {[llength $args] != 1 && (!$opts_ok || $check_args)} {
#as frame is called a lot within table building - checking args can have a *big* impact on final performance.
set argd [punk::args::get_by_id ::textblock::frame $args]
set opts [dict get $argd opts]
set contents [dict get $argd values contents]

56
src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm

@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char {
if {[tcl::string::last \n $text] >= 0} {
error "string_width accepts only a single line"
}
tailcall ansifreestring_width $text
#tailcall ansifreestring_width $text
ansifreestring_width $text
}
#todo - consider disallowing/erroring out when \r \n in string?
@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach c $codes {
if {$c <= 255 && !($c < 31 || $c == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$c <= 255} {
if {$c == 9 || ($c >= 31 && $c != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$c < 917504 || $c > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $c]
@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char {
}
proc wcswidth_single {char} {
scan $char %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
if {$dec <= 255} {
if {$dec == 9} {
#tab always represented by at least one char in terminal etc.
#caller will need to process tabs themselves to determine extra width applicable to their circumstance.
return 1
}
if {($dec < 31 || $dec == 127)} {
return 0
}
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
return 1
@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char {
set width 0
foreach c [split $string {}] {
scan $c %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {$dec == 9 || ($dec >= 31 && $dec != 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint
@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char {
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach dec $codes {
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
if {$dec <= 255} {
if {($dec ==9 || ($dec >= 31 && $dec != 127))} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcswidth
incr width
}
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec]
@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
if {!($dec < 31 || $dec == 127)} {
incr width
incr width
}
} else {
#TODO - various other joiners and non-printing chars
@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char {
#we should only map control sequences to nothing after processing ones with length effects, such as backspace \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
#c0 controls
set re_ascii_c0 {[\U0000-\U001F]}
#c0 controls + del (127 7f) - tab
#set re_ascii_c0 {[\U0000-\U001F]}
set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]}
set text [regsub -all $re_ascii_c0 $text ""]
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char {
# return [tcl::string::length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
#return [tcl::string::length $text]
return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
return [tcl::string::length $text]
#punk::char::wcswidth has to split and examine dec value of each code
#By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW
#return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?

17
src/vfs/_vfscommon.vfs/modules/textblock-0.1.3.tm

@ -7725,6 +7725,12 @@ tcl::namespace::eval textblock {
set RST [a]
proc frame_samples {} {
#@ for example 50us per frame and 16 frames - we are already at 800us
#As this can be triggered in @dynamic punk::args parsing for textblock::frame - even that can add up.
#textblock::frame shouldn't use punk::args parsing on the happy path - so this isn't an issue if -checkargs 0 supplied in that call
#frame_samples might be a candidate for memoization/caching - but we need to leave open the possibility of
#adding/loading or even editing frametypes in a running system and having that reflected in the textblock::frame argument usage display.
set FRAMETYPELABELS [dict create]
if {[info commands ::textblock::frame] ne ""} {
foreach ft [frametypes] {
@ -7756,7 +7762,11 @@ tcl::namespace::eval textblock {
-checkargs -default 1 -type boolean\
-help "If true do extra argument checks and
provide more comprehensive error info.
Set false for slight performance improvement."
As the argument parser loads around 16 default frame
samples dynamically, this can add up as each may
take 10s of microseconds. For many-framed tables
and other applications this can add up.
Set false for performance improvement."
-etabs -default 0\
-help "expanding tabs - experimental/unimplemented."
-type -default light -choices {${[textblock::frametypes]}} -choicerestricted 0 -choicecolumns 8 -type dict\
@ -7876,10 +7886,12 @@ tcl::namespace::eval textblock {
#use -buildcache 1 with -usecache 0 for debugging cache issues so we can inspect using textblock::frame_cache
set optnames [tcl::dict::keys $opts]
set opts_ok 1 ;#default assumption
#NOTE: mis-spelling options in this list can trigger $opts_ok false
#and fall back to using punk::args to parse unnecessarily - performance can degrade noticeably on tables
foreach {k v} $optlist {
set k2 [tcl::prefix::match -error "" $optnames $k]
switch -- $k2 {
-etabs - -type - -boxlimits - -boxmap - -join
-etabs - -type - -boxlimits - -boxmap - -joins
- -title - -titlealign - -subtitle - -subtitlealign - -width - -height
- -ansiborder - -ansibase
- -blockalign - -textalign - -ellipsis
@ -7900,6 +7912,7 @@ tcl::namespace::eval textblock {
#only use punk::args if check_args is true or our basic checks failed
#never need to checkargs if only one argument supplied even if it looks like an option - as it will be treated as data to frame
if {[llength $args] != 1 && (!$opts_ok || $check_args)} {
#as frame is called a lot within table building - checking args can have a *big* impact on final performance.
set argd [punk::args::get_by_id ::textblock::frame $args]
set opts [dict get $argd opts]
set contents [dict get $argd values contents]

Loading…
Cancel
Save