|
|
|
@ -1967,7 +1967,8 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
if {[tcl::string::last \n $text] >= 0} { |
|
|
|
|
error "string_width accepts only a single line" |
|
|
|
|
} |
|
|
|
|
tailcall ansifreestring_width $text |
|
|
|
|
#tailcall ansifreestring_width $text |
|
|
|
|
ansifreestring_width $text |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#todo - consider disallowing/erroring out when \r \n in string? |
|
|
|
@ -1988,10 +1989,12 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
|
|
|
|
|
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] |
|
|
|
|
foreach c $codes { |
|
|
|
|
if {$c <= 255 && !($c < 31 || $c == 127)} { |
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcwidth |
|
|
|
|
incr width |
|
|
|
|
if {$c <= 255} { |
|
|
|
|
if {$c == 9 || ($c >= 31 && $c != 127)} { |
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcwidth |
|
|
|
|
incr width |
|
|
|
|
} |
|
|
|
|
} elseif {$c < 917504 || $c > 917631} { |
|
|
|
|
#TODO - various other joiners and non-printing chars |
|
|
|
|
set w [textutil::wcswidth_char $c] |
|
|
|
@ -2069,7 +2072,15 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
} |
|
|
|
|
proc wcswidth_single {char} { |
|
|
|
|
scan $char %c dec |
|
|
|
|
if {$dec <= 255 && !($dec < 31 || $dec == 127)} { |
|
|
|
|
if {$dec <= 255} { |
|
|
|
|
if {$dec == 9} { |
|
|
|
|
#tab always represented by at least one char in terminal etc. |
|
|
|
|
#caller will need to process tabs themselves to determine extra width applicable to their circumstance. |
|
|
|
|
return 1 |
|
|
|
|
} |
|
|
|
|
if {($dec < 31 || $dec == 127)} { |
|
|
|
|
return 0 |
|
|
|
|
} |
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcwidth |
|
|
|
|
return 1 |
|
|
|
@ -2084,10 +2095,12 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
set width 0 |
|
|
|
|
foreach c [split $string {}] { |
|
|
|
|
scan $c %c dec |
|
|
|
|
if {$dec <= 255 && !($dec < 31 || $dec == 127)} { |
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcwidth |
|
|
|
|
incr width |
|
|
|
|
if {$dec <= 255} { |
|
|
|
|
if {$dec == 9 || ($dec >= 31 && $dec != 127)} { |
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcwidth |
|
|
|
|
incr width |
|
|
|
|
} |
|
|
|
|
} elseif {$dec < 917504 || $dec > 917631} { |
|
|
|
|
#TODO - various other joiners and non-printing chars |
|
|
|
|
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint |
|
|
|
@ -2118,10 +2131,12 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
|
|
|
|
|
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] |
|
|
|
|
foreach dec $codes { |
|
|
|
|
if {$dec <= 255 && !($dec < 31 || $dec == 127)} { |
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcwidth |
|
|
|
|
incr width |
|
|
|
|
if {$dec <= 255} { |
|
|
|
|
if {($dec ==9 || ($dec >= 31 && $dec != 127))} { |
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcswidth |
|
|
|
|
incr width |
|
|
|
|
} |
|
|
|
|
} elseif {$dec < 917504 || $dec > 917631} { |
|
|
|
|
#TODO - various other joiners and non-printing chars |
|
|
|
|
set w [textutil::wcswidth_char $dec] |
|
|
|
@ -2152,7 +2167,7 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? |
|
|
|
|
#todo - compare with python or other lang wcwidth |
|
|
|
|
if {!($dec < 31 || $dec == 127)} { |
|
|
|
|
incr width |
|
|
|
|
incr width |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
#TODO - various other joiners and non-printing chars |
|
|
|
@ -2235,8 +2250,9 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
#we should only map control sequences to nothing after processing ones with length effects, such as backspace \b (\x08) or DEL (\x7f) |
|
|
|
|
#todo - document that these shouldn't be present in input rather than explicitly checking here |
|
|
|
|
|
|
|
|
|
#c0 controls |
|
|
|
|
set re_ascii_c0 {[\U0000-\U001F]} |
|
|
|
|
#c0 controls + del (127 7f) - tab |
|
|
|
|
#set re_ascii_c0 {[\U0000-\U001F]} |
|
|
|
|
set re_ascii_c0 {[\u0000-\u0008\u000A-\u001F\u007F]} |
|
|
|
|
set text [regsub -all $re_ascii_c0 $text ""] |
|
|
|
|
|
|
|
|
|
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective |
|
|
|
@ -2252,8 +2268,10 @@ tcl::namespace::eval punk::char {
|
|
|
|
|
# return [tcl::string::length $text] |
|
|
|
|
#} |
|
|
|
|
if {![regexp "\[\uFF-\U10FFFF\]" $text]} { |
|
|
|
|
#return [tcl::string::length $text] |
|
|
|
|
return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii |
|
|
|
|
return [tcl::string::length $text] |
|
|
|
|
#punk::char::wcswidth has to split and examine dec value of each code |
|
|
|
|
#By stripping controls + 7F (leaving tab) we've already eliminated the non-printable ascii - REVIEW |
|
|
|
|
#return [punk::char::wcswidth $text] ;#still use our wcswidth to account for non-printable ascii |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first? |
|
|
|
|