Browse Source

whitespace changes and tclint.toml config for tclint LSP

master
Julian Noble 4 weeks ago
parent
commit
e8c1de935e
  1. 6
      src/modules/canaryspace-999999.0a1.0.tm
  2. 25
      src/modules/punk-0.1.tm
  3. 270
      src/modules/punk/char-999999.0a1.0.tm
  4. 2
      src/modules/punk/console-999999.0a1.0.tm
  5. 51
      src/modules/punk/icomm-999999.0a1.0.tm
  6. 3
      src/modules/punk/jtest.tcl
  7. 407
      src/modules/punk/repl-999999.0a1.0.tm
  8. 34
      tclint.toml

6
src/modules/canaryspace-999999.0a1.0.tm

@ -13,8 +13,8 @@
# Meta summary Diagnostic tool for namespace navigation/introspection to help avoid command conflicts. # Meta summary Diagnostic tool for namespace navigation/introspection to help avoid command conflicts.
# Meta description canaryspace loads the ::canaryspace namespace with wrappers for the set of commands # Meta description canaryspace loads the ::canaryspace namespace with wrappers for the set of commands
# Meta description that exist in the global namespace :: at the time the canaryspace package is loaded. # Meta description that exist in the global namespace :: at the time the canaryspace package is loaded.
# Meta description These commands just emit info to stderr to assist in determining whether calls are # Meta description These commands just emit info to stderr to assist in determining whether calls are
# Meta description unintentionally being run in the namespace. # Meta description unintentionally being run in the namespace.
# Meta description This is often the case with commands which use uplevel 1 or similar constructs to call # Meta description This is often the case with commands which use uplevel 1 or similar constructs to call
# Meta description code in the callers namespace. If such commands need to run in arbitrary namespaces # Meta description code in the callers namespace. If such commands need to run in arbitrary namespaces
# Meta description which may have arbitrary commands then uplevelled commands may need to be prefixed with # Meta description which may have arbitrary commands then uplevelled commands may need to be prefixed with
@ -68,6 +68,6 @@ namespace eval canaryspace {
## Ready ## Ready
package provide canaryspace [namespace eval canaryspace { package provide canaryspace [namespace eval canaryspace {
::variable version ::variable version
::set version 999999.0a1.0 ::set version 999999.0a1.0
}] }]
return return

25
src/modules/punk-0.1.tm

@ -1583,7 +1583,7 @@ namespace eval punk {
} }
%# { %# {
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%# not string length is not supported" error "!%# not string length is not supported"
} }
#string length - REVIEW - #string length - REVIEW -
@ -1595,7 +1595,7 @@ namespace eval punk {
%%# { %%# {
#experimental #experimental
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%%# not string length is not supported" error "!%%# not string length is not supported"
} }
#string length - REVIEW - #string length - REVIEW -
@ -1606,7 +1606,7 @@ namespace eval punk {
} }
%str { %str {
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%str - not string-get is not supported" error "!%str - not string-get is not supported"
} }
lappend INDEX_OPERATIONS string-get lappend INDEX_OPERATIONS string-get
@ -1617,7 +1617,7 @@ namespace eval punk {
%sp { %sp {
#experimental #experimental
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%sp - not string-space is not supported" error "!%sp - not string-space is not supported"
} }
lappend INDEX_OPERATIONS string-space lappend INDEX_OPERATIONS string-space
@ -1628,7 +1628,7 @@ namespace eval punk {
%empty { %empty {
#experimental #experimental
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%empty - not string-empty is not supported" error "!%empty - not string-empty is not supported"
} }
lappend INDEX_OPERATIONS string-empty lappend INDEX_OPERATIONS string-empty
@ -1638,7 +1638,7 @@ namespace eval punk {
} }
@words { @words {
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%words - not list-words-from-string is not supported" error "!%words - not list-words-from-string is not supported"
} }
lappend INDEX_OPERATIONS list-words-from-string lappend INDEX_OPERATIONS list-words-from-string
@ -1650,7 +1650,7 @@ namespace eval punk {
#experimental - leading character based on result not input(?) #experimental - leading character based on result not input(?)
#input type is string - but output is list #input type is string - but output is list
set active_key_type "list" set active_key_type "list"
if $get_not { if {$get_not} {
error "!%chars - not list-chars-from-string is not supported" error "!%chars - not list-chars-from-string is not supported"
} }
lappend INDEX_OPERATIONS list-from_chars lappend INDEX_OPERATIONS list-from_chars
@ -1662,7 +1662,7 @@ namespace eval punk {
#experimental - flatten one level of list #experimental - flatten one level of list
#join without arg - output is list #join without arg - output is list
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!@join - not list-join-list is not supported" error "!@join - not list-join-list is not supported"
} }
lappend INDEX_OPERATIONS list-join-list lappend INDEX_OPERATIONS list-join-list
@ -1674,7 +1674,7 @@ namespace eval punk {
#experimental #experimental
#input type is list - but output is string #input type is list - but output is string
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%join - not string-join-list is not supported" error "!%join - not string-join-list is not supported"
} }
lappend INDEX_OPERATIONS string-join-list lappend INDEX_OPERATIONS string-join-list
@ -1684,7 +1684,7 @@ namespace eval punk {
} }
%ansiview { %ansiview {
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%# not string-ansiview is not supported" error "!%# not string-ansiview is not supported"
} }
lappend INDEX_OPERATIONS string-ansiview lappend INDEX_OPERATIONS string-ansiview
@ -1694,7 +1694,7 @@ namespace eval punk {
} }
%ansiviewstyle { %ansiviewstyle {
set active_key_type "string" set active_key_type "string"
if $get_not { if {$get_not} {
error "!%# not string-ansiviewstyle is not supported" error "!%# not string-ansiviewstyle is not supported"
} }
lappend INDEX_OPERATIONS string-ansiviewstyle lappend INDEX_OPERATIONS string-ansiviewstyle
@ -5368,6 +5368,7 @@ namespace eval punk {
#for var="val {a b c}" #for var="val {a b c}"
#proc ::punk::val {{v {}}} {tailcall lindex $v} #proc ::punk::val {{v {}}} {tailcall lindex $v}
#proc ::punk::val {{v {}}} {return $v} ;#2023 - approx 2x faster than the tailcall lindex version #proc ::punk::val {{v {}}} {return $v} ;#2023 - approx 2x faster than the tailcall lindex version
#tclint-disable-next-line
proc ::punk::val [list [list v [purelist]]] {return $v} proc ::punk::val [list [list v [purelist]]] {return $v}
#---------------- #----------------
@ -7437,7 +7438,7 @@ namespace eval punk {
foreach v $known_punk { foreach v $known_punk {
set c1 [overtype::left $col1 $v] set c1 [overtype::left $col1 $v]
if {[info exists ::env($v)]} { if {[info exists ::env($v)]} {
set c2 [overtype::left $col2 [set ::env($v)] set c2 [overtype::left $col2 [set ::env($v)]]
} else { } else {
set c2 [overtype::right $col2 "(NOT SET)"] set c2 [overtype::right $col2 "(NOT SET)"]
} }

270
src/modules/punk/char-999999.0a1.0.tm

@ -19,7 +19,7 @@
#[manpage_begin punkshell_module_punk::char 0 999999.0a1.0] #[manpage_begin punkshell_module_punk::char 0 999999.0a1.0]
#[copyright "2024"] #[copyright "2024"]
#[titledesc {character-set and unicode utilities}] [comment {-- Name section and table of contents description --}] #[titledesc {character-set and unicode utilities}] [comment {-- Name section and table of contents description --}]
#[moddesc {character-set nad unicode}] [comment {-- Description at end of page heading --}] #[moddesc {character-set nad unicode}] [comment {-- Description at end of page heading --}]
#[require punk::char] #[require punk::char]
#[keywords module encodings] #[keywords module encodings]
#[description] #[description]
@ -49,11 +49,11 @@
#*** !doctools #*** !doctools
#[item] [package {overtype}] #[item] [package {overtype}]
#[para] - #[para] -
#[item] [package {textblock}] #[item] [package {textblock}]
#[para] - #[para] -
#[item] [package console] #[item] [package console]
#[para] - #[para] -
package require Tcl 8.6- package require Tcl 8.6-
#dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review #dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review
@ -92,20 +92,20 @@ tcl::namespace::eval punk::char {
#just the 7-bit ascii. use [page ascii] for the 8-bit layout #just the 7-bit ascii. use [page ascii] for the 8-bit layout
proc ascii {} {return { proc ascii {} {return {
00 NUL 01 SOH 02 STX 03 ETX 04 EOT 05 ENQ 06 ACK 07 BEL 00 NUL 01 SOH 02 STX 03 ETX 04 EOT 05 ENQ 06 ACK 07 BEL
08 BS 09 HT 0a LF 0b VT 0c FF 0d CR 0e SO 0f SI 08 BS 09 HT 0a LF 0b VT 0c FF 0d CR 0e SO 0f SI
10 DLE 11 DC1 12 DC2 13 DC3 14 DC4 15 NAK 16 SYN 17 ETB 10 DLE 11 DC1 12 DC2 13 DC3 14 DC4 15 NAK 16 SYN 17 ETB
18 CAN 19 EM 1a SUB 1b ESC 1c FS 1d GS 1e RS 1f US 18 CAN 19 EM 1a SUB 1b ESC 1c FS 1d GS 1e RS 1f US
20 SP 21 ! 22 " 23 # 24 $ 25 % 26 & 27 ' 20 SP 21 ! 22 " 23 # 24 $ 25 % 26 & 27 '
28 ( 29 ) 2a * 2b + 2c , 2d - 2e . 2f / 28 ( 29 ) 2a * 2b + 2c , 2d - 2e . 2f /
30 0 31 1 32 2 33 3 34 4 35 5 36 6 37 7 30 0 31 1 32 2 33 3 34 4 35 5 36 6 37 7
38 8 39 9 3a : 3b ; 3c < 3d = 3e > 3f ? 38 8 39 9 3a : 3b ; 3c < 3d = 3e > 3f ?
40 @ 41 A 42 B 43 C 44 D 45 E 46 F 47 G 40 @ 41 A 42 B 43 C 44 D 45 E 46 F 47 G
48 H 49 I 4a J 4b K 4c L 4d M 4e N 4f O 48 H 49 I 4a J 4b K 4c L 4d M 4e N 4f O
50 P 51 Q 52 R 53 S 54 T 55 U 56 V 57 W 50 P 51 Q 52 R 53 S 54 T 55 U 56 V 57 W
58 X 59 Y 5a Z 5b [ 5c \ 5d ] 5e ^ 5f _ 58 X 59 Y 5a Z 5b [ 5c \ 5d ] 5e ^ 5f _
60 ` 61 a 62 b 63 c 64 d 65 e 66 f 67 g 60 ` 61 a 62 b 63 c 64 d 65 e 66 f 67 g
68 h 69 i 6a j 6b k 6c l 6d m 6e n 6f o 68 h 69 i 6a j 6b k 6c l 6d m 6e n 6f o
70 p 71 q 72 r 73 s 74 t 75 u 76 v 77 w 70 p 71 q 72 r 73 s 74 t 75 u 76 v 77 w
78 x 79 y 7a z 7b { 7c | 7d } 7e ~ 7f DEL 78 x 79 y 7a z 7b { 7c | 7d } 7e ~ 7f DEL
}} }}
@ -124,7 +124,7 @@ tcl::namespace::eval punk::char {
} elseif {[tcl::string::length $v] == 0} { } elseif {[tcl::string::length $v] == 0} {
set v " " set v " "
} }
append out "$k $v " append out "$k $v "
if {$i > 0 && $i % 8 == 0} { if {$i > 0 && $i % 8 == 0} {
set out [tcl::string::range $out 0 end-2] set out [tcl::string::range $out 0 end-2]
append out \n " " append out \n " "
@ -143,7 +143,7 @@ tcl::namespace::eval punk::char {
set out "" set out ""
append out [page dingbats] \n append out [page dingbats] \n
set unicode_dict [charset_dictget Dingbats] set unicode_dict [charset_dictget Dingbats]
append out " " append out " "
set i 1 set i 1
tcl::dict::for {k charinfo} $unicode_dict { tcl::dict::for {k charinfo} $unicode_dict {
@ -155,7 +155,7 @@ tcl::namespace::eval punk::char {
} else { } else {
set displayv $char set displayv $char
} }
append out "$k $displayv " append out "$k $displayv "
if {$i > 0 && $i % 8 == 0} { if {$i > 0 && $i % 8 == 0} {
set out [tcl::string::range $out 0 end-2] set out [tcl::string::range $out 0 end-2]
append out \n " " append out \n " "
@ -205,7 +205,7 @@ tcl::namespace::eval punk::char {
tcl::dict::lappend d $encname $mname tcl::dict::lappend d $encname $mname
} }
} }
} }
foreach enc [lsort $encnames] { foreach enc [lsort $encnames] {
set mime_enc [${encmimens}::mapencoding $enc] set mime_enc [${encmimens}::mapencoding $enc]
if {$mime_enc ne ""} { if {$mime_enc ne ""} {
@ -236,7 +236,7 @@ tcl::namespace::eval punk::char {
tailcall page $encname {*}$args tailcall page $encname {*}$args
} }
#This will not display for example, c0 glyphs for cp437 #This will not display for example, c0 glyphs for cp437
# we could use the punk::ansi::cp437_map dict - but while that might be what some expect to see - it would probably be too much magic for this function - which is intended to align more with what Tcl's encoding convertfrom/to actually does. # we could use the punk::ansi::cp437_map dict - but while that might be what some expect to see - it would probably be too much magic for this function - which is intended to align more with what Tcl's encoding convertfrom/to actually does.
# for nonprinting members of the page, 2 and 3 letter codes are used rather than unicode visualisation replacements or even unicode equivalent replacements known to be appropriate for the page # for nonprinting members of the page, 2 and 3 letter codes are used rather than unicode visualisation replacements or even unicode equivalent replacements known to be appropriate for the page
proc page {encname args} { proc page {encname args} {
@ -255,7 +255,7 @@ tcl::namespace::eval punk::char {
#set d_ascii [pagedict_raw ascii] #set d_ascii [pagedict_raw ascii]
set d_ascii [basedict] set d_ascii [basedict]
set d_asciiposn [lreverse $d_ascii] ;#values should be unique except for "???". We are mainly interested in which ones have display-names e.g cr csi set d_asciiposn [lreverse $d_ascii] ;#values should be unique except for "???". We are mainly interested in which ones have display-names e.g cr csi
#The results of this are best seen by comparing the ebcdic and ascii pages #The results of this are best seen by comparing the ebcdic and ascii pages
set d_page [pagedict_raw $encname] set d_page [pagedict_raw $encname]
@ -285,7 +285,7 @@ tcl::namespace::eval punk::char {
set displayv $bytedisplay set displayv $bytedisplay
} else { } else {
if {[tcl::string::length $rawchar] == 0} { if {[tcl::string::length $rawchar] == 0} {
set displayv " " set displayv " "
} else { } else {
#presumed 1 #presumed 1
set displayv " $rawchar " set displayv " $rawchar "
@ -294,7 +294,7 @@ tcl::namespace::eval punk::char {
} }
} }
append out "$k $displayv " append out "$k $displayv "
if {$i > 0 && $i % $cols == 0} { if {$i > 0 && $i % $cols == 0} {
set out [tcl::string::range $out 0 end-2] set out [tcl::string::range $out 0 end-2]
append out \n " " append out \n " "
@ -318,7 +318,7 @@ tcl::namespace::eval punk::char {
set outchar [encoding convertfrom $encpage $ch] set outchar [encoding convertfrom $encpage $ch]
} else { } else {
#here we will use \0xFFFD instead of our replacement string ??? - as the name pagechar implies always returning a single character. REVIEW. #here we will use \0xFFFD instead of our replacement string ??? - as the name pagechar implies always returning a single character. REVIEW.
set outchar $::punk::char::invalid_display_char set outchar $::punk::char::invalid_display_char
} }
return $outchar return $outchar
} }
@ -348,7 +348,7 @@ tcl::namespace::eval punk::char {
#set outchar [encoding convertto $encpage [format %c $num]] #set outchar [encoding convertto $encpage [format %c $num]]
set outchar [format %c $num] set outchar [format %c $num]
} else { } else {
set outchar $::punk::char::invalid_display_char set outchar $::punk::char::invalid_display_char
} }
return $outchar return $outchar
} }
@ -381,7 +381,7 @@ tcl::namespace::eval punk::char {
} }
proc pagedict_raw {encname} { proc pagedict_raw {encname} {
variable invalid ;# ="???" variable invalid ;# ="???"
set encname [encname $encname] set encname [encname $encname]
set d [tcl::dict::create] set d [tcl::dict::create]
for {set i 0} {$i < 256} {incr i} { for {set i 0} {$i < 256} {incr i} {
@ -409,7 +409,7 @@ tcl::namespace::eval punk::char {
tcl::dict::set d $k [tcl::dict::get $a128 $k] tcl::dict::set d $k [tcl::dict::get $a128 $k]
} else { } else {
# #
tcl::dict::set d $k $invalid tcl::dict::set d $k $invalid
} }
if {$i <=32} { if {$i <=32} {
@ -457,7 +457,7 @@ tcl::namespace::eval punk::char {
} }
return $d return $d
} }
proc basedict {} { proc basedict {} {
#this gives same result independent of current value of 'encoding system' #this gives same result independent of current value of 'encoding system'
set d [tcl::dict::create] set d [tcl::dict::create]
@ -529,10 +529,10 @@ tcl::namespace::eval punk::char {
return $d return $d
} }
#-- --- --- --- --- --- --- --- #-- --- --- --- --- --- --- ---
# encoding convertfrom & encoding convertto can be somewhat confusing to think about. (Need to think in reverse.) # encoding convertfrom & encoding convertto can be somewhat confusing to think about. (Need to think in reverse.)
# e.g encoding convertto dingbats <somethingpretty> will output something that doesn't look dingbatty on screen. # e.g encoding convertto dingbats <somethingpretty> will output something that doesn't look dingbatty on screen.
#-- --- --- --- --- --- --- --- #-- --- --- --- --- --- --- ---
#must use Tcl instead of tcl (at least for 8.6) #must use Tcl instead of tcl (at least for 8.6)
if {![package vsatisfies [package present Tcl] 8.7-]} { if {![package vsatisfies [package present Tcl] 8.7-]} {
proc encodable "s {enc [encoding system]}" { proc encodable "s {enc [encoding system]}" {
@ -574,7 +574,7 @@ tcl::namespace::eval punk::char {
} }
} }
} }
#-- --- --- --- --- --- --- --- #-- --- --- --- --- --- --- ---
proc test_japanese {{encoding jis0208}} { proc test_japanese {{encoding jis0208}} {
#A very basic test of 2char encodings such as jis0208 #A very basic test of 2char encodings such as jis0208
set yatbun 日本 ;# encoding convertfrom jis0208 F|K\\ set yatbun 日本 ;# encoding convertfrom jis0208 F|K\\
@ -584,7 +584,7 @@ tcl::namespace::eval punk::char {
set ebun [encoding convertto $encoding $bun] set ebun [encoding convertto $encoding $bun]
puts "$encoding encoded: ${eyat} ${ebun}" puts "$encoding encoded: ${eyat} ${ebun}"
puts "reencoded: [encoding convertfrom $encoding $eyat] [encoding convertfrom $encoding $ebun]" puts "reencoded: [encoding convertfrom $encoding $eyat] [encoding convertfrom $encoding $ebun]"
return $yatbun return $yatbun
} }
proc test_grave {} { proc test_grave {} {
set g [format %c 0x300] set g [format %c 0x300]
@ -692,7 +692,7 @@ tcl::namespace::eval punk::char {
#ESC ( B ESC ) B ASCII Set #ESC ( B ESC ) B ASCII Set
#ESC ( 0 ESC ) 0 Special Graphics #ESC ( 0 ESC ) 0 Special Graphics
#ESC ( 1 ESC ) 1 Alternate Character ROM Standard Character Set #ESC ( 1 ESC ) 1 Alternate Character ROM Standard Character Set
#ESC ( 2 ESC ) 2 Alternate Character ROM Special Graphic #ESC ( 2 ESC ) 2 Alternate Character ROM Special Graphic
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# Unicode character sets - some hardcoded - some loadable from data files # Unicode character sets - some hardcoded - some loadable from data files
@ -700,7 +700,7 @@ tcl::namespace::eval punk::char {
variable charinfo [tcl::dict::create] variable charinfo [tcl::dict::create]
variable charsets [tcl::dict::create] variable charsets [tcl::dict::create]
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# Aggregate character sets (ones that pick various ranges from underlying unicode ranges) # Aggregate character sets (ones that pick various ranges from underlying unicode ranges)
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
@ -741,7 +741,7 @@ tcl::namespace::eval punk::char {
} }
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# Unicode ranges # Unicode ranges
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
tcl::dict::set charsets "greek" [list ranges [list {start 880 end 1023} {start 7936 end 8191}] description "Greek and Coptic" settype "other"] tcl::dict::set charsets "greek" [list ranges [list {start 880 end 1023} {start 7936 end 8191}] description "Greek and Coptic" settype "other"]
@ -975,9 +975,9 @@ tcl::namespace::eval punk::char {
variable charset_extents_rangenames ;# dict keyed on start,end pointing to list of 2-tuples {setname rangeindex_within_set} variable charset_extents_rangenames ;# dict keyed on start,end pointing to list of 2-tuples {setname rangeindex_within_set}
#build 2 indexes for each range.(charsets are not just unicode blocks so can have multiple ranges) #build 2 indexes for each range.(charsets are not just unicode blocks so can have multiple ranges)
#Note that a range could be as small as a single char (startpoint = endpoint) so there can be many ranges with same start and end if charsets use some of the same subsets. #Note that a range could be as small as a single char (startpoint = endpoint) so there can be many ranges with same start and end if charsets use some of the same subsets.
#as each charset_extents_startpoins,charset_extents_endpoints is built - the associated range name and index is appended to the rangenames dict #as each charset_extents_startpoins,charset_extents_endpoints is built - the associated range name and index is appended to the rangenames dict
#startpoints - key is startpoint of a range, value is list of endpoints one for each range starting at this startpoint-key #startpoints - key is startpoint of a range, value is list of endpoints one for each range starting at this startpoint-key
#endpoints - key is endpoint of a range, value is list of startpoints one for each range ending at this endpoint-key #endpoints - key is endpoint of a range, value is list of startpoints one for each range ending at this endpoint-key
proc _build_charset_extents {} { proc _build_charset_extents {} {
variable charsets variable charsets
variable charset_extents_startpoints variable charset_extents_startpoints
@ -995,7 +995,7 @@ tcl::namespace::eval punk::char {
set end [tcl::dict::get [lindex $ranges 0] end] set end [tcl::dict::get [lindex $ranges 0] end]
if {![tcl::dict::exists $charset_extents_startpoints $start] || $end ni [tcl::dict::get $charset_extents_startpoints $start]} { if {![tcl::dict::exists $charset_extents_startpoints $start] || $end ni [tcl::dict::get $charset_extents_startpoints $start]} {
#assertion if end wasn't in startpoits list - then start won't be in endpoints list #assertion if end wasn't in startpoits list - then start won't be in endpoints list
tcl::dict::lappend charset_extents_startpoints $start $end tcl::dict::lappend charset_extents_startpoints $start $end
tcl::dict::lappend charset_extents_endpoints $end $start tcl::dict::lappend charset_extents_endpoints $end $start
} }
tcl::dict::lappend charset_extents_rangenames ${start},${end} [list $setname 1] tcl::dict::lappend charset_extents_rangenames ${start},${end} [list $setname 1]
@ -1023,14 +1023,14 @@ tcl::namespace::eval punk::char {
#no need to sort charset_extents_rangenames - lookup only done using dict methods #no need to sort charset_extents_rangenames - lookup only done using dict methods
return [tcl::dict::size $charset_extents_startpoints] return [tcl::dict::size $charset_extents_startpoints]
} }
_build_charset_extents ;#rebuilds for all charsets _build_charset_extents ;#rebuilds for all charsets
#nerdfonts are within the Private use E000 - F8FF range #nerdfonts are within the Private use E000 - F8FF range
proc load_nerdfonts {} { proc load_nerdfonts {} {
variable charsets variable charsets
variable charinfo variable charinfo
package require fileutil package require fileutil
set ver [package provide punk::char] set ver [package provide punk::char]
if {$ver ne ""} { if {$ver ne ""} {
set ifneeded [package ifneeded punk::char [package provide punk::char]] set ifneeded [package ifneeded punk::char [package provide punk::char]]
#puts stderr "punk::char ifneeded script: $ifneeded" #puts stderr "punk::char ifneeded script: $ifneeded"
@ -1038,7 +1038,7 @@ tcl::namespace::eval punk::char {
set basedir [file dirname [lindex $sourceinfo end]] set basedir [file dirname [lindex $sourceinfo end]]
} else { } else {
#review - will only work at package load time #review - will only work at package load time
set scr [info script] set scr [info script]
if {$scr eq ""} { if {$scr eq ""} {
error "load_nerdfonts unable to determine package folder" error "load_nerdfonts unable to determine package folder"
} }
@ -1048,7 +1048,7 @@ tcl::namespace::eval punk::char {
set fname [file join $pkg_data_dir nerd-fonts-glyph-list.txt] set fname [file join $pkg_data_dir nerd-fonts-glyph-list.txt]
if {[file exists $fname]} { if {[file exists $fname]} {
#puts stderr "load_nerdfonts loading $fname" #puts stderr "load_nerdfonts loading $fname"
set data [fileutil::cat -translation binary $fname] set data [fileutil::cat -translation binary $fname]
set short_seen [tcl::dict::create] set short_seen [tcl::dict::create]
set current_set_range [tcl::dict::create] set current_set_range [tcl::dict::create]
set filesets_loading [list] set filesets_loading [list]
@ -1066,7 +1066,7 @@ tcl::namespace::eval punk::char {
dict unset charset $setname dict unset charset $setname
} }
set newrange [list start $dec end $dec] set newrange [list start $dec end $dec]
tcl::dict::set current_set_range $setname $newrange tcl::dict::set current_set_range $setname $newrange
tcl::dict::set charsets $setname [list ranges [list $newrange] description "nerd fonts $rawsetname" settype "nerdfonts privateuse"] tcl::dict::set charsets $setname [list ranges [list $newrange] description "nerd fonts $rawsetname" settype "nerdfonts privateuse"]
lappend filesets_loading $setname lappend filesets_loading $setname
@ -1080,13 +1080,13 @@ tcl::namespace::eval punk::char {
#overwrite last ranges element #overwrite last ranges element
set rangelist [lrange [tcl::dict::get $charsets $setname ranges] 0 end-1] set rangelist [lrange [tcl::dict::get $charsets $setname ranges] 0 end-1]
lappend rangelist [tcl::dict::get $current_set_range $setname] lappend rangelist [tcl::dict::get $current_set_range $setname]
tcl::dict::set charsets $setname ranges $rangelist tcl::dict::set charsets $setname ranges $rangelist
} else { } else {
#new range for set #new range for set
tcl::dict::set current_set_range $setname start $dec tcl::dict::set current_set_range $setname start $dec
tcl::dict::set current_set_range $setname end $dec tcl::dict::set current_set_range $setname end $dec
set rangelist [tcl::dict::get $charsets $setname ranges] set rangelist [tcl::dict::get $charsets $setname ranges]
lappend rangelist [tcl::dict::get $current_set_range $setname] lappend rangelist [tcl::dict::get $current_set_range $setname]
tcl::dict::set charsets $setname ranges $rangelist tcl::dict::set charsets $setname ranges $rangelist
} }
@ -1130,7 +1130,7 @@ tcl::namespace::eval punk::char {
proc package_base {} { proc package_base {} {
#assume punk::char is in .tm form and we can use the package provide statement to determine base location #assume punk::char is in .tm form and we can use the package provide statement to determine base location
#review #review
set pkgver [package present punk::char] set pkgver [package present punk::char]
set pkginfo [package ifneeded punk::char $pkgver] set pkginfo [package ifneeded punk::char $pkgver]
set tmfile [lindex $pkginfo end] set tmfile [lindex $pkginfo end]
@ -1156,7 +1156,7 @@ tcl::namespace::eval punk::char {
if {[tcl::dict::exists $dictValue {*}$keys]} { if {[tcl::dict::exists $dictValue {*}$keys]} {
return [tcl::dict::get $dictValue {*}$keys] return [tcl::dict::get $dictValue {*}$keys]
} else { } else {
return [lindex $args end] return [lindex $args end]
} }
} }
} }
@ -1193,7 +1193,7 @@ tcl::namespace::eval punk::char {
if {[set pcolon [tcl::string::first ";" $ln]] > 0} { if {[set pcolon [tcl::string::first ";" $ln]] > 0} {
set lhs [tcl::string::trim [tcl::string::range $ln 0 $pcolon-1]] set lhs [tcl::string::trim [tcl::string::range $ln 0 $pcolon-1]]
set name [tcl::string::trim [tcl::string::range $ln $pcolon+1 end]] set name [tcl::string::trim [tcl::string::range $ln $pcolon+1 end]]
set lhsparts [split $lhs .] set lhsparts [split $lhs .]
set start [lindex $lhsparts 0] set start [lindex $lhsparts 0]
set end [lindex $lhsparts end] set end [lindex $lhsparts end]
#puts "$start -> $end '$name'" #puts "$start -> $end '$name'"
@ -1207,9 +1207,9 @@ tcl::namespace::eval punk::char {
return $block_count return $block_count
} }
#unicode scripts #unicode scripts
#unicode UnicodeData.txt #unicode UnicodeData.txt
@ -1225,8 +1225,8 @@ tcl::namespace::eval punk::char {
# -- --- # -- ---
#A = Ambiguous - All characters that can be sometimes wide and sometimes narrow. (wide in east asian legacy sets, narrow in non-east asian usage) (private use chars considered ambiguous) #A = Ambiguous - All characters that can be sometimes wide and sometimes narrow. (wide in east asian legacy sets, narrow in non-east asian usage) (private use chars considered ambiguous)
#F = East Asian Full-width #F = East Asian Full-width
#H = East Asian Half-width #H = East Asian Half-width
#N = Not east Asian (Neutral) - all other characters. (do not occur in legacy East Asian character sets) - treated like Na #N = Not east Asian (Neutral) - all other characters. (do not occur in legacy East Asian character sets) - treated like Na
#Na = East Asian Narrow - all other characters that are always narrow and have explicit full-width counterparts (e.g includes all of ASCII) #Na = East Asian Narrow - all other characters that are always narrow and have explicit full-width counterparts (e.g includes all of ASCII)
#W = East Asian Wide - all other characters that are always wide (Occur only in the context of Eas Asian Typography) #W = East Asian Wide - all other characters that are always wide (Occur only in the context of Eas Asian Typography)
# -- --- # -- ---
@ -1304,7 +1304,7 @@ tcl::namespace::eval punk::char {
set initial_fields $known_fields set initial_fields $known_fields
if {"testwidth" ni $opt_fields} { if {"testwidth" ni $opt_fields} {
if {"testwidth" ni $opt_except} { if {"testwidth" ni $opt_except} {
lappend opt_except testwidth lappend opt_except testwidth
} }
} }
if {"char" ni $opt_fields} { if {"char" ni $opt_fields} {
@ -1369,13 +1369,13 @@ tcl::namespace::eval punk::char {
#testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time #testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time
set existing_testwidth "" set existing_testwidth ""
if {[tcl::dict::exists $charinfo $dec_char testwidth]} { if {[tcl::dict::exists $charinfo $dec_char testwidth]} {
set existing_testwidth [tcl::dict::get $charinfo $dec_char testwidth] set existing_testwidth [tcl::dict::get $charinfo $dec_char testwidth]
} }
if {$existing_testwidth eq ""} { if {$existing_testwidth eq ""} {
#no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.) #no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.)
set char [format %c $dec_char] set char [format %c $dec_char]
set chwidth [char_info_testwidth $char] set chwidth [char_info_testwidth $char]
tcl::dict::set returninfo testwidth $chwidth tcl::dict::set returninfo testwidth $chwidth
#cache it. todo - -verify flag to force recalc in case font/terminal changed in some way? #cache it. todo - -verify flag to force recalc in case font/terminal changed in some way?
tcl::dict::set charinfo $dec_char testwidth $chwidth tcl::dict::set charinfo $dec_char testwidth $chwidth
@ -1387,10 +1387,10 @@ tcl::namespace::eval punk::char {
set char [format %c $dec_char] set char [format %c $dec_char]
tcl::dict::set returninfo char $char tcl::dict::set returninfo char $char
} }
memberof { memberof {
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising #memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4) #note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges. #This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here. #We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char) #some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
set memberof [list] set memberof [list]
@ -1435,7 +1435,7 @@ tcl::namespace::eval punk::char {
set splen [tcl::dict::size $charset_extents_startpoints] set splen [tcl::dict::size $charset_extents_startpoints]
set eplen [tcl::dict::size $charset_extents_endpoints] set eplen [tcl::dict::size $charset_extents_endpoints]
set s [lsearch -bisect -integer $skeys $dec] set s [lsearch -bisect -integer $skeys $dec]
set s_at_or_below [lrange $skeys 0 $s] set s_at_or_below [lrange $skeys 0 $s]
set e_of_s [list] set e_of_s [list]
foreach sk $s_at_or_below { foreach sk $s_at_or_below {
lappend e_of_s {*}[tcl::dict::get $charset_extents_startpoints $sk] lappend e_of_s {*}[tcl::dict::get $charset_extents_startpoints $sk]
@ -1472,16 +1472,16 @@ tcl::namespace::eval punk::char {
set eps [list] set eps [list]
} }
return [tcl::dict::create startpoints $splen endpoints $eplen midpoint [expr {floor($eplen/2)}] posn $e lhs_endpoints_to_check "[llength $reduced_endpoints]/[llength $e_of_s]=[llength $sps]ranges" rhs_startpoints_to_check "[llength $reduced_startpoints]/[llength $s_of_e]=[llength $eps]"] return [tcl::dict::create startpoints $splen endpoints $eplen midpoint [expr {floor($eplen/2)}] posn $e lhs_endpoints_to_check "[llength $reduced_endpoints]/[llength $e_of_s]=[llength $sps]ranges" rhs_startpoints_to_check "[llength $reduced_startpoints]/[llength $s_of_e]=[llength $eps]"]
} }
#for just a few extra sets such as wgl4 and unicode blocks loaded - this gives e.g 5x + better performance than the simple search above, and up to twice as slow for tcl 8.6 #for just a few extra sets such as wgl4 and unicode blocks loaded - this gives e.g 5x + better performance than the simple search above, and up to twice as slow for tcl 8.6
#performance biased towards lower numbered characters (which is not too bad in the context of unicode) #performance biased towards lower numbered characters (which is not too bad in the context of unicode)
#todo - could be tuned to perform better at end by assuming a fairly even distribution of ranges - and so searching startpoint ranges first for items left of middle, endpoint ranges first for items right of middle #todo - could be tuned to perform better at end by assuming a fairly even distribution of ranges - and so searching startpoint ranges first for items left of middle, endpoint ranges first for items right of middle
#review with scripts loaded and more defined ranges.. #review with scripts loaded and more defined ranges..
#This algorithm supports arbitrary overlapping ranges and ranges with same start & endpoints #This algorithm supports arbitrary overlapping ranges and ranges with same start & endpoints
#Should be much better than O(n) for n sets except for pathological case of member of all or nearly-all intervals ? #Should be much better than O(n) for n sets except for pathological case of member of all or nearly-all intervals ?
#review - compare with 'interval tree' algorithms. #review - compare with 'interval tree' algorithms.
proc char_info_dec_memberof {dec} { proc char_info_dec_memberof {dec} {
variable charset_extents_startpoints variable charset_extents_startpoints
variable charset_extents_endpoints variable charset_extents_endpoints
@ -1563,7 +1563,7 @@ tcl::namespace::eval punk::char {
set matchcount 0 set matchcount 0
foreach glob $and_globs { foreach glob $and_globs {
if {[tcl::string::match -nocase $glob $s] || [tcl::string::match -nocase $glob $d]} { if {[tcl::string::match -nocase $glob $s] || [tcl::string::match -nocase $glob $d]} {
incr matchcount incr matchcount
} }
} }
if {$matchcount == [llength $and_globs]} { if {$matchcount == [llength $and_globs]} {
@ -1595,12 +1595,12 @@ tcl::namespace::eval punk::char {
} }
#non-overlapping unicode blocks #non-overlapping unicode blocks
proc char_blocks {{name_or_glob *}} { proc char_blocks {{name_or_glob *}} {
variable charsets variable charsets
#todo - more efficient datastructures? #todo - more efficient datastructures?
if {![regexp {[?*]} $name_or_glob]} { if {![regexp {[?*]} $name_or_glob]} {
#no glob - just retrieve it #no glob - just retrieve it
if {[tcl::dict::exists $charsets $name_or_glob]} { if {[tcl::dict::exists $charsets $name_or_glob]} {
if {[tcl::dict::get $charsets $name_or_glob settype] eq "block"} { if {[tcl::dict::get $charsets $name_or_glob settype] eq "block"} {
return [tcl::dict::create $name_or_glob [tcl::dict::get $charsets $name_or_glob]] return [tcl::dict::create $name_or_glob [tcl::dict::get $charsets $name_or_glob]]
@ -1630,7 +1630,7 @@ tcl::namespace::eval punk::char {
proc charset_names {{name_or_glob *}} { proc charset_names {{name_or_glob *}} {
variable charsets variable charsets
if {![regexp {[?*]} $name_or_glob]} { if {![regexp {[?*]} $name_or_glob]} {
#no glob - just retrieve it #no glob - just retrieve it
if {[tcl::dict::exists $charsets $name_or_glob]} { if {[tcl::dict::exists $charsets $name_or_glob]} {
return [list $name_or_glob] return [list $name_or_glob]
} }
@ -1641,7 +1641,7 @@ tcl::namespace::eval punk::char {
} }
} else { } else {
if {$name_or_glob eq "*"} { if {$name_or_glob eq "*"} {
return [lsort [tcl::dict::keys $charsets]] return [lsort [tcl::dict::keys $charsets]]
} }
#tcl::dict::keys $dict <pattern> doesn't have option for case insensitive searches #tcl::dict::keys $dict <pattern> doesn't have option for case insensitive searches
return [lsort [lsearch -all -inline -nocase [tcl::dict::keys $charsets] $name_or_glob]] return [lsort [lsearch -all -inline -nocase [tcl::dict::keys $charsets] $name_or_glob]]
@ -1655,7 +1655,7 @@ tcl::namespace::eval punk::char {
variable charsets variable charsets
#dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results #dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results
#set sortedkeys [lsort -increasing -dictionary [tcl::dict::keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below #set sortedkeys [lsort -increasing -dictionary [tcl::dict::keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
set sortedkeys [lsort -increasing [tcl::dict::keys $charsets]] set sortedkeys [lsort -increasing [tcl::dict::keys $charsets]]
if {$namesearch eq "*"} { if {$namesearch eq "*"} {
return $sortedkeys return $sortedkeys
} }
@ -1664,7 +1664,7 @@ tcl::namespace::eval punk::char {
return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else { } else {
#return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted? #return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted?
return [lsearch -inline -nocase $sortedkeys $namesearch] ;#no globs return [lsearch -inline -nocase $sortedkeys $namesearch] ;#no globs
} }
} }
proc charsets {{namesearch *}} { proc charsets {{namesearch *}} {
@ -1738,7 +1738,7 @@ tcl::namespace::eval punk::char {
set opt_ansi [tcl::dict::get $opts -ansi] set opt_ansi [tcl::dict::get $opts -ansi]
set opt_lined [tcl::dict::get $opts -lined] set opt_lined [tcl::dict::get $opts -lined]
# -- --- --- --- # -- --- --- ---
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
if {$opt_ansi} { if {$opt_ansi} {
set a1 [a BLACK white bold] set a1 [a BLACK white bold]
@ -1768,7 +1768,7 @@ tcl::namespace::eval punk::char {
} }
set i 1 set i 1
append out \n $prefix $charsetname append out \n $prefix $charsetname
append out \n append out \n
set marker_line $prefix set marker_line $prefix
set line $prefix set line $prefix
@ -1847,7 +1847,7 @@ tcl::namespace::eval punk::char {
} else { } else {
set charset_dict [charset_dictget $charsetname] set charset_dict [charset_dictget $charsetname]
} }
set col_items_short [list] set col_items_short [list]
set col_items_desc [list] set col_items_desc [list]
tcl::dict::for {k inf} $charset_dict { tcl::dict::for {k inf} $charset_dict {
@ -1873,7 +1873,7 @@ tcl::namespace::eval punk::char {
return $out return $out
} }
#use console cursor movements to test and cache the column-width of each char in the set of characters returned by the search criteria #use console cursor movements to test and cache the column-width of each char in the set of characters returned by the search criteria
proc charset_calibrate {namesearch args} { proc charset_calibrate {namesearch args} {
variable charsets variable charsets
variable charinfo variable charinfo
@ -1909,7 +1909,7 @@ tcl::namespace::eval punk::char {
set twidth [tcl::dict::get $charinfo $dec testwidth] set twidth [tcl::dict::get $charinfo $dec testwidth]
} }
if {$twidth eq ""} { if {$twidth eq ""} {
#puts -nonewline stdout "." ;#this #puts -nonewline stdout "." ;#this
set width [char_info_testwidth $ch] ;#based on console test rather than unicode props set width [char_info_testwidth $ch] ;#based on console test rather than unicode props
tcl::dict::set charinfo $dec testwidth $width tcl::dict::set charinfo $dec testwidth $width
} else { } else {
@ -1925,10 +1925,10 @@ tcl::namespace::eval punk::char {
#maint warning - also in overtype! #maint warning - also in overtype!
#intended for single grapheme - but will work for multiple #intended for single grapheme - but will work for multiple
#cannot contain ansi or newlines #cannot contain ansi or newlines
#(a cache of ansifreestring_width calls - as these are quite regex heavy) #(a cache of ansifreestring_width calls - as these are quite regex heavy)
#review - effective memory leak on longrunning programs if never cleared #review - effective memory leak on longrunning programs if never cleared
#tradeoff in fragmenting cache and reducing efficiency vs ability to clear in a scoped manner #tradeoff in fragmenting cache and reducing efficiency vs ability to clear in a scoped manner
proc grapheme_width_cached {ch {key ""}} { proc grapheme_width_cached {ch {key ""}} {
variable grapheme_widths variable grapheme_widths
#if key eq "*" - we won't be able to clear that cache individually. Perhaps that's ok #if key eq "*" - we won't be able to clear that cache individually. Perhaps that's ok
@ -1975,7 +1975,7 @@ tcl::namespace::eval punk::char {
# - compare with wcswidth returning -1 for entire string containing such in python,perl # - compare with wcswidth returning -1 for entire string containing such in python,perl
proc wcswidth {string} { proc wcswidth {string} {
#faster than textutil::wcswidth (at least for string up to a few K in length) #faster than textutil::wcswidth (at least for string up to a few K in length)
#..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?) #..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?)
#Tcl initial evaluation stack size is 2000 (? review) #Tcl initial evaluation stack size is 2000 (? review)
#REVIEW - when we cater for grapheme clusters - we can't just split the string at arbitrary points like this!! #REVIEW - when we cater for grapheme clusters - we can't just split the string at arbitrary points like this!!
set chunksize 2000 set chunksize 2000
@ -1984,14 +1984,14 @@ tcl::namespace::eval punk::char {
set startidx 0 set startidx 0
set endidx [expr {$startidx + $chunksize -1}] set endidx [expr {$startidx + $chunksize -1}]
for {set i 0} {$i < $chunks_required} {incr i} { for {set i 0} {$i < $chunks_required} {incr i} {
set chunk [tcl::string::range $string $startidx $endidx] set chunk [tcl::string::range $string $startidx $endidx]
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach c $codes { foreach c $codes {
if {$c <= 255 && !($c < 31 || $c == 127)} { if {$c <= 255 && !($c < 31 || $c == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? #review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth #todo - compare with python or other lang wcwidth
incr width incr width
} elseif {$c < 917504 || $c > 917631} { } elseif {$c < 917504 || $c > 917631} {
#TODO - various other joiners and non-printing chars #TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $c] set w [textutil::wcswidth_char $c]
@ -2023,7 +2023,7 @@ tcl::namespace::eval punk::char {
set graphemes [list] set graphemes [list]
while {$i < [tcl::string::length $string]} { while {$i < [tcl::string::length $string]} {
set aftercluster [tk::endOfCluster $string $i] set aftercluster [tk::endOfCluster $string $i]
lappend graphemes [string range $string $i $aftercluster-1] lappend graphemes [string range $string $i $aftercluster-1]
set i $aftercluster set i $aftercluster
} }
return $graphemes return $graphemes
@ -2052,15 +2052,15 @@ tcl::namespace::eval punk::char {
} }
} }
incr width $gw incr width $gw
#if {[string first \u200d $g] >=0} { #if {[string first \u200d $g] >=0} {
# incr width 2 # incr width 2
#} else { #} else {
# #other joiners??? # #other joiners???
# incr width [wcswidth_unclustered $g] # incr width [wcswidth_unclustered $g]
#} #}
} else { } else {
incr width [wcswidth_unclustered $g] incr width [wcswidth_unclustered $g]
} }
set i $aftercluster set i $aftercluster
} }
@ -2071,8 +2071,8 @@ tcl::namespace::eval punk::char {
scan $char %c dec scan $char %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} { if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? #review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth #todo - compare with python or other lang wcwidth
return 1 return 1
} elseif {$dec < 917504 || $dec > 917631} { } elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars #TODO - various other joiners and non-printing chars
return [textutil::wcswidth_char $dec] ;#note textutil::wcswidth_char takes a decimal codepoint! return [textutil::wcswidth_char $dec] ;#note textutil::wcswidth_char takes a decimal codepoint!
@ -2086,8 +2086,8 @@ tcl::namespace::eval punk::char {
scan $c %c dec scan $c %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} { if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? #review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth #todo - compare with python or other lang wcwidth
incr width incr width
} elseif {$dec < 917504 || $dec > 917631} { } elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars #TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint
@ -2105,7 +2105,7 @@ tcl::namespace::eval punk::char {
# - compare with wcswidth returning -1 for entire string containing such in python,perl # - compare with wcswidth returning -1 for entire string containing such in python,perl
proc wcswidth_unclustered {string} { proc wcswidth_unclustered {string} {
#faster than textutil::wcswidth (at least for string up to a few K in length) #faster than textutil::wcswidth (at least for string up to a few K in length)
#..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?) #..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?)
#Tcl initial evaluation stack size is 2000 (? review) #Tcl initial evaluation stack size is 2000 (? review)
#we can only split the string at arbitrary points like this because we are specifically dealing with input that has no clusters!. #we can only split the string at arbitrary points like this because we are specifically dealing with input that has no clusters!.
set chunksize 2000 set chunksize 2000
@ -2114,14 +2114,14 @@ tcl::namespace::eval punk::char {
set startidx 0 set startidx 0
set endidx [expr {$startidx + $chunksize -1}] set endidx [expr {$startidx + $chunksize -1}]
for {set i 0} {$i < $chunks_required} {incr i} { for {set i 0} {$i < $chunks_required} {incr i} {
set chunk [tcl::string::range $string $startidx $endidx] set chunk [tcl::string::range $string $startidx $endidx]
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]] set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach dec $codes { foreach dec $codes {
if {$dec <= 255 && !($dec < 31 || $dec == 127)} { if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? #review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth #todo - compare with python or other lang wcwidth
incr width incr width
} elseif {$dec < 917504 || $dec > 917631} { } elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars #TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] set w [textutil::wcswidth_char $dec]
@ -2141,8 +2141,8 @@ tcl::namespace::eval punk::char {
proc wcswidth0 {string} { proc wcswidth0 {string} {
#faster than textutil::wcswidth (at least for string up to a few K in length) #faster than textutil::wcswidth (at least for string up to a few K in length)
#..but - 'scan' is horrible for 400K+ #..but - 'scan' is horrible for 400K+
#TODO #TODO
set codes [scan $string [tcl::string::repeat %c [tcl::string::length $string]]] set codes [scan $string [tcl::string::repeat %c [tcl::string::length $string]]]
set width 0 set width 0
foreach dec $codes { foreach dec $codes {
@ -2150,9 +2150,9 @@ tcl::namespace::eval punk::char {
if {$dec < 917504 || $dec > 917631} { if {$dec < 917504 || $dec > 917631} {
if {$dec <= 255} { if {$dec <= 255} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ?? #review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth #todo - compare with python or other lang wcwidth
if {!($dec < 31 || $dec == 127)} { if {!($dec < 31 || $dec == 127)} {
incr width incr width
} }
} else { } else {
#TODO - various other joiners and non-printing chars #TODO - various other joiners and non-printing chars
@ -2179,11 +2179,11 @@ tcl::namespace::eval punk::char {
#prerequisites - no ansi escapes - no newlines - utf8 encoding assumed #prerequisites - no ansi escapes - no newlines - utf8 encoding assumed
#review - what about \r \t \b ? #review - what about \r \t \b ?
#NO processing of \b - already handled in ansi::printing_length which then calls this #NO processing of \b - already handled in ansi::printing_length which then calls this
#this version breaks string into sequences of ascii vs unicode #this version breaks string into sequences of ascii vs unicode
proc ansifreestring_width {text} { proc ansifreestring_width {text} {
#caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines #caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines
#we can c0 control characters after or while processing ansi escapes. #we can c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!) #we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error #anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#if {[tcl::string::first \033 $text] >= 0} { #if {[tcl::string::first \033 $text] >= 0} {
# error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first" # error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first"
@ -2204,11 +2204,11 @@ tcl::namespace::eval punk::char {
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway #experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then) #(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)} #set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#if {[regexp $re_leading_diacritic $text]} { #if {[regexp $re_leading_diacritic $text]} {
# set text " $text" # set text " $text"
#} #}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""] set text [regsub -all $re_diacritics $text ""]
# -- --- --- --- --- --- --- # -- --- --- --- --- --- ---
@ -2221,10 +2221,10 @@ tcl::namespace::eval punk::char {
#for now - strip them out #for now - strip them out
#ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length #ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u200b zero width space #ZWSP \u200b zero width space
#\uFFEFBOM/ ZWNBSP and others that should be zero width #\uFFEFBOM/ ZWNBSP and others that should be zero width
#todo - work out proper way to mark/group zero width. #todo - work out proper way to mark/group zero width.
#set text [tcl::string::map [list \u200b "" \u200c "" \u200d "" \uFFEF ""] $text] #set text [tcl::string::map [list \u200b "" \u200c "" \u200d "" \uFFEF ""] $text]
set text [tcl::string::map [list \u200b "" \u200c "" \u200d ""] $text] set text [tcl::string::map [list \u200b "" \u200c "" \u200d ""] $text]
@ -2241,7 +2241,7 @@ tcl::namespace::eval punk::char {
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
#some or all of these may have a visual representation in other encodings e.g cp855 seems to have 1 width for them all #some or all of these may have a visual representation in other encodings e.g cp855 seems to have 1 width for them all
#we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here #we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here
#they should also be unlikely to be present in an ansi-free string (as is a precondition for use of this function) #they should also be unlikely to be present in an ansi-free string (as is a precondition for use of this function)
set text [regsub -all {[\u0080-\u009f]+} $text ""] set text [regsub -all {[\u0080-\u009f]+} $text ""]
@ -2262,7 +2262,7 @@ tcl::namespace::eval punk::char {
#set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525] #set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
#tcl pre 2023-11 - braced high unicode regexes don't work #tcl pre 2023-11 - braced high unicode regexes don't work
#fixed in bug-4ed788c618 2023-11 #fixed in bug-4ed788c618 2023-11
#set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only #set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
#maintain unicode as sequences - todo - scan for grapheme clusters #maintain unicode as sequences - todo - scan for grapheme clusters
#set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0100-\U10FFFF\])+" $text] #set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0100-\U10FFFF\])+" $text]
@ -2291,7 +2291,7 @@ tcl::namespace::eval punk::char {
#we can c0 control characters after or while processing ansi escapes. #we can c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!) #we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error #anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#if {[tcl::string::first \033 $text] >= 0} { #if {[tcl::string::first \033 $text] >= 0} {
# error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first" # error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first"
@ -2312,11 +2312,11 @@ tcl::namespace::eval punk::char {
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway #experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then) #(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)} #set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#if {[regexp $re_leading_diacritic $text]} { #if {[regexp $re_leading_diacritic $text]} {
# set text " $text" # set text " $text"
#} #}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""] set text [regsub -all $re_diacritics $text ""]
#review #review
@ -2325,7 +2325,7 @@ tcl::namespace::eval punk::char {
#ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length #ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u0200b zero width space #ZWSP \u0200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
@ -2343,10 +2343,10 @@ tcl::namespace::eval punk::char {
} }
#review - wcswidth should detect these #review - wcswidth should detect these
set re_ascii_fullwidth {[\uFF01-\uFF5e]} set re_ascii_fullwidth {[\uFF01-\uFF5e]}
set doublewidth_char_count 0 set doublewidth_char_count 0
set zerowidth_char_count 0 set zerowidth_char_count 0
#split just to get the standalone character widths - and then scan for other combiners (?) #split just to get the standalone character widths - and then scan for other combiners (?)
#review #review
#set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525] #set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
@ -2354,7 +2354,7 @@ tcl::namespace::eval punk::char {
#fixed in bug-4ed788c618 2023-11 #fixed in bug-4ed788c618 2023-11
#set uc_sequences [regexp -all -inline -indices {[\u0100-\U10FFFF]} $text] #set uc_sequences [regexp -all -inline -indices {[\u0100-\U10FFFF]} $text]
#set uc_sequences [regexp -all -inline -indices "\[\u0100-\U10FFFF\]" $text] ;#e.g for string: \U0001f9d1abc\U001f525ab returns {0 0} {4 4} #set uc_sequences [regexp -all -inline -indices "\[\u0100-\U10FFFF\]" $text] ;#e.g for string: \U0001f9d1abc\U001f525ab returns {0 0} {4 4}
set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
foreach c $uc_chars { foreach c $uc_chars {
if {[regexp $re_ascii_fullwidth $c]} { if {[regexp $re_ascii_fullwidth $c]} {
incr doublewidth_char_count incr doublewidth_char_count
@ -2364,12 +2364,12 @@ tcl::namespace::eval punk::char {
# b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tclllib docs. # b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tclllib docs.
#(character width is a complex context-dependent topic) #(character width is a complex context-dependent topic)
# c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run. # c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run.
# d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here # d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here
#Despite all this - the mechanism is hoped to give best effort consistency for terminals #Despite all this - the mechanism is hoped to give best effort consistency for terminals
#further work needed for combining emojis etc - which can't be done in a per character loop #further work needed for combining emojis etc - which can't be done in a per character loop
#todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split #todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split
# to process sequences of unicode. # to process sequences of unicode.
#- and the user has the option to test character sets first if terminal position reporting gives better results #- and the user has the option to test character sets first if terminal position reporting gives better results
if {[char_info_is_testwidth_cached $c]} { if {[char_info_is_testwidth_cached $c]} {
set width [char_info_testwidth_cached $c] set width [char_info_testwidth_cached $c]
} else { } else {
@ -2378,7 +2378,7 @@ tcl::namespace::eval punk::char {
set width [textutil::wcswidth_char [scan $c %c]] set width [textutil::wcswidth_char [scan $c %c]]
} }
if {$width == 0} { if {$width == 0} {
incr zerowidth_char_count incr zerowidth_char_count
} elseif {$width == 2} { } elseif {$width == 2} {
incr doublewidth_char_count incr doublewidth_char_count
} }
@ -2395,7 +2395,7 @@ tcl::namespace::eval punk::char {
#we can c0 control characters after or while processing ansi escapes. #we can c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!) #we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error #anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#if {[tcl::string::first \033 $text] >= 0} { #if {[tcl::string::first \033 $text] >= 0} {
# error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first" # error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first"
@ -2416,11 +2416,11 @@ tcl::namespace::eval punk::char {
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway #experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then) #(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)} #set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#if {[regexp $re_leading_diacritic $text]} { #if {[regexp $re_leading_diacritic $text]} {
# set text " $text" # set text " $text"
#} #}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""] set text [regsub -all $re_diacritics $text ""]
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
@ -2437,7 +2437,7 @@ tcl::namespace::eval punk::char {
return [tcl::string::length $text] return [tcl::string::length $text]
} }
#slow when ascii mixed with unicode (but why?) #slow when ascii mixed with unicode (but why?)
return [punk::char::wcswidth $text] return [punk::char::wcswidth $text]
} }
#This shouldn't be called on text containing ansi codes! #This shouldn't be called on text containing ansi codes!
@ -2516,11 +2516,11 @@ tcl::namespace::eval punk::char {
return [format $fmt {*}$declist] return [format $fmt {*}$declist]
} }
#split into plaintext and runs of combiners (combining diacritical marks - not ZWJ or ZWJNJ) #split into plaintext and runs of combiners (combining diacritical marks - not ZWJ or ZWJNJ)
proc combiner_split {text} { proc combiner_split {text} {
#split into odd numbered list (or zero) in a similar way to punk::ansi::ta::_perlish_split #split into odd numbered list (or zero) in a similar way to punk::ansi::ta::_perlish_split
# #
#set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} #set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set graphemes [list] set graphemes [list]
if {[tcl::string::length $text] == 0} { if {[tcl::string::length $text] == 0} {
return {} return {}
@ -2528,12 +2528,12 @@ tcl::namespace::eval punk::char {
set list [list] set list [list]
set start 0 set start 0
set strlen [tcl::string::length $text] set strlen [tcl::string::length $text]
#make sure our regexes aren't non-greedy - or we may not have exit condition for loop #make sure our regexes aren't non-greedy - or we may not have exit condition for loop
#review #review
while {$start < $strlen && [regexp -start $start -indices -- {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} $text match]} { while {$start < $strlen && [regexp -start $start -indices -- {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} $text match]} {
lassign $match matchStart matchEnd lassign $match matchStart matchEnd
#puts "->start $start ->match $matchStart $matchEnd" #puts "->start $start ->match $matchStart $matchEnd"
lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart $matchEnd] lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart $matchEnd]
set start [expr {$matchEnd+1}] set start [expr {$matchEnd+1}]
} }
lappend list [tcl::string::range $text $start end] lappend list [tcl::string::range $text $start end]
@ -2547,7 +2547,7 @@ tcl::namespace::eval punk::char {
#This is difficult in Tcl without unicode property based Character Classes in the regex engine #This is difficult in Tcl without unicode property based Character Classes in the regex engine
#review - this needs to be performant - it is used a lot by punk terminal/ansi features #review - this needs to be performant - it is used a lot by punk terminal/ansi features
#todo - trie data structures for unicode? #todo - trie data structures for unicode?
#for now we can at least combine diacritics #for now we can at least combine diacritics
#should also handle the ZWJ (and the variation selectors? eg \uFE0F) character which should account for emoji clusters #should also handle the ZWJ (and the variation selectors? eg \uFE0F) character which should account for emoji clusters
#Note - emoji cluster could be for example 11 code points/41 bytes (family emoji with skin tone modifiers for each member, 3 ZWJs) #Note - emoji cluster could be for example 11 code points/41 bytes (family emoji with skin tone modifiers for each member, 3 ZWJs)
#This still leaves a whole class of clusters.. korean etc unhandled :/ #This still leaves a whole class of clusters.. korean etc unhandled :/
@ -2560,7 +2560,7 @@ tcl::namespace::eval punk::char {
set clist [split $pt ""] set clist [split $pt ""]
lappend graphemes {*}[lrange $clist 0 end-1] lappend graphemes {*}[lrange $clist 0 end-1]
lappend graphemes [tcl::string::cat [lindex $clist end] $combiners] lappend graphemes [tcl::string::cat [lindex $clist end] $combiners]
} }
#last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme
if {[lindex $csplits end] ne ""} { if {[lindex $csplits end] ne ""} {
lappend graphemes {*}[split [lindex $csplits end] ""] lappend graphemes {*}[split [lindex $csplits end] ""]
@ -2575,7 +2575,7 @@ tcl::namespace::eval punk::char {
set combiner_decs [scan $combiners [tcl::string::repeat %c [tcl::string::length $combiners]]] set combiner_decs [scan $combiners [tcl::string::repeat %c [tcl::string::length $combiners]]]
lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs] lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs]
lappend graphemes {*}$pt_decs lappend graphemes {*}$pt_decs
} }
#last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme
if {[lindex $csplits end] ne ""} { if {[lindex $csplits end] ne ""} {
lappend graphemes {*}[scan [lindex $csplits end] [tcl::string::repeat %c [tcl::string::length [lindex $csplits end]]]] lappend graphemes {*}[scan [lindex $csplits end] [tcl::string::repeat %c [tcl::string::length [lindex $csplits end]]]]
@ -2592,7 +2592,7 @@ tcl::namespace::eval punk::char {
lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs] lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs]
} }
lappend graphemes {*}$pt_decs lappend graphemes {*}$pt_decs
} }
return $graphemes return $graphemes
} }
proc grapheme_split2 {text} { proc grapheme_split2 {text} {
@ -2601,7 +2601,7 @@ tcl::namespace::eval punk::char {
foreach {pt combiners} [lrange $csplits 0 end-1] { foreach {pt combiners} [lrange $csplits 0 end-1] {
set clist [split $pt ""] set clist [split $pt ""]
lappend graphemes {*}[lrange $clist 0 end-1] [tcl::string::cat [lindex $clist end] $combiners] lappend graphemes {*}[lrange $clist 0 end-1] [tcl::string::cat [lindex $clist end] $combiners]
} }
#last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme
if {[lindex $csplits end] ne ""} { if {[lindex $csplits end] ne ""} {
lappend graphemes {*}[split [lindex $csplits end] ""] lappend graphemes {*}[split [lindex $csplits end] ""]
@ -2645,10 +2645,10 @@ tcl::namespace::eval punk::char {
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready ## Ready
package provide punk::char [tcl::namespace::eval punk::char { package provide punk::char [tcl::namespace::eval punk::char {
variable version variable version
set version 999999.0a1.0 set version 999999.0a1.0
}] }]
return return

2
src/modules/punk/console-999999.0a1.0.tm

@ -777,7 +777,7 @@ namespace eval punk::console {
set extension [lindex [split $waitvar($callid) -] 1] set extension [lindex [split $waitvar($callid) -] 1]
if {$extension eq ""} { if {$extension eq ""} {
puts "blank extension $waitvar($callid)" puts "blank extension $waitvar($callid)"
puts "->[set $waitvar($callid]<-" puts "->[set $waitvar($callid)]<-"
} }
puts stderr "get_ansi_response_payload Extending timeout by $extension" puts stderr "get_ansi_response_payload Extending timeout by $extension"
after cancel $timeoutid($callid) after cancel $timeoutid($callid)

51
src/modules/punk/icomm-999999.0a1.0.tm

@ -21,7 +21,7 @@
#[manpage_begin shellspy_module_punk::icomm 0 999999.0a1.0] #[manpage_begin shellspy_module_punk::icomm 0 999999.0a1.0]
#[copyright "2025"] #[copyright "2025"]
#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] #[titledesc {Module API}] [comment {-- Name section and table of contents description --}]
#[moddesc {-}] [comment {-- Description at end of page heading --}] #[moddesc {-}] [comment {-- Description at end of page heading --}]
#[require punk::icomm] #[require punk::icomm]
#[keywords module] #[keywords module]
#[description] #[description]
@ -107,7 +107,7 @@ package require punk::args
# #
# Note that the actual code was changed in several places (Reordered, # Note that the actual code was changed in several places (Reordered,
# eval speedup) # eval speedup)
# #
# comm works just like Tk's send, except that it uses sockets. # comm works just like Tk's send, except that it uses sockets.
# These commands work just like "send" and "winfo interps": # These commands work just like "send" and "winfo interps":
# #
@ -116,7 +116,7 @@ package require punk::args
# #
# See the manual page comm.n for further details on this package. # See the manual page comm.n for further details on this package.
package require Tcl 8.6- package require Tcl 8.6-
package require snit ; # comm::future objects. package require snit ; # comm::future objects.
namespace eval ::punk::icomm { namespace eval ::punk::icomm {
@ -196,7 +196,7 @@ namespace eval ::punk::icomm {
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools #*** !doctools
#[subsection {Namespace punk::icomm}] #[subsection {Namespace punk::icomm}]
#[para] Core API functions for punk::icomm #[para] Core API functions for punk::icomm
#[list_begin definitions] #[list_begin definitions]
variable PUNKARGS variable PUNKARGS
@ -306,7 +306,7 @@ namespace eval ::punk::icomm {
## API: Setup async result generation for a remotely invoked command. ## API: Setup async result generation for a remotely invoked command.
# (future,fid,<fid>) -> list (future) # (future,fid,<fid>) -> list (future)
# (current,async) -> bool (default 0) # (current,async) -> bool (default 0)
# (current,state) -> list (chan fid cmd ser) # (current,state) -> list (chan fid cmd ser)
proc comm_cmd_return_async {chan} { proc comm_cmd_return_async {chan} {
@ -711,7 +711,6 @@ namespace eval ::punk::icomm {
# #
# Results: # Results:
# None. # None.
proc commConfigure {chan {force 0} args} { proc commConfigure {chan {force 0} args} {
variable comm variable comm
@ -891,7 +890,7 @@ namespace eval ::punk::icomm {
#treat as always connected - call commIncoming imediately. #treat as always connected - call commIncoming imediately.
punk::icomm::commIncoming $chan $comm($chan,tclchan) "localaddr" "localtclchan" punk::icomm::commIncoming $chan $comm($chan,tclchan) "localaddr" "localtclchan"
return return
} }
#------------------------- #-------------------------
@ -1133,7 +1132,7 @@ namespace eval ::punk::icomm {
} }
if {![info exists vers]} { if {![info exists vers]} {
close $fid close $fid
if {[info exists comm($chan,silent)] && if {[info exists comm($chan,silent)] &&
[string is true -strict $comm($chan,silent)]} { [string is true -strict $comm($chan,silent)]} {
return return
} }
@ -1382,7 +1381,7 @@ namespace eval ::punk::icomm {
# Unpack the indices, then extract the word. # Unpack the indices, then extract the word.
#foreach {s e step} $cmdrange break #foreach {s e step} $cmdrange break
lassign $cmdrange s e step lassign $cmdrange s e step
set cmd [string range $data $s $e] set cmd [string range $data $s $e]
commDebug {puts stderr "<$chan> cmd <$data>"} commDebug {puts stderr "<$chan> cmd <$data>"}
if {[string equal "" $cmd]} break if {[string equal "" $cmd]} break
@ -1849,7 +1848,7 @@ namespace eval ::punk::icomm {
# backslash-quoted braces we look for double-backslashes # backslash-quoted braces we look for double-backslashes
# as well and skip them. Without this a string like '{puts # as well and skip them. Without this a string like '{puts
# \\}' will incorrectly find a \} at the end, missing the # \\}' will incorrectly find a \} at the end, missing the
# end of the word. # end of the word.
set re {((\\\\)|([{}])|(\\[{}]))} ;#split out for dumb editor to fix highlighting set re {((\\\\)|([{}])|(\\[{}]))} ;#split out for dumb editor to fix highlighting
# ^^ ^ ^ # ^^ ^ ^
# |\\ regular \quoted # |\\ regular \quoted
@ -2018,14 +2017,14 @@ tcl::namespace::eval punk::icomm::lib {
tcl::namespace::path [tcl::namespace::parent] tcl::namespace::path [tcl::namespace::parent]
#*** !doctools #*** !doctools
#[subsection {Namespace punk::icomm::lib}] #[subsection {Namespace punk::icomm::lib}]
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} { #proc utility1 {p1 args} {
# #*** !doctools # #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1 # #[para]Description of utility1
# return 1 # return 1
#} #}
@ -2043,16 +2042,16 @@ tcl::namespace::eval punk::icomm::lib {
#tcl::namespace::eval punk::icomm::system { #tcl::namespace::eval punk::icomm::system {
#*** !doctools #*** !doctools
#[subsection {Namespace punk::icomm::system}] #[subsection {Namespace punk::icomm::system}]
#[para] Internal functions that are not part of the API #[para] Internal functions that are not part of the API
#} #}
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
# Sample 'about' function with punk::args documentation # Sample 'about' function with punk::args documentation
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
tcl::namespace::eval punk::icomm { tcl::namespace::eval punk::icomm {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
variable PUNKARGS variable PUNKARGS
@ -2061,7 +2060,7 @@ tcl::namespace::eval punk::icomm {
lappend PUNKARGS [list { lappend PUNKARGS [list {
@id -id "(package)punk::icomm" @id -id "(package)punk::icomm"
@package -name "punk::icomm" -help\ @package -name "punk::icomm" -help\
"taken from tcllib comm package "taken from tcllib comm package
todo - describe changes" todo - describe changes"
}] }]
@ -2076,7 +2075,7 @@ tcl::namespace::eval punk::icomm {
set about_topics [list] set about_topics [list]
foreach f $topic_funs { foreach f $topic_funs {
set tail [namespace tail $f] set tail [namespace tail $f]
lappend about_topics [string range $tail [string length get_topic_] end] lappend about_topics [string range $tail [string length get_topic_] end]
} }
#Adjust this function or 'default_topics' if a different order is required #Adjust this function or 'default_topics' if a different order is required
return [lsort $about_topics] return [lsort $about_topics]
@ -2084,11 +2083,11 @@ tcl::namespace::eval punk::icomm {
proc default_topics {} {return [list Description *]} proc default_topics {} {return [list Description *]}
# ------------------------------------------------------------- # -------------------------------------------------------------
# get_topic_ functions add more to auto-include in about topics # get_topic_ functions add more to auto-include in about topics
# ------------------------------------------------------------- # -------------------------------------------------------------
proc get_topic_Description {} { proc get_topic_Description {} {
punk::args::lib::tstr [string trim { punk::args::lib::tstr [string trim {
package punk::icomm package punk::icomm
description to come.. description to come..
} \n] } \n]
} }
@ -2122,9 +2121,9 @@ tcl::namespace::eval punk::icomm {
# we re-use the argument definition from punk::args::standard_about and override some items # we re-use the argument definition from punk::args::standard_about and override some items
set overrides [dict create] set overrides [dict create]
dict set overrides @id -id "::punk::icomm::about" dict set overrides @id -id "::punk::icomm::about"
dict set overrides @cmd -name "punk::icomm::about" dict set overrides @cmd -name "punk::icomm::about"
dict set overrides @cmd -help [string trim [punk::args::lib::tstr { dict set overrides @cmd -help [string trim [punk::args::lib::tstr {
About punk::icomm About punk::icomm
}] \n] }] \n]
dict set overrides topic -choices [list {*}[punk::icomm::argdoc::about_topics] *] dict set overrides topic -choices [list {*}[punk::icomm::argdoc::about_topics] *]
dict set overrides topic -choicerestricted 1 dict set overrides topic -choicerestricted 1
@ -2140,7 +2139,7 @@ tcl::namespace::eval punk::icomm {
} }
} }
# end of sample 'about' function # end of sample 'about' function
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -2155,11 +2154,11 @@ namespace eval ::punk::args::register {
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready ## Ready
package provide punk::icomm [tcl::namespace::eval punk::icomm { package provide punk::icomm [tcl::namespace::eval punk::icomm {
variable pkg punk::icomm variable pkg punk::icomm
variable version variable version
set version 999999.0a1.0 set version 999999.0a1.0
}] }]
return return

3
src/modules/punk/jtest.tcl

@ -41,4 +41,5 @@
defaultSilent 0 defaultSilent 0
} }
#test #test
set x blah set x blah

407
src/modules/punk/repl-999999.0a1.0.tm

File diff suppressed because it is too large Load Diff

34
tclint.toml

@ -0,0 +1,34 @@
# patterns to exclude when searching directories. defaults to empty list.
# follows gitignore pattern format: https://git-scm.com/docs/gitignore#_pattern_format
# the one exception is that a leading "#" character will be automatically escaped
#exclude = ["ignore_me/", "ignore*.tcl", "/ignore_from_here"]
# lint violations to ignore. defaults to empty list.
# can also supply an inline table with a path and a list of violations to ignore under that path.
#ignore = [
# "unbraced-expr",
# { path = "files_with_long_lines/", rules = ["line-length"] }
#]
# extensions of files to lint when searching directories. defaults to tcl, sdc,
# xdc, and upf.
extensions = ["tcl", "tm", "sdc"]
# path to command spec defining tool-specific commands and arguments, generated by
# `tclint-plugins make-spec`.
#commands = "~/.tclint/openroad.json"
# with the exception of line-length, the [style] settings affect tclfmt rather than tclint.
[style]
# number of spaces to indent. can also be set to "tab". defaults to 4.
#indent = 2
# maximum allowed line length. defaults to 100.
line-length = 400
# maximum allowed number of consecutive blank lines. defaults to 2.
max-blank-lines = 10
# whether to require indenting of "namespace eval" blocks. defaults to true.
#indent-namespace-eval = false
# whether to expect a single space (true) or no spaces (false) surrounding the contents of a braced expression or script argument.
# defaults to false.
#spaces-in-braces = true
Loading…
Cancel
Save