Browse Source

whitespace changes and tclint.toml config for tclint LSP

master
Julian Noble 3 weeks ago
parent
commit
e8c1de935e
  1. 6
      src/modules/canaryspace-999999.0a1.0.tm
  2. 25
      src/modules/punk-0.1.tm
  3. 270
      src/modules/punk/char-999999.0a1.0.tm
  4. 2
      src/modules/punk/console-999999.0a1.0.tm
  5. 51
      src/modules/punk/icomm-999999.0a1.0.tm
  6. 3
      src/modules/punk/jtest.tcl
  7. 407
      src/modules/punk/repl-999999.0a1.0.tm
  8. 34
      tclint.toml

6
src/modules/canaryspace-999999.0a1.0.tm

@ -13,8 +13,8 @@
# Meta summary Diagnostic tool for namespace navigation/introspection to help avoid command conflicts.
# Meta description canaryspace loads the ::canaryspace namespace with wrappers for the set of commands
# Meta description that exist in the global namespace :: at the time the canaryspace package is loaded.
# Meta description These commands just emit info to stderr to assist in determining whether calls are
# Meta description unintentionally being run in the namespace.
# Meta description These commands just emit info to stderr to assist in determining whether calls are
# Meta description unintentionally being run in the namespace.
# Meta description This is often the case with commands which use uplevel 1 or similar constructs to call
# Meta description code in the callers namespace. If such commands need to run in arbitrary namespaces
# Meta description which may have arbitrary commands then uplevelled commands may need to be prefixed with
@ -68,6 +68,6 @@ namespace eval canaryspace {
## Ready
package provide canaryspace [namespace eval canaryspace {
::variable version
::set version 999999.0a1.0
::set version 999999.0a1.0
}]
return

25
src/modules/punk-0.1.tm

@ -1583,7 +1583,7 @@ namespace eval punk {
}
%# {
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%# not string length is not supported"
}
#string length - REVIEW -
@ -1595,7 +1595,7 @@ namespace eval punk {
%%# {
#experimental
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%%# not string length is not supported"
}
#string length - REVIEW -
@ -1606,7 +1606,7 @@ namespace eval punk {
}
%str {
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%str - not string-get is not supported"
}
lappend INDEX_OPERATIONS string-get
@ -1617,7 +1617,7 @@ namespace eval punk {
%sp {
#experimental
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%sp - not string-space is not supported"
}
lappend INDEX_OPERATIONS string-space
@ -1628,7 +1628,7 @@ namespace eval punk {
%empty {
#experimental
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%empty - not string-empty is not supported"
}
lappend INDEX_OPERATIONS string-empty
@ -1638,7 +1638,7 @@ namespace eval punk {
}
@words {
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%words - not list-words-from-string is not supported"
}
lappend INDEX_OPERATIONS list-words-from-string
@ -1650,7 +1650,7 @@ namespace eval punk {
#experimental - leading character based on result not input(?)
#input type is string - but output is list
set active_key_type "list"
if $get_not {
if {$get_not} {
error "!%chars - not list-chars-from-string is not supported"
}
lappend INDEX_OPERATIONS list-from_chars
@ -1662,7 +1662,7 @@ namespace eval punk {
#experimental - flatten one level of list
#join without arg - output is list
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!@join - not list-join-list is not supported"
}
lappend INDEX_OPERATIONS list-join-list
@ -1674,7 +1674,7 @@ namespace eval punk {
#experimental
#input type is list - but output is string
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%join - not string-join-list is not supported"
}
lappend INDEX_OPERATIONS string-join-list
@ -1684,7 +1684,7 @@ namespace eval punk {
}
%ansiview {
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%# not string-ansiview is not supported"
}
lappend INDEX_OPERATIONS string-ansiview
@ -1694,7 +1694,7 @@ namespace eval punk {
}
%ansiviewstyle {
set active_key_type "string"
if $get_not {
if {$get_not} {
error "!%# not string-ansiviewstyle is not supported"
}
lappend INDEX_OPERATIONS string-ansiviewstyle
@ -5368,6 +5368,7 @@ namespace eval punk {
#for var="val {a b c}"
#proc ::punk::val {{v {}}} {tailcall lindex $v}
#proc ::punk::val {{v {}}} {return $v} ;#2023 - approx 2x faster than the tailcall lindex version
#tclint-disable-next-line
proc ::punk::val [list [list v [purelist]]] {return $v}
#----------------
@ -7437,7 +7438,7 @@ namespace eval punk {
foreach v $known_punk {
set c1 [overtype::left $col1 $v]
if {[info exists ::env($v)]} {
set c2 [overtype::left $col2 [set ::env($v)]
set c2 [overtype::left $col2 [set ::env($v)]]
} else {
set c2 [overtype::right $col2 "(NOT SET)"]
}

270
src/modules/punk/char-999999.0a1.0.tm

@ -19,7 +19,7 @@
#[manpage_begin punkshell_module_punk::char 0 999999.0a1.0]
#[copyright "2024"]
#[titledesc {character-set and unicode utilities}] [comment {-- Name section and table of contents description --}]
#[moddesc {character-set nad unicode}] [comment {-- Description at end of page heading --}]
#[moddesc {character-set nad unicode}] [comment {-- Description at end of page heading --}]
#[require punk::char]
#[keywords module encodings]
#[description]
@ -49,11 +49,11 @@
#*** !doctools
#[item] [package {overtype}]
#[para] -
#[para] -
#[item] [package {textblock}]
#[para] -
#[para] -
#[item] [package console]
#[para] -
#[para] -
package require Tcl 8.6-
#dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review
@ -92,20 +92,20 @@ tcl::namespace::eval punk::char {
#just the 7-bit ascii. use [page ascii] for the 8-bit layout
proc ascii {} {return {
00 NUL 01 SOH 02 STX 03 ETX 04 EOT 05 ENQ 06 ACK 07 BEL
08 BS 09 HT 0a LF 0b VT 0c FF 0d CR 0e SO 0f SI
08 BS 09 HT 0a LF 0b VT 0c FF 0d CR 0e SO 0f SI
10 DLE 11 DC1 12 DC2 13 DC3 14 DC4 15 NAK 16 SYN 17 ETB
18 CAN 19 EM 1a SUB 1b ESC 1c FS 1d GS 1e RS 1f US
20 SP 21 ! 22 " 23 # 24 $ 25 % 26 & 27 '
28 ( 29 ) 2a * 2b + 2c , 2d - 2e . 2f /
30 0 31 1 32 2 33 3 34 4 35 5 36 6 37 7
38 8 39 9 3a : 3b ; 3c < 3d = 3e > 3f ?
40 @ 41 A 42 B 43 C 44 D 45 E 46 F 47 G
48 H 49 I 4a J 4b K 4c L 4d M 4e N 4f O
50 P 51 Q 52 R 53 S 54 T 55 U 56 V 57 W
58 X 59 Y 5a Z 5b [ 5c \ 5d ] 5e ^ 5f _
60 ` 61 a 62 b 63 c 64 d 65 e 66 f 67 g
68 h 69 i 6a j 6b k 6c l 6d m 6e n 6f o
70 p 71 q 72 r 73 s 74 t 75 u 76 v 77 w
18 CAN 19 EM 1a SUB 1b ESC 1c FS 1d GS 1e RS 1f US
20 SP 21 ! 22 " 23 # 24 $ 25 % 26 & 27 '
28 ( 29 ) 2a * 2b + 2c , 2d - 2e . 2f /
30 0 31 1 32 2 33 3 34 4 35 5 36 6 37 7
38 8 39 9 3a : 3b ; 3c < 3d = 3e > 3f ?
40 @ 41 A 42 B 43 C 44 D 45 E 46 F 47 G
48 H 49 I 4a J 4b K 4c L 4d M 4e N 4f O
50 P 51 Q 52 R 53 S 54 T 55 U 56 V 57 W
58 X 59 Y 5a Z 5b [ 5c \ 5d ] 5e ^ 5f _
60 ` 61 a 62 b 63 c 64 d 65 e 66 f 67 g
68 h 69 i 6a j 6b k 6c l 6d m 6e n 6f o
70 p 71 q 72 r 73 s 74 t 75 u 76 v 77 w
78 x 79 y 7a z 7b { 7c | 7d } 7e ~ 7f DEL
}}
@ -124,7 +124,7 @@ tcl::namespace::eval punk::char {
} elseif {[tcl::string::length $v] == 0} {
set v " "
}
append out "$k $v "
append out "$k $v "
if {$i > 0 && $i % 8 == 0} {
set out [tcl::string::range $out 0 end-2]
append out \n " "
@ -143,7 +143,7 @@ tcl::namespace::eval punk::char {
set out ""
append out [page dingbats] \n
set unicode_dict [charset_dictget Dingbats]
append out " "
set i 1
tcl::dict::for {k charinfo} $unicode_dict {
@ -155,7 +155,7 @@ tcl::namespace::eval punk::char {
} else {
set displayv $char
}
append out "$k $displayv "
append out "$k $displayv "
if {$i > 0 && $i % 8 == 0} {
set out [tcl::string::range $out 0 end-2]
append out \n " "
@ -205,7 +205,7 @@ tcl::namespace::eval punk::char {
tcl::dict::lappend d $encname $mname
}
}
}
}
foreach enc [lsort $encnames] {
set mime_enc [${encmimens}::mapencoding $enc]
if {$mime_enc ne ""} {
@ -236,7 +236,7 @@ tcl::namespace::eval punk::char {
tailcall page $encname {*}$args
}
#This will not display for example, c0 glyphs for cp437
#This will not display for example, c0 glyphs for cp437
# we could use the punk::ansi::cp437_map dict - but while that might be what some expect to see - it would probably be too much magic for this function - which is intended to align more with what Tcl's encoding convertfrom/to actually does.
# for nonprinting members of the page, 2 and 3 letter codes are used rather than unicode visualisation replacements or even unicode equivalent replacements known to be appropriate for the page
proc page {encname args} {
@ -255,7 +255,7 @@ tcl::namespace::eval punk::char {
#set d_ascii [pagedict_raw ascii]
set d_ascii [basedict]
set d_asciiposn [lreverse $d_ascii] ;#values should be unique except for "???". We are mainly interested in which ones have display-names e.g cr csi
set d_asciiposn [lreverse $d_ascii] ;#values should be unique except for "???". We are mainly interested in which ones have display-names e.g cr csi
#The results of this are best seen by comparing the ebcdic and ascii pages
set d_page [pagedict_raw $encname]
@ -285,7 +285,7 @@ tcl::namespace::eval punk::char {
set displayv $bytedisplay
} else {
if {[tcl::string::length $rawchar] == 0} {
set displayv " "
set displayv " "
} else {
#presumed 1
set displayv " $rawchar "
@ -294,7 +294,7 @@ tcl::namespace::eval punk::char {
}
}
append out "$k $displayv "
append out "$k $displayv "
if {$i > 0 && $i % $cols == 0} {
set out [tcl::string::range $out 0 end-2]
append out \n " "
@ -318,7 +318,7 @@ tcl::namespace::eval punk::char {
set outchar [encoding convertfrom $encpage $ch]
} else {
#here we will use \0xFFFD instead of our replacement string ??? - as the name pagechar implies always returning a single character. REVIEW.
set outchar $::punk::char::invalid_display_char
set outchar $::punk::char::invalid_display_char
}
return $outchar
}
@ -348,7 +348,7 @@ tcl::namespace::eval punk::char {
#set outchar [encoding convertto $encpage [format %c $num]]
set outchar [format %c $num]
} else {
set outchar $::punk::char::invalid_display_char
set outchar $::punk::char::invalid_display_char
}
return $outchar
}
@ -381,7 +381,7 @@ tcl::namespace::eval punk::char {
}
proc pagedict_raw {encname} {
variable invalid ;# ="???"
variable invalid ;# ="???"
set encname [encname $encname]
set d [tcl::dict::create]
for {set i 0} {$i < 256} {incr i} {
@ -409,7 +409,7 @@ tcl::namespace::eval punk::char {
tcl::dict::set d $k [tcl::dict::get $a128 $k]
} else {
#
tcl::dict::set d $k $invalid
tcl::dict::set d $k $invalid
}
if {$i <=32} {
@ -457,7 +457,7 @@ tcl::namespace::eval punk::char {
}
return $d
}
proc basedict {} {
#this gives same result independent of current value of 'encoding system'
set d [tcl::dict::create]
@ -529,10 +529,10 @@ tcl::namespace::eval punk::char {
return $d
}
#-- --- --- --- --- --- --- ---
#-- --- --- --- --- --- --- ---
# encoding convertfrom & encoding convertto can be somewhat confusing to think about. (Need to think in reverse.)
# e.g encoding convertto dingbats <somethingpretty> will output something that doesn't look dingbatty on screen.
#-- --- --- --- --- --- --- ---
#-- --- --- --- --- --- --- ---
#must use Tcl instead of tcl (at least for 8.6)
if {![package vsatisfies [package present Tcl] 8.7-]} {
proc encodable "s {enc [encoding system]}" {
@ -574,7 +574,7 @@ tcl::namespace::eval punk::char {
}
}
}
#-- --- --- --- --- --- --- ---
#-- --- --- --- --- --- --- ---
proc test_japanese {{encoding jis0208}} {
#A very basic test of 2char encodings such as jis0208
set yatbun 日本 ;# encoding convertfrom jis0208 F|K\\
@ -584,7 +584,7 @@ tcl::namespace::eval punk::char {
set ebun [encoding convertto $encoding $bun]
puts "$encoding encoded: ${eyat} ${ebun}"
puts "reencoded: [encoding convertfrom $encoding $eyat] [encoding convertfrom $encoding $ebun]"
return $yatbun
return $yatbun
}
proc test_grave {} {
set g [format %c 0x300]
@ -692,7 +692,7 @@ tcl::namespace::eval punk::char {
#ESC ( B ESC ) B ASCII Set
#ESC ( 0 ESC ) 0 Special Graphics
#ESC ( 1 ESC ) 1 Alternate Character ROM Standard Character Set
#ESC ( 2 ESC ) 2 Alternate Character ROM Special Graphic
#ESC ( 2 ESC ) 2 Alternate Character ROM Special Graphic
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# Unicode character sets - some hardcoded - some loadable from data files
@ -700,7 +700,7 @@ tcl::namespace::eval punk::char {
variable charinfo [tcl::dict::create]
variable charsets [tcl::dict::create]
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# Aggregate character sets (ones that pick various ranges from underlying unicode ranges)
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
@ -741,7 +741,7 @@ tcl::namespace::eval punk::char {
}
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# Unicode ranges
# Unicode ranges
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
# -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
tcl::dict::set charsets "greek" [list ranges [list {start 880 end 1023} {start 7936 end 8191}] description "Greek and Coptic" settype "other"]
@ -975,9 +975,9 @@ tcl::namespace::eval punk::char {
variable charset_extents_rangenames ;# dict keyed on start,end pointing to list of 2-tuples {setname rangeindex_within_set}
#build 2 indexes for each range.(charsets are not just unicode blocks so can have multiple ranges)
#Note that a range could be as small as a single char (startpoint = endpoint) so there can be many ranges with same start and end if charsets use some of the same subsets.
#as each charset_extents_startpoins,charset_extents_endpoints is built - the associated range name and index is appended to the rangenames dict
#startpoints - key is startpoint of a range, value is list of endpoints one for each range starting at this startpoint-key
#endpoints - key is endpoint of a range, value is list of startpoints one for each range ending at this endpoint-key
#as each charset_extents_startpoins,charset_extents_endpoints is built - the associated range name and index is appended to the rangenames dict
#startpoints - key is startpoint of a range, value is list of endpoints one for each range starting at this startpoint-key
#endpoints - key is endpoint of a range, value is list of startpoints one for each range ending at this endpoint-key
proc _build_charset_extents {} {
variable charsets
variable charset_extents_startpoints
@ -995,7 +995,7 @@ tcl::namespace::eval punk::char {
set end [tcl::dict::get [lindex $ranges 0] end]
if {![tcl::dict::exists $charset_extents_startpoints $start] || $end ni [tcl::dict::get $charset_extents_startpoints $start]} {
#assertion if end wasn't in startpoits list - then start won't be in endpoints list
tcl::dict::lappend charset_extents_startpoints $start $end
tcl::dict::lappend charset_extents_startpoints $start $end
tcl::dict::lappend charset_extents_endpoints $end $start
}
tcl::dict::lappend charset_extents_rangenames ${start},${end} [list $setname 1]
@ -1023,14 +1023,14 @@ tcl::namespace::eval punk::char {
#no need to sort charset_extents_rangenames - lookup only done using dict methods
return [tcl::dict::size $charset_extents_startpoints]
}
_build_charset_extents ;#rebuilds for all charsets
_build_charset_extents ;#rebuilds for all charsets
#nerdfonts are within the Private use E000 - F8FF range
proc load_nerdfonts {} {
variable charsets
variable charinfo
package require fileutil
set ver [package provide punk::char]
set ver [package provide punk::char]
if {$ver ne ""} {
set ifneeded [package ifneeded punk::char [package provide punk::char]]
#puts stderr "punk::char ifneeded script: $ifneeded"
@ -1038,7 +1038,7 @@ tcl::namespace::eval punk::char {
set basedir [file dirname [lindex $sourceinfo end]]
} else {
#review - will only work at package load time
set scr [info script]
set scr [info script]
if {$scr eq ""} {
error "load_nerdfonts unable to determine package folder"
}
@ -1048,7 +1048,7 @@ tcl::namespace::eval punk::char {
set fname [file join $pkg_data_dir nerd-fonts-glyph-list.txt]
if {[file exists $fname]} {
#puts stderr "load_nerdfonts loading $fname"
set data [fileutil::cat -translation binary $fname]
set data [fileutil::cat -translation binary $fname]
set short_seen [tcl::dict::create]
set current_set_range [tcl::dict::create]
set filesets_loading [list]
@ -1066,7 +1066,7 @@ tcl::namespace::eval punk::char {
dict unset charset $setname
}
set newrange [list start $dec end $dec]
tcl::dict::set current_set_range $setname $newrange
tcl::dict::set current_set_range $setname $newrange
tcl::dict::set charsets $setname [list ranges [list $newrange] description "nerd fonts $rawsetname" settype "nerdfonts privateuse"]
lappend filesets_loading $setname
@ -1080,13 +1080,13 @@ tcl::namespace::eval punk::char {
#overwrite last ranges element
set rangelist [lrange [tcl::dict::get $charsets $setname ranges] 0 end-1]
lappend rangelist [tcl::dict::get $current_set_range $setname]
tcl::dict::set charsets $setname ranges $rangelist
tcl::dict::set charsets $setname ranges $rangelist
} else {
#new range for set
tcl::dict::set current_set_range $setname start $dec
tcl::dict::set current_set_range $setname end $dec
set rangelist [tcl::dict::get $charsets $setname ranges]
lappend rangelist [tcl::dict::get $current_set_range $setname]
lappend rangelist [tcl::dict::get $current_set_range $setname]
tcl::dict::set charsets $setname ranges $rangelist
}
@ -1130,7 +1130,7 @@ tcl::namespace::eval punk::char {
proc package_base {} {
#assume punk::char is in .tm form and we can use the package provide statement to determine base location
#review
#review
set pkgver [package present punk::char]
set pkginfo [package ifneeded punk::char $pkgver]
set tmfile [lindex $pkginfo end]
@ -1156,7 +1156,7 @@ tcl::namespace::eval punk::char {
if {[tcl::dict::exists $dictValue {*}$keys]} {
return [tcl::dict::get $dictValue {*}$keys]
} else {
return [lindex $args end]
return [lindex $args end]
}
}
}
@ -1193,7 +1193,7 @@ tcl::namespace::eval punk::char {
if {[set pcolon [tcl::string::first ";" $ln]] > 0} {
set lhs [tcl::string::trim [tcl::string::range $ln 0 $pcolon-1]]
set name [tcl::string::trim [tcl::string::range $ln $pcolon+1 end]]
set lhsparts [split $lhs .]
set lhsparts [split $lhs .]
set start [lindex $lhsparts 0]
set end [lindex $lhsparts end]
#puts "$start -> $end '$name'"
@ -1207,9 +1207,9 @@ tcl::namespace::eval punk::char {
return $block_count
}
#unicode scripts
#unicode scripts
#unicode UnicodeData.txt
#unicode UnicodeData.txt
@ -1225,8 +1225,8 @@ tcl::namespace::eval punk::char {
# -- ---
#A = Ambiguous - All characters that can be sometimes wide and sometimes narrow. (wide in east asian legacy sets, narrow in non-east asian usage) (private use chars considered ambiguous)
#F = East Asian Full-width
#H = East Asian Half-width
#N = Not east Asian (Neutral) - all other characters. (do not occur in legacy East Asian character sets) - treated like Na
#H = East Asian Half-width
#N = Not east Asian (Neutral) - all other characters. (do not occur in legacy East Asian character sets) - treated like Na
#Na = East Asian Narrow - all other characters that are always narrow and have explicit full-width counterparts (e.g includes all of ASCII)
#W = East Asian Wide - all other characters that are always wide (Occur only in the context of Eas Asian Typography)
# -- ---
@ -1304,7 +1304,7 @@ tcl::namespace::eval punk::char {
set initial_fields $known_fields
if {"testwidth" ni $opt_fields} {
if {"testwidth" ni $opt_except} {
lappend opt_except testwidth
lappend opt_except testwidth
}
}
if {"char" ni $opt_fields} {
@ -1369,13 +1369,13 @@ tcl::namespace::eval punk::char {
#testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time
set existing_testwidth ""
if {[tcl::dict::exists $charinfo $dec_char testwidth]} {
set existing_testwidth [tcl::dict::get $charinfo $dec_char testwidth]
set existing_testwidth [tcl::dict::get $charinfo $dec_char testwidth]
}
if {$existing_testwidth eq ""} {
#no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.)
set char [format %c $dec_char]
set chwidth [char_info_testwidth $char]
tcl::dict::set returninfo testwidth $chwidth
#cache it. todo - -verify flag to force recalc in case font/terminal changed in some way?
tcl::dict::set charinfo $dec_char testwidth $chwidth
@ -1387,10 +1387,10 @@ tcl::namespace::eval punk::char {
set char [format %c $dec_char]
tcl::dict::set returninfo char $char
}
memberof {
memberof {
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
set memberof [list]
@ -1435,7 +1435,7 @@ tcl::namespace::eval punk::char {
set splen [tcl::dict::size $charset_extents_startpoints]
set eplen [tcl::dict::size $charset_extents_endpoints]
set s [lsearch -bisect -integer $skeys $dec]
set s_at_or_below [lrange $skeys 0 $s]
set s_at_or_below [lrange $skeys 0 $s]
set e_of_s [list]
foreach sk $s_at_or_below {
lappend e_of_s {*}[tcl::dict::get $charset_extents_startpoints $sk]
@ -1472,16 +1472,16 @@ tcl::namespace::eval punk::char {
set eps [list]
}
return [tcl::dict::create startpoints $splen endpoints $eplen midpoint [expr {floor($eplen/2)}] posn $e lhs_endpoints_to_check "[llength $reduced_endpoints]/[llength $e_of_s]=[llength $sps]ranges" rhs_startpoints_to_check "[llength $reduced_startpoints]/[llength $s_of_e]=[llength $eps]"]
}
#for just a few extra sets such as wgl4 and unicode blocks loaded - this gives e.g 5x + better performance than the simple search above, and up to twice as slow for tcl 8.6
#performance biased towards lower numbered characters (which is not too bad in the context of unicode)
#todo - could be tuned to perform better at end by assuming a fairly even distribution of ranges - and so searching startpoint ranges first for items left of middle, endpoint ranges first for items right of middle
#review with scripts loaded and more defined ranges..
#review with scripts loaded and more defined ranges..
#This algorithm supports arbitrary overlapping ranges and ranges with same start & endpoints
#Should be much better than O(n) for n sets except for pathological case of member of all or nearly-all intervals ?
#review - compare with 'interval tree' algorithms.
#review - compare with 'interval tree' algorithms.
proc char_info_dec_memberof {dec} {
variable charset_extents_startpoints
variable charset_extents_endpoints
@ -1563,7 +1563,7 @@ tcl::namespace::eval punk::char {
set matchcount 0
foreach glob $and_globs {
if {[tcl::string::match -nocase $glob $s] || [tcl::string::match -nocase $glob $d]} {
incr matchcount
incr matchcount
}
}
if {$matchcount == [llength $and_globs]} {
@ -1595,12 +1595,12 @@ tcl::namespace::eval punk::char {
}
#non-overlapping unicode blocks
#non-overlapping unicode blocks
proc char_blocks {{name_or_glob *}} {
variable charsets
#todo - more efficient datastructures?
if {![regexp {[?*]} $name_or_glob]} {
#no glob - just retrieve it
#no glob - just retrieve it
if {[tcl::dict::exists $charsets $name_or_glob]} {
if {[tcl::dict::get $charsets $name_or_glob settype] eq "block"} {
return [tcl::dict::create $name_or_glob [tcl::dict::get $charsets $name_or_glob]]
@ -1630,7 +1630,7 @@ tcl::namespace::eval punk::char {
proc charset_names {{name_or_glob *}} {
variable charsets
if {![regexp {[?*]} $name_or_glob]} {
#no glob - just retrieve it
#no glob - just retrieve it
if {[tcl::dict::exists $charsets $name_or_glob]} {
return [list $name_or_glob]
}
@ -1641,7 +1641,7 @@ tcl::namespace::eval punk::char {
}
} else {
if {$name_or_glob eq "*"} {
return [lsort [tcl::dict::keys $charsets]]
return [lsort [tcl::dict::keys $charsets]]
}
#tcl::dict::keys $dict <pattern> doesn't have option for case insensitive searches
return [lsort [lsearch -all -inline -nocase [tcl::dict::keys $charsets] $name_or_glob]]
@ -1655,7 +1655,7 @@ tcl::namespace::eval punk::char {
variable charsets
#dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results
#set sortedkeys [lsort -increasing -dictionary [tcl::dict::keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
set sortedkeys [lsort -increasing [tcl::dict::keys $charsets]]
set sortedkeys [lsort -increasing [tcl::dict::keys $charsets]]
if {$namesearch eq "*"} {
return $sortedkeys
}
@ -1664,7 +1664,7 @@ tcl::namespace::eval punk::char {
return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else {
#return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted?
return [lsearch -inline -nocase $sortedkeys $namesearch] ;#no globs
return [lsearch -inline -nocase $sortedkeys $namesearch] ;#no globs
}
}
proc charsets {{namesearch *}} {
@ -1738,7 +1738,7 @@ tcl::namespace::eval punk::char {
set opt_ansi [tcl::dict::get $opts -ansi]
set opt_lined [tcl::dict::get $opts -lined]
# -- --- --- ---
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
if {$opt_ansi} {
set a1 [a BLACK white bold]
@ -1768,7 +1768,7 @@ tcl::namespace::eval punk::char {
}
set i 1
append out \n $prefix $charsetname
append out \n
append out \n
set marker_line $prefix
set line $prefix
@ -1847,7 +1847,7 @@ tcl::namespace::eval punk::char {
} else {
set charset_dict [charset_dictget $charsetname]
}
set col_items_short [list]
set col_items_desc [list]
tcl::dict::for {k inf} $charset_dict {
@ -1873,7 +1873,7 @@ tcl::namespace::eval punk::char {
return $out
}
#use console cursor movements to test and cache the column-width of each char in the set of characters returned by the search criteria
#use console cursor movements to test and cache the column-width of each char in the set of characters returned by the search criteria
proc charset_calibrate {namesearch args} {
variable charsets
variable charinfo
@ -1909,7 +1909,7 @@ tcl::namespace::eval punk::char {
set twidth [tcl::dict::get $charinfo $dec testwidth]
}
if {$twidth eq ""} {
#puts -nonewline stdout "." ;#this
#puts -nonewline stdout "." ;#this
set width [char_info_testwidth $ch] ;#based on console test rather than unicode props
tcl::dict::set charinfo $dec testwidth $width
} else {
@ -1925,10 +1925,10 @@ tcl::namespace::eval punk::char {
#maint warning - also in overtype!
#intended for single grapheme - but will work for multiple
#cannot contain ansi or newlines
#cannot contain ansi or newlines
#(a cache of ansifreestring_width calls - as these are quite regex heavy)
#review - effective memory leak on longrunning programs if never cleared
#tradeoff in fragmenting cache and reducing efficiency vs ability to clear in a scoped manner
#tradeoff in fragmenting cache and reducing efficiency vs ability to clear in a scoped manner
proc grapheme_width_cached {ch {key ""}} {
variable grapheme_widths
#if key eq "*" - we won't be able to clear that cache individually. Perhaps that's ok
@ -1975,7 +1975,7 @@ tcl::namespace::eval punk::char {
# - compare with wcswidth returning -1 for entire string containing such in python,perl
proc wcswidth {string} {
#faster than textutil::wcswidth (at least for string up to a few K in length)
#..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?)
#..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?)
#Tcl initial evaluation stack size is 2000 (? review)
#REVIEW - when we cater for grapheme clusters - we can't just split the string at arbitrary points like this!!
set chunksize 2000
@ -1984,14 +1984,14 @@ tcl::namespace::eval punk::char {
set startidx 0
set endidx [expr {$startidx + $chunksize -1}]
for {set i 0} {$i < $chunks_required} {incr i} {
set chunk [tcl::string::range $string $startidx $endidx]
set chunk [tcl::string::range $string $startidx $endidx]
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach c $codes {
if {$c <= 255 && !($c < 31 || $c == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
#todo - compare with python or other lang wcwidth
incr width
} elseif {$c < 917504 || $c > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $c]
@ -2023,7 +2023,7 @@ tcl::namespace::eval punk::char {
set graphemes [list]
while {$i < [tcl::string::length $string]} {
set aftercluster [tk::endOfCluster $string $i]
lappend graphemes [string range $string $i $aftercluster-1]
lappend graphemes [string range $string $i $aftercluster-1]
set i $aftercluster
}
return $graphemes
@ -2052,15 +2052,15 @@ tcl::namespace::eval punk::char {
}
}
incr width $gw
#if {[string first \u200d $g] >=0} {
# incr width 2
# incr width 2
#} else {
# #other joiners???
# incr width [wcswidth_unclustered $g]
#}
} else {
incr width [wcswidth_unclustered $g]
incr width [wcswidth_unclustered $g]
}
set i $aftercluster
}
@ -2071,8 +2071,8 @@ tcl::namespace::eval punk::char {
scan $char %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
return 1
#todo - compare with python or other lang wcwidth
return 1
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
return [textutil::wcswidth_char $dec] ;#note textutil::wcswidth_char takes a decimal codepoint!
@ -2086,8 +2086,8 @@ tcl::namespace::eval punk::char {
scan $c %c dec
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
#todo - compare with python or other lang wcwidth
incr width
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec] ;#takes decimal codepoint
@ -2105,7 +2105,7 @@ tcl::namespace::eval punk::char {
# - compare with wcswidth returning -1 for entire string containing such in python,perl
proc wcswidth_unclustered {string} {
#faster than textutil::wcswidth (at least for string up to a few K in length)
#..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?)
#..but - 'scan' is horrible for 400K+ (Tcl evaluation stack has to be reallocated/copied?)
#Tcl initial evaluation stack size is 2000 (? review)
#we can only split the string at arbitrary points like this because we are specifically dealing with input that has no clusters!.
set chunksize 2000
@ -2114,14 +2114,14 @@ tcl::namespace::eval punk::char {
set startidx 0
set endidx [expr {$startidx + $chunksize -1}]
for {set i 0} {$i < $chunks_required} {incr i} {
set chunk [tcl::string::range $string $startidx $endidx]
set chunk [tcl::string::range $string $startidx $endidx]
set codes [scan $chunk [tcl::string::repeat %c [tcl::string::length $chunk]]]
foreach dec $codes {
if {$dec <= 255 && !($dec < 31 || $dec == 127)} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
incr width
#todo - compare with python or other lang wcwidth
incr width
} elseif {$dec < 917504 || $dec > 917631} {
#TODO - various other joiners and non-printing chars
set w [textutil::wcswidth_char $dec]
@ -2141,8 +2141,8 @@ tcl::namespace::eval punk::char {
proc wcswidth0 {string} {
#faster than textutil::wcswidth (at least for string up to a few K in length)
#..but - 'scan' is horrible for 400K+
#TODO
#..but - 'scan' is horrible for 400K+
#TODO
set codes [scan $string [tcl::string::repeat %c [tcl::string::length $string]]]
set width 0
foreach dec $codes {
@ -2150,9 +2150,9 @@ tcl::namespace::eval punk::char {
if {$dec < 917504 || $dec > 917631} {
if {$dec <= 255} {
#review - non-printing ascii? why does textutil::wcswidth report 1 ??
#todo - compare with python or other lang wcwidth
#todo - compare with python or other lang wcwidth
if {!($dec < 31 || $dec == 127)} {
incr width
incr width
}
} else {
#TODO - various other joiners and non-printing chars
@ -2179,11 +2179,11 @@ tcl::namespace::eval punk::char {
#prerequisites - no ansi escapes - no newlines - utf8 encoding assumed
#review - what about \r \t \b ?
#NO processing of \b - already handled in ansi::printing_length which then calls this
#this version breaks string into sequences of ascii vs unicode
#this version breaks string into sequences of ascii vs unicode
proc ansifreestring_width {text} {
#caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines
#we can c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#if {[tcl::string::first \033 $text] >= 0} {
# error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first"
@ -2204,11 +2204,11 @@ tcl::namespace::eval punk::char {
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#if {[regexp $re_leading_diacritic $text]} {
# set text " $text"
# set text " $text"
#}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
# -- --- --- --- --- --- ---
@ -2221,10 +2221,10 @@ tcl::namespace::eval punk::char {
#for now - strip them out
#ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u200b zero width space
#ZWSP \u200b zero width space
#\uFFEFBOM/ ZWNBSP and others that should be zero width
#todo - work out proper way to mark/group zero width.
#todo - work out proper way to mark/group zero width.
#set text [tcl::string::map [list \u200b "" \u200c "" \u200d "" \uFFEF ""] $text]
set text [tcl::string::map [list \u200b "" \u200c "" \u200d ""] $text]
@ -2241,7 +2241,7 @@ tcl::namespace::eval punk::char {
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
#some or all of these may have a visual representation in other encodings e.g cp855 seems to have 1 width for them all
#we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here
#we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here
#they should also be unlikely to be present in an ansi-free string (as is a precondition for use of this function)
set text [regsub -all {[\u0080-\u009f]+} $text ""]
@ -2262,7 +2262,7 @@ tcl::namespace::eval punk::char {
#set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
#tcl pre 2023-11 - braced high unicode regexes don't work
#fixed in bug-4ed788c618 2023-11
#set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
#set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
#maintain unicode as sequences - todo - scan for grapheme clusters
#set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0100-\U10FFFF\])+" $text]
@ -2291,7 +2291,7 @@ tcl::namespace::eval punk::char {
#we can c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#if {[tcl::string::first \033 $text] >= 0} {
# error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first"
@ -2312,11 +2312,11 @@ tcl::namespace::eval punk::char {
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#if {[regexp $re_leading_diacritic $text]} {
# set text " $text"
# set text " $text"
#}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#review
@ -2325,7 +2325,7 @@ tcl::namespace::eval punk::char {
#ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u0200b zero width space
#ZWSP \u0200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
@ -2343,10 +2343,10 @@ tcl::namespace::eval punk::char {
}
#review - wcswidth should detect these
set re_ascii_fullwidth {[\uFF01-\uFF5e]}
set re_ascii_fullwidth {[\uFF01-\uFF5e]}
set doublewidth_char_count 0
set zerowidth_char_count 0
set zerowidth_char_count 0
#split just to get the standalone character widths - and then scan for other combiners (?)
#review
#set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
@ -2354,7 +2354,7 @@ tcl::namespace::eval punk::char {
#fixed in bug-4ed788c618 2023-11
#set uc_sequences [regexp -all -inline -indices {[\u0100-\U10FFFF]} $text]
#set uc_sequences [regexp -all -inline -indices "\[\u0100-\U10FFFF\]" $text] ;#e.g for string: \U0001f9d1abc\U001f525ab returns {0 0} {4 4}
set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
foreach c $uc_chars {
if {[regexp $re_ascii_fullwidth $c]} {
incr doublewidth_char_count
@ -2364,12 +2364,12 @@ tcl::namespace::eval punk::char {
# b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tclllib docs.
#(character width is a complex context-dependent topic)
# c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run.
# d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here
# d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here
#Despite all this - the mechanism is hoped to give best effort consistency for terminals
#further work needed for combining emojis etc - which can't be done in a per character loop
#further work needed for combining emojis etc - which can't be done in a per character loop
#todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split
# to process sequences of unicode.
#- and the user has the option to test character sets first if terminal position reporting gives better results
#- and the user has the option to test character sets first if terminal position reporting gives better results
if {[char_info_is_testwidth_cached $c]} {
set width [char_info_testwidth_cached $c]
} else {
@ -2378,7 +2378,7 @@ tcl::namespace::eval punk::char {
set width [textutil::wcswidth_char [scan $c %c]]
}
if {$width == 0} {
incr zerowidth_char_count
incr zerowidth_char_count
} elseif {$width == 2} {
incr doublewidth_char_count
}
@ -2395,7 +2395,7 @@ tcl::namespace::eval punk::char {
#we can c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#if {[tcl::string::first \033 $text] >= 0} {
# error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first"
@ -2416,11 +2416,11 @@ tcl::namespace::eval punk::char {
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)}
#if {[regexp $re_leading_diacritic $text]} {
# set text " $text"
# set text " $text"
#}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
@ -2437,7 +2437,7 @@ tcl::namespace::eval punk::char {
return [tcl::string::length $text]
}
#slow when ascii mixed with unicode (but why?)
#slow when ascii mixed with unicode (but why?)
return [punk::char::wcswidth $text]
}
#This shouldn't be called on text containing ansi codes!
@ -2516,11 +2516,11 @@ tcl::namespace::eval punk::char {
return [format $fmt {*}$declist]
}
#split into plaintext and runs of combiners (combining diacritical marks - not ZWJ or ZWJNJ)
#split into plaintext and runs of combiners (combining diacritical marks - not ZWJ or ZWJNJ)
proc combiner_split {text} {
#split into odd numbered list (or zero) in a similar way to punk::ansi::ta::_perlish_split
#
#set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
#set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set graphemes [list]
if {[tcl::string::length $text] == 0} {
return {}
@ -2528,12 +2528,12 @@ tcl::namespace::eval punk::char {
set list [list]
set start 0
set strlen [tcl::string::length $text]
#make sure our regexes aren't non-greedy - or we may not have exit condition for loop
#make sure our regexes aren't non-greedy - or we may not have exit condition for loop
#review
while {$start < $strlen && [regexp -start $start -indices -- {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} $text match]} {
lassign $match matchStart matchEnd
#puts "->start $start ->match $matchStart $matchEnd"
lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart $matchEnd]
lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart $matchEnd]
set start [expr {$matchEnd+1}]
}
lappend list [tcl::string::range $text $start end]
@ -2547,7 +2547,7 @@ tcl::namespace::eval punk::char {
#This is difficult in Tcl without unicode property based Character Classes in the regex engine
#review - this needs to be performant - it is used a lot by punk terminal/ansi features
#todo - trie data structures for unicode?
#for now we can at least combine diacritics
#for now we can at least combine diacritics
#should also handle the ZWJ (and the variation selectors? eg \uFE0F) character which should account for emoji clusters
#Note - emoji cluster could be for example 11 code points/41 bytes (family emoji with skin tone modifiers for each member, 3 ZWJs)
#This still leaves a whole class of clusters.. korean etc unhandled :/
@ -2560,7 +2560,7 @@ tcl::namespace::eval punk::char {
set clist [split $pt ""]
lappend graphemes {*}[lrange $clist 0 end-1]
lappend graphemes [tcl::string::cat [lindex $clist end] $combiners]
}
}
#last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme
if {[lindex $csplits end] ne ""} {
lappend graphemes {*}[split [lindex $csplits end] ""]
@ -2575,7 +2575,7 @@ tcl::namespace::eval punk::char {
set combiner_decs [scan $combiners [tcl::string::repeat %c [tcl::string::length $combiners]]]
lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs]
lappend graphemes {*}$pt_decs
}
}
#last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme
if {[lindex $csplits end] ne ""} {
lappend graphemes {*}[scan [lindex $csplits end] [tcl::string::repeat %c [tcl::string::length [lindex $csplits end]]]]
@ -2592,7 +2592,7 @@ tcl::namespace::eval punk::char {
lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs]
}
lappend graphemes {*}$pt_decs
}
}
return $graphemes
}
proc grapheme_split2 {text} {
@ -2601,7 +2601,7 @@ tcl::namespace::eval punk::char {
foreach {pt combiners} [lrange $csplits 0 end-1] {
set clist [split $pt ""]
lappend graphemes {*}[lrange $clist 0 end-1] [tcl::string::cat [lindex $clist end] $combiners]
}
}
#last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme
if {[lindex $csplits end] ne ""} {
lappend graphemes {*}[split [lindex $csplits end] ""]
@ -2645,10 +2645,10 @@ tcl::namespace::eval punk::char {
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
## Ready
package provide punk::char [tcl::namespace::eval punk::char {
variable version
set version 999999.0a1.0
set version 999999.0a1.0
}]
return

2
src/modules/punk/console-999999.0a1.0.tm

@ -777,7 +777,7 @@ namespace eval punk::console {
set extension [lindex [split $waitvar($callid) -] 1]
if {$extension eq ""} {
puts "blank extension $waitvar($callid)"
puts "->[set $waitvar($callid]<-"
puts "->[set $waitvar($callid)]<-"
}
puts stderr "get_ansi_response_payload Extending timeout by $extension"
after cancel $timeoutid($callid)

51
src/modules/punk/icomm-999999.0a1.0.tm

@ -21,7 +21,7 @@
#[manpage_begin shellspy_module_punk::icomm 0 999999.0a1.0]
#[copyright "2025"]
#[titledesc {Module API}] [comment {-- Name section and table of contents description --}]
#[moddesc {-}] [comment {-- Description at end of page heading --}]
#[moddesc {-}] [comment {-- Description at end of page heading --}]
#[require punk::icomm]
#[keywords module]
#[description]
@ -107,7 +107,7 @@ package require punk::args
#
# Note that the actual code was changed in several places (Reordered,
# eval speedup)
#
#
# comm works just like Tk's send, except that it uses sockets.
# These commands work just like "send" and "winfo interps":
#
@ -116,7 +116,7 @@ package require punk::args
#
# See the manual page comm.n for further details on this package.
package require Tcl 8.6-
package require Tcl 8.6-
package require snit ; # comm::future objects.
namespace eval ::punk::icomm {
@ -196,7 +196,7 @@ namespace eval ::punk::icomm {
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[subsection {Namespace punk::icomm}]
#[para] Core API functions for punk::icomm
#[para] Core API functions for punk::icomm
#[list_begin definitions]
variable PUNKARGS
@ -306,7 +306,7 @@ namespace eval ::punk::icomm {
## API: Setup async result generation for a remotely invoked command.
# (future,fid,<fid>) -> list (future)
# (current,async) -> bool (default 0)
# (current,async) -> bool (default 0)
# (current,state) -> list (chan fid cmd ser)
proc comm_cmd_return_async {chan} {
@ -711,7 +711,6 @@ namespace eval ::punk::icomm {
#
# Results:
# None.
proc commConfigure {chan {force 0} args} {
variable comm
@ -891,7 +890,7 @@ namespace eval ::punk::icomm {
#treat as always connected - call commIncoming imediately.
punk::icomm::commIncoming $chan $comm($chan,tclchan) "localaddr" "localtclchan"
return
}
}
#-------------------------
@ -1133,7 +1132,7 @@ namespace eval ::punk::icomm {
}
if {![info exists vers]} {
close $fid
if {[info exists comm($chan,silent)] &&
if {[info exists comm($chan,silent)] &&
[string is true -strict $comm($chan,silent)]} {
return
}
@ -1382,7 +1381,7 @@ namespace eval ::punk::icomm {
# Unpack the indices, then extract the word.
#foreach {s e step} $cmdrange break
lassign $cmdrange s e step
set cmd [string range $data $s $e]
commDebug {puts stderr "<$chan> cmd <$data>"}
if {[string equal "" $cmd]} break
@ -1849,7 +1848,7 @@ namespace eval ::punk::icomm {
# backslash-quoted braces we look for double-backslashes
# as well and skip them. Without this a string like '{puts
# \\}' will incorrectly find a \} at the end, missing the
# end of the word.
# end of the word.
set re {((\\\\)|([{}])|(\\[{}]))} ;#split out for dumb editor to fix highlighting
# ^^ ^ ^
# |\\ regular \quoted
@ -2018,14 +2017,14 @@ tcl::namespace::eval punk::icomm::lib {
tcl::namespace::path [tcl::namespace::parent]
#*** !doctools
#[subsection {Namespace punk::icomm::lib}]
#[para] Secondary functions that are part of the API
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
# #[para]Description of utility1
# return 1
#}
@ -2043,16 +2042,16 @@ tcl::namespace::eval punk::icomm::lib {
#tcl::namespace::eval punk::icomm::system {
#*** !doctools
#[subsection {Namespace punk::icomm::system}]
#[para] Internal functions that are not part of the API
#[para] Internal functions that are not part of the API
#}
# == === === === === === === === === === === === === === ===
# == === === === === === === === === === === === === === ===
# Sample 'about' function with punk::args documentation
# == === === === === === === === === === === === === === ===
# == === === === === === === === === === === === === === ===
tcl::namespace::eval punk::icomm {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
variable PUNKARGS
@ -2061,7 +2060,7 @@ tcl::namespace::eval punk::icomm {
lappend PUNKARGS [list {
@id -id "(package)punk::icomm"
@package -name "punk::icomm" -help\
"taken from tcllib comm package
"taken from tcllib comm package
todo - describe changes"
}]
@ -2076,7 +2075,7 @@ tcl::namespace::eval punk::icomm {
set about_topics [list]
foreach f $topic_funs {
set tail [namespace tail $f]
lappend about_topics [string range $tail [string length get_topic_] end]
lappend about_topics [string range $tail [string length get_topic_] end]
}
#Adjust this function or 'default_topics' if a different order is required
return [lsort $about_topics]
@ -2084,11 +2083,11 @@ tcl::namespace::eval punk::icomm {
proc default_topics {} {return [list Description *]}
# -------------------------------------------------------------
# get_topic_ functions add more to auto-include in about topics
# get_topic_ functions add more to auto-include in about topics
# -------------------------------------------------------------
proc get_topic_Description {} {
punk::args::lib::tstr [string trim {
package punk::icomm
punk::args::lib::tstr [string trim {
package punk::icomm
description to come..
} \n]
}
@ -2122,9 +2121,9 @@ tcl::namespace::eval punk::icomm {
# we re-use the argument definition from punk::args::standard_about and override some items
set overrides [dict create]
dict set overrides @id -id "::punk::icomm::about"
dict set overrides @cmd -name "punk::icomm::about"
dict set overrides @cmd -name "punk::icomm::about"
dict set overrides @cmd -help [string trim [punk::args::lib::tstr {
About punk::icomm
About punk::icomm
}] \n]
dict set overrides topic -choices [list {*}[punk::icomm::argdoc::about_topics] *]
dict set overrides topic -choicerestricted 1
@ -2140,7 +2139,7 @@ tcl::namespace::eval punk::icomm {
}
}
# end of sample 'about' function
# == === === === === === === === === === === === === === ===
# == === === === === === === === === === === === === === ===
# -----------------------------------------------------------------------------
@ -2155,11 +2154,11 @@ namespace eval ::punk::args::register {
# -----------------------------------------------------------------------------
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
## Ready
package provide punk::icomm [tcl::namespace::eval punk::icomm {
variable pkg punk::icomm
variable version
set version 999999.0a1.0
set version 999999.0a1.0
}]
return

3
src/modules/punk/jtest.tcl

@ -41,4 +41,5 @@
defaultSilent 0
}
#test
set x blah
set x blah

407
src/modules/punk/repl-999999.0a1.0.tm

File diff suppressed because it is too large Load Diff

34
tclint.toml

@ -0,0 +1,34 @@
# patterns to exclude when searching directories. defaults to empty list.
# follows gitignore pattern format: https://git-scm.com/docs/gitignore#_pattern_format
# the one exception is that a leading "#" character will be automatically escaped
#exclude = ["ignore_me/", "ignore*.tcl", "/ignore_from_here"]
# lint violations to ignore. defaults to empty list.
# can also supply an inline table with a path and a list of violations to ignore under that path.
#ignore = [
# "unbraced-expr",
# { path = "files_with_long_lines/", rules = ["line-length"] }
#]
# extensions of files to lint when searching directories. defaults to tcl, sdc,
# xdc, and upf.
extensions = ["tcl", "tm", "sdc"]
# path to command spec defining tool-specific commands and arguments, generated by
# `tclint-plugins make-spec`.
#commands = "~/.tclint/openroad.json"
# with the exception of line-length, the [style] settings affect tclfmt rather than tclint.
[style]
# number of spaces to indent. can also be set to "tab". defaults to 4.
#indent = 2
# maximum allowed line length. defaults to 100.
line-length = 400
# maximum allowed number of consecutive blank lines. defaults to 2.
max-blank-lines = 10
# whether to require indenting of "namespace eval" blocks. defaults to true.
#indent-namespace-eval = false
# whether to expect a single space (true) or no spaces (false) surrounding the contents of a braced expression or script argument.
# defaults to false.
#spaces-in-braces = true
Loading…
Cancel
Save