Browse Source

tomlish support for 1.1.0 decoding and basic encoding

master
Julian Noble 7 days ago
parent
commit
6ffa781fad
  1. 2
      punkproject.toml
  2. 4774
      src/bootsupport/modules/overtype-1.6.6.tm
  3. BIN
      src/bootsupport/modules/test/tomlish-1.1.5.tm
  4. 8408
      src/bootsupport/modules/tomlish-1.1.6.tm
  5. 122
      src/modules/punk/cesu-999999.0a1.0.tm
  6. 2
      src/modules/punk/cesu-buildversion.txt
  7. 4774
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.6.tm
  8. BIN
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm
  9. 8408
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.6.tm
  10. 4774
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.6.tm
  11. BIN
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm
  12. 8408
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.6.tm
  13. BIN
      src/vendormodules/test/tomlish-1.1.5.tm
  14. 10327
      src/vendormodules/tomlish-1.1.6.tm
  15. 4774
      src/vfs/_vfscommon.vfs/modules/overtype-1.6.6.tm
  16. 126
      src/vfs/_vfscommon.vfs/modules/punk/cesu-0.1.1.tm
  17. BIN
      src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm
  18. BIN
      src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm.x
  19. 42
      src/vfs/_vfscommon.vfs/modules/tomlish-1.1.5.tm
  20. 8408
      src/vfs/_vfscommon.vfs/modules/tomlish-1.1.6.tm

2
punkproject.toml

@ -0,0 +1,2 @@
[project]
name = "punkshell"

4774
src/bootsupport/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/bootsupport/modules/test/tomlish-1.1.5.tm

Binary file not shown.

8408
src/bootsupport/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

122
src/modules/punk/cesu-999999.0a1.0.tm

@ -22,11 +22,11 @@
#[manpage_begin punkshell_module_punk::cesu 0 999999.0a1.0] #[manpage_begin punkshell_module_punk::cesu 0 999999.0a1.0]
#[copyright "2024"] #[copyright "2024"]
#[titledesc {CESU compatibility ehcoding scheme for utf-16: 8-Bit (CESU-8) ??}] [comment {-- Name section and table of contents description --}] #[titledesc {CESU compatibility ehcoding scheme for utf-16: 8-Bit (CESU-8) ??}] [comment {-- Name section and table of contents description --}]
#[moddesc {CESU experimental}] [comment {-- Description at end of page heading --}] #[moddesc {CESU experimental}] [comment {-- Description at end of page heading --}]
#[require punk::cesu] #[require punk::cesu]
#[keywords module cesu encoding compatibility experimental unofficial] #[keywords module cesu encoding compatibility experimental unofficial]
#[description] #[description]
#[para] experimental #[para] experimental
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
@ -34,8 +34,8 @@
#[section Overview] #[section Overview]
#[para] overview of punk::cesu #[para] overview of punk::cesu
#[subsection Concepts] #[subsection Concepts]
#[para] cesu-8 may be mistaken for utf-8 if no supplementary chars present. #[para] cesu-8 may be mistaken for utf-8 if no supplementary chars present.
#[para] see: https://www.unicode.org/reports/tr26/tr26-4.html #[para] see: https://www.unicode.org/reports/tr26/tr26-4.html
#[para] Particulary note discouragement of use especially in external interchange. #[para] Particulary note discouragement of use especially in external interchange.
@ -52,9 +52,6 @@ package require Tcl 8.6-
#*** !doctools #*** !doctools
#[item] [package {Tcl 8.6}] #[item] [package {Tcl 8.6}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools #*** !doctools
#[list_end] #[list_end]
@ -70,11 +67,11 @@ package require Tcl 8.6-
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
tcl::namespace::eval punk::cesu { tcl::namespace::eval punk::cesu {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
variable PUNKARGS variable PUNKARGS
#*** !doctools #*** !doctools
#[subsection {Namespace punk::cesu}] #[subsection {Namespace punk::cesu}]
#[para] Core API functions for punk::cesu #[para] Core API functions for punk::cesu
#[list_begin definitions] #[list_begin definitions]
@ -127,7 +124,7 @@ tcl::namespace::eval punk::cesu {
binary scan $1 c 1 binary scan $1 c 1
binary scan $2 c 2 binary scan $2 c 2
binary scan $3 c 3 binary scan $3 c 3
puts [list $1 $2 $3] #puts [list $1 $2 $3]
#binary scan $4 c 4 #binary scan $4 c 4
incr 1 ;#// Effectively adds 0x10000 to the codepoint ? incr 1 ;#// Effectively adds 0x10000 to the codepoint ?
@ -155,7 +152,7 @@ tcl::namespace::eval punk::cesu {
[expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \ [expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \
[expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \ [expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \
$4] $4]
} else { } else {
puts "Invalid sequence: $char" puts "Invalid sequence: $char"
return $char return $char
@ -177,26 +174,78 @@ tcl::namespace::eval punk::cesu {
#e.g from_surrogatestring "note \ud83f\udd1e etc" #e.g from_surrogatestring "note \ud83f\udd1e etc"
#e.g from_surrogatestring "faces \ud83d\ude10 \ud83d\ude21 \ud83d\ude31" #e.g from_surrogatestring "faces \ud83d\ude10 \ud83d\ude21 \ud83d\ude31"
#note: from_surrogatestring \U1f600 returns a mouse (\U1f400) instead of smiley #note: from_surrogatestring \U1f600 returns a mouse (\U1f400) instead of smiley
# but from_surrogatestring \U1f400 returns a mouse. # but from_surrogatestring \U1f400 returns a mouse.
# Tcl bug - fixed some time in 9.x # Tcl bug - fixed some time in 9.x
# surrogated_string shouldn't include non BMP chars anyway (G.I.G.O?) # surrogated_string theoretically shouldn't include non BMP chars anyway (but may in some contexts? mixed surrogate escapes and raw nonbmp?)
lappend PUNKARGS [list { lappend PUNKARGS [list {
@id -id ::punk::cesu::from_surrogatestring @id -id ::punk::cesu::from_surrogatestring
@cmd -name punk::cesu::from_surrogatestring -help\ @cmd -name punk::cesu::from_surrogatestring -help\
"Convert a string containing surrogate pairs "Convert a string containing surrogate pairs
to string with pairs converted to unicode non-BMP to Tcl string with pairs converted to unicode non-BMP
characters" characters"
@values @values
surrogated_string -help\ surrogated_string -help\
"May contain a mix of surrogate pairs and other "May contain a mix of surrogate pairs and other
characters - only the surrogate pairs will be converted." characters - only the surrogate pairs will be converted."
}] }]
proc from_surrogatestring {surrogated_string} {
proc from_surrogatestring {str} {
#high surrogate character rep followed by low surrogate character rep
if {[regexp {[\uD800-\uDBFF][\uDC00-\uDFFF]} $str]} {
set str [string map {\[ \\\[ \] \\\]} $str] ;#Make sure any existing commandlike structures aren't executed
return [subst -novariables -nobackslashes [regsub -all {([\uD800-\uDBFF])([\uDC00-\uDFFF])} $str {[surrogatepair_to_codepoint \1 \2]} ]]
} else {
return $str
}
}
proc surrogatepair_to_codepoint {highchar lowchar} {
if {[string length $highchar] != 1 || [string length $lowchar] !=1} {
error "surrogatepair_to_codepoint expected surrogate pair encoded as 2 characters"
}
#NOTE in tcl8 - we get oddity that 'split <surrogatepair> ""' returns a list of length 1 even though there are 2 chars
#fixed in tcl9
#lassign [split $2_surrogate_chars ""] highSurrogateChar lowSurrogateChar
scan $highchar %c highDecimal
scan $lowchar %c lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
#puts "->codepointDecimal $codepointDecimal"
#In tcl8 - we will get \uFFFD for non BMP codepoints - todo ?
return [format %c $codepointDecimal]
}
#e.g {\ud83d\ude00}
proc escaped_surrogatepair_to_codepoint {spair} {
set spair [string map {" " ""} $spair]
if {[string length $spair] != 12} {
error "escaped_surrogatepair_to_codepoint expected input of form \\uXXXX\\uXXXX"
}
set normalised [regsub -all {\\+u} $spair ""]
set highSurrogate [string range $normalised 0 3]
set lowSurrogate [string range $normalised 4 end]
scan $highSurrogate %x highDecimal
scan $lowSurrogate %x lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
return [format %c $codepointDecimal]
}
proc from_surrogatestring_via_cesu {surrogated_string} {
#we can do this without cesu (from_surrogatestring)
set cesu [encoding convertto cesu-8 $surrogated_string] set cesu [encoding convertto cesu-8 $surrogated_string]
set x [cesu2utf $cesu] set x [cesu2utf $cesu]
encoding convertfrom utf-8 $x encoding convertfrom utf-8 $x
} }
proc _to_test {emoji} { proc _to_test {emoji} {
puts stderr "_to_test incomplete" puts stderr "_to_test incomplete"
set cesu [encoding convertto cesu-8 $e] set cesu [encoding convertto cesu-8 $e]
@ -209,7 +258,7 @@ tcl::namespace::eval punk::cesu {
-format -default escape -choices {raw escape} -choicelabels { -format -default escape -choices {raw escape} -choicelabels {
raw\ raw\
" emit raw surrogate pairs " emit raw surrogate pairs
may not be writable to may not be writable to
output channels" output channels"
escape\ escape\
" emit unprocessed backslash hex " emit unprocessed backslash hex
@ -224,7 +273,7 @@ tcl::namespace::eval punk::cesu {
e.g e.g
>to_surrogatestring -format escape \"mouse: \\U1f400\" >to_surrogatestring -format escape \"mouse: \\U1f400\"
mouse: \\uD83D\\uDC00 mouse: \\uD83D\\uDC00
" "
}] }]
proc to_surrogatestring {args} { proc to_surrogatestring {args} {
set argd [punk::args::parse $args withid ::punk::cesu::to_surrogatestring] set argd [punk::args::parse $args withid ::punk::cesu::to_surrogatestring]
@ -273,14 +322,14 @@ tcl::namespace::eval punk::cesu {
#set lsbinfo [punk::char::char_info_dec $lsbfinal -fields all -except testwidth] ;#don't use all/testwidth will try to emit the char and fail/show error #set lsbinfo [punk::char::char_info_dec $lsbfinal -fields all -except testwidth] ;#don't use all/testwidth will try to emit the char and fail/show error
set esc "\\u$msbhex\\u$lsbhex" set esc "\\u$msbhex\\u$lsbhex"
set raw [format %c $msbfinal][format %c $lsbfinal] set raw [format %c $msbfinal][format %c $lsbfinal]
return [dict create escapes $esc msbdec $msbfinal msbhex $msbhex lsbdec $lsbfinal lsbhex $lsbhex raw $raw] return [dict create escapes $esc msbdec $msbfinal msbhex $msbhex lsbdec $lsbfinal lsbhex $lsbhex raw $raw]
} }
# #
#test_enc_equivalency \U1f400 \U1f600 #test_enc_equivalency \U1f400 \U1f600
proc test_enc_equivalency {c1 c2} { proc test_enc_equivalency {c1 c2} {
package require punk::ansi package require punk::ansi
namespace import ::punk::ansi::a+ ::punk::ansi::a namespace import ::punk::ansi::a+ ::punk::ansi::a
foreach enc [lsort [encoding names]] { foreach enc [lsort [encoding names]] {
puts stdout "testing $enc" puts stdout "testing $enc"
if {$enc in "iso2022 iso2022-jp iso2022-kr"} { if {$enc in "iso2022 iso2022-jp iso2022-kr"} {
@ -315,14 +364,14 @@ tcl::namespace::eval punk::cesu::lib {
tcl::namespace::path [tcl::namespace::parent] tcl::namespace::path [tcl::namespace::parent]
#*** !doctools #*** !doctools
#[subsection {Namespace punk::cesu::lib}] #[subsection {Namespace punk::cesu::lib}]
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} { #proc utility1 {p1 args} {
# #*** !doctools # #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1 # #[para]Description of utility1
# return 1 # return 1
#} #}
@ -340,15 +389,15 @@ tcl::namespace::eval punk::cesu::lib {
#tcl::namespace::eval punk::cesu::system { #tcl::namespace::eval punk::cesu::system {
#*** !doctools #*** !doctools
#[subsection {Namespace punk::cesu::system}] #[subsection {Namespace punk::cesu::system}]
#[para] Internal functions that are not part of the API #[para] Internal functions that are not part of the API
#} #}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
# Sample 'about' function with punk::args documentation # Sample 'about' function with punk::args documentation
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
tcl::namespace::eval punk::cesu { tcl::namespace::eval punk::cesu {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
variable PUNKARGS variable PUNKARGS
@ -371,7 +420,7 @@ tcl::namespace::eval punk::cesu {
set about_topics [list] set about_topics [list]
foreach f $topic_funs { foreach f $topic_funs {
set tail [namespace tail $f] set tail [namespace tail $f]
lappend about_topics [string range $tail [string length get_topic_] end] lappend about_topics [string range $tail [string length get_topic_] end]
} }
#Adjust this function or 'default_topics' if a different order is required #Adjust this function or 'default_topics' if a different order is required
return [lsort $about_topics] return [lsort $about_topics]
@ -379,12 +428,12 @@ tcl::namespace::eval punk::cesu {
proc default_topics {} {return [list Description *]} proc default_topics {} {return [list Description *]}
# ------------------------------------------------------------- # -------------------------------------------------------------
# get_topic_ functions add more to auto-include in about topics # get_topic_ functions add more to auto-include in about topics
# ------------------------------------------------------------- # -------------------------------------------------------------
proc get_topic_Description {} { proc get_topic_Description {} {
punk::args::lib::tstr [string trim { punk::args::lib::tstr [string trim {
package punk::cesu package punk::cesu
description to come.. cesu and surrogate-pair processing
} \n] } \n]
} }
proc get_topic_License {} { proc get_topic_License {} {
@ -406,7 +455,8 @@ tcl::namespace::eval punk::cesu {
} }
proc get_topic_custom-topic {} { proc get_topic_custom-topic {} {
punk::args::lib::tstr -return string { punk::args::lib::tstr -return string {
nothing to see here This library can be used for surrogate-pair handling.
cesu utilities are used internally in from_surrogatestring
} }
} }
# ------------------------------------------------------------- # -------------------------------------------------------------
@ -415,9 +465,9 @@ tcl::namespace::eval punk::cesu {
# we re-use the argument definition from punk::args::standard_about and override some items # we re-use the argument definition from punk::args::standard_about and override some items
set overrides [dict create] set overrides [dict create]
dict set overrides @id -id "::punk::cesu::about" dict set overrides @id -id "::punk::cesu::about"
dict set overrides @cmd -name "punk::cesu::about" dict set overrides @cmd -name "punk::cesu::about"
dict set overrides @cmd -help [string trim [punk::args::lib::tstr { dict set overrides @cmd -help [string trim [punk::args::lib::tstr {
About punk::cesu About punk::cesu
}] \n] }] \n]
dict set overrides topic -choices [list {*}[punk::cesu::argdoc::about_topics] *] dict set overrides topic -choices [list {*}[punk::cesu::argdoc::about_topics] *]
dict set overrides topic -choicerestricted 1 dict set overrides topic -choicerestricted 1
@ -433,7 +483,7 @@ tcl::namespace::eval punk::cesu {
} }
} }
# end of sample 'about' function # end of sample 'about' function
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -446,11 +496,11 @@ namespace eval ::punk::args::register {
lappend ::punk::args::register::NAMESPACES ::punk::cesu lappend ::punk::args::register::NAMESPACES ::punk::cesu
} }
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
## Ready ## Ready
package provide punk::cesu [tcl::namespace::eval punk::cesu { package provide punk::cesu [tcl::namespace::eval punk::cesu {
variable pkg punk::cesu variable pkg punk::cesu
variable version variable version
set version 999999.0a1.0 set version 999999.0a1.0
}] }]
return return

2
src/modules/punk/cesu-buildversion.txt

@ -1,3 +1,3 @@
0.1.0 0.1.1
#First line must be a semantic version number #First line must be a semantic version number
#all other lines are ignored. #all other lines are ignored.

4774
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm

Binary file not shown.

8408
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

4774
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm

Binary file not shown.

8408
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/vendormodules/test/tomlish-1.1.5.tm

Binary file not shown.

10327
src/vendormodules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

4774
src/vfs/_vfscommon.vfs/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

126
src/vfs/_vfscommon.vfs/modules/punk/cesu-0.1.0.tm → src/vfs/_vfscommon.vfs/modules/punk/cesu-0.1.1.tm

@ -9,7 +9,7 @@
# http://paste.tclers.tk/5977 # http://paste.tclers.tk/5977
# #
# @@ Meta Begin # @@ Meta Begin
# Application punk::cesu 0.1.0 # Application punk::cesu 0.1.1
# Meta platform tcl # Meta platform tcl
# Meta license <unknown> # Meta license <unknown>
# @@ Meta End # @@ Meta End
@ -19,14 +19,14 @@
# doctools header # doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools #*** !doctools
#[manpage_begin punkshell_module_punk::cesu 0 0.1.0] #[manpage_begin punkshell_module_punk::cesu 0 0.1.1]
#[copyright "2024"] #[copyright "2024"]
#[titledesc {CESU compatibility ehcoding scheme for utf-16: 8-Bit (CESU-8) ??}] [comment {-- Name section and table of contents description --}] #[titledesc {CESU compatibility ehcoding scheme for utf-16: 8-Bit (CESU-8) ??}] [comment {-- Name section and table of contents description --}]
#[moddesc {CESU experimental}] [comment {-- Description at end of page heading --}] #[moddesc {CESU experimental}] [comment {-- Description at end of page heading --}]
#[require punk::cesu] #[require punk::cesu]
#[keywords module cesu encoding compatibility experimental unofficial] #[keywords module cesu encoding compatibility experimental unofficial]
#[description] #[description]
#[para] experimental #[para] experimental
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
@ -34,8 +34,8 @@
#[section Overview] #[section Overview]
#[para] overview of punk::cesu #[para] overview of punk::cesu
#[subsection Concepts] #[subsection Concepts]
#[para] cesu-8 may be mistaken for utf-8 if no supplementary chars present. #[para] cesu-8 may be mistaken for utf-8 if no supplementary chars present.
#[para] see: https://www.unicode.org/reports/tr26/tr26-4.html #[para] see: https://www.unicode.org/reports/tr26/tr26-4.html
#[para] Particulary note discouragement of use especially in external interchange. #[para] Particulary note discouragement of use especially in external interchange.
@ -52,9 +52,6 @@ package require Tcl 8.6-
#*** !doctools #*** !doctools
#[item] [package {Tcl 8.6}] #[item] [package {Tcl 8.6}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools #*** !doctools
#[list_end] #[list_end]
@ -70,11 +67,11 @@ package require Tcl 8.6-
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
tcl::namespace::eval punk::cesu { tcl::namespace::eval punk::cesu {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
variable PUNKARGS variable PUNKARGS
#*** !doctools #*** !doctools
#[subsection {Namespace punk::cesu}] #[subsection {Namespace punk::cesu}]
#[para] Core API functions for punk::cesu #[para] Core API functions for punk::cesu
#[list_begin definitions] #[list_begin definitions]
@ -127,7 +124,7 @@ tcl::namespace::eval punk::cesu {
binary scan $1 c 1 binary scan $1 c 1
binary scan $2 c 2 binary scan $2 c 2
binary scan $3 c 3 binary scan $3 c 3
puts [list $1 $2 $3] #puts [list $1 $2 $3]
#binary scan $4 c 4 #binary scan $4 c 4
incr 1 ;#// Effectively adds 0x10000 to the codepoint ? incr 1 ;#// Effectively adds 0x10000 to the codepoint ?
@ -155,7 +152,7 @@ tcl::namespace::eval punk::cesu {
[expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \ [expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \
[expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \ [expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \
$4] $4]
} else { } else {
puts "Invalid sequence: $char" puts "Invalid sequence: $char"
return $char return $char
@ -177,26 +174,78 @@ tcl::namespace::eval punk::cesu {
#e.g from_surrogatestring "note \ud83f\udd1e etc" #e.g from_surrogatestring "note \ud83f\udd1e etc"
#e.g from_surrogatestring "faces \ud83d\ude10 \ud83d\ude21 \ud83d\ude31" #e.g from_surrogatestring "faces \ud83d\ude10 \ud83d\ude21 \ud83d\ude31"
#note: from_surrogatestring \U1f600 returns a mouse (\U1f400) instead of smiley #note: from_surrogatestring \U1f600 returns a mouse (\U1f400) instead of smiley
# but from_surrogatestring \U1f400 returns a mouse. # but from_surrogatestring \U1f400 returns a mouse.
# Tcl bug - fixed some time in 9.x # Tcl bug - fixed some time in 9.x
# surrogated_string shouldn't include non BMP chars anyway (G.I.G.O?) # surrogated_string theoretically shouldn't include non BMP chars anyway (but may in some contexts? mixed surrogate escapes and raw nonbmp?)
lappend PUNKARGS [list { lappend PUNKARGS [list {
@id -id ::punk::cesu::from_surrogatestring @id -id ::punk::cesu::from_surrogatestring
@cmd -name punk::cesu::from_surrogatestring -help\ @cmd -name punk::cesu::from_surrogatestring -help\
"Convert a string containing surrogate pairs "Convert a string containing surrogate pairs
to string with pairs converted to unicode non-BMP to Tcl string with pairs converted to unicode non-BMP
characters" characters"
@values @values
surrogated_string -help\ surrogated_string -help\
"May contain a mix of surrogate pairs and other "May contain a mix of surrogate pairs and other
characters - only the surrogate pairs will be converted." characters - only the surrogate pairs will be converted."
}] }]
proc from_surrogatestring {surrogated_string} {
proc from_surrogatestring {str} {
#high surrogate character rep followed by low surrogate character rep
if {[regexp {[\uD800-\uDBFF][\uDC00-\uDFFF]} $str]} {
set str [string map {\[ \\\[ \] \\\]} $str] ;#Make sure any existing commandlike structures aren't executed
return [subst -novariables -nobackslashes [regsub -all {([\uD800-\uDBFF])([\uDC00-\uDFFF])} $str {[surrogatepair_to_codepoint \1 \2]} ]]
} else {
return $str
}
}
proc surrogatepair_to_codepoint {highchar lowchar} {
if {[string length $highchar] != 1 || [string length $lowchar] !=1} {
error "surrogatepair_to_codepoint expected surrogate pair encoded as 2 characters"
}
#NOTE in tcl8 - we get oddity that 'split <surrogatepair> ""' returns a list of length 1 even though there are 2 chars
#fixed in tcl9
#lassign [split $2_surrogate_chars ""] highSurrogateChar lowSurrogateChar
scan $highchar %c highDecimal
scan $lowchar %c lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
#puts "->codepointDecimal $codepointDecimal"
#In tcl8 - we will get \uFFFD for non BMP codepoints - todo ?
return [format %c $codepointDecimal]
}
#e.g {\ud83d\ude00}
proc escaped_surrogatepair_to_codepoint {spair} {
set spair [string map {" " ""} $spair]
if {[string length $spair] != 12} {
error "escaped_surrogatepair_to_codepoint expected input of form \\uXXXX\\uXXXX"
}
set normalised [regsub -all {\\+u} $spair ""]
set highSurrogate [string range $normalised 0 3]
set lowSurrogate [string range $normalised 4 end]
scan $highSurrogate %x highDecimal
scan $lowSurrogate %x lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
return [format %c $codepointDecimal]
}
proc from_surrogatestring_via_cesu {surrogated_string} {
#we can do this without cesu (from_surrogatestring)
set cesu [encoding convertto cesu-8 $surrogated_string] set cesu [encoding convertto cesu-8 $surrogated_string]
set x [cesu2utf $cesu] set x [cesu2utf $cesu]
encoding convertfrom utf-8 $x encoding convertfrom utf-8 $x
} }
proc _to_test {emoji} { proc _to_test {emoji} {
puts stderr "_to_test incomplete" puts stderr "_to_test incomplete"
set cesu [encoding convertto cesu-8 $e] set cesu [encoding convertto cesu-8 $e]
@ -209,7 +258,7 @@ tcl::namespace::eval punk::cesu {
-format -default escape -choices {raw escape} -choicelabels { -format -default escape -choices {raw escape} -choicelabels {
raw\ raw\
" emit raw surrogate pairs " emit raw surrogate pairs
may not be writable to may not be writable to
output channels" output channels"
escape\ escape\
" emit unprocessed backslash hex " emit unprocessed backslash hex
@ -224,7 +273,7 @@ tcl::namespace::eval punk::cesu {
e.g e.g
>to_surrogatestring -format escape \"mouse: \\U1f400\" >to_surrogatestring -format escape \"mouse: \\U1f400\"
mouse: \\uD83D\\uDC00 mouse: \\uD83D\\uDC00
" "
}] }]
proc to_surrogatestring {args} { proc to_surrogatestring {args} {
set argd [punk::args::parse $args withid ::punk::cesu::to_surrogatestring] set argd [punk::args::parse $args withid ::punk::cesu::to_surrogatestring]
@ -273,14 +322,14 @@ tcl::namespace::eval punk::cesu {
#set lsbinfo [punk::char::char_info_dec $lsbfinal -fields all -except testwidth] ;#don't use all/testwidth will try to emit the char and fail/show error #set lsbinfo [punk::char::char_info_dec $lsbfinal -fields all -except testwidth] ;#don't use all/testwidth will try to emit the char and fail/show error
set esc "\\u$msbhex\\u$lsbhex" set esc "\\u$msbhex\\u$lsbhex"
set raw [format %c $msbfinal][format %c $lsbfinal] set raw [format %c $msbfinal][format %c $lsbfinal]
return [dict create escapes $esc msbdec $msbfinal msbhex $msbhex lsbdec $lsbfinal lsbhex $lsbhex raw $raw] return [dict create escapes $esc msbdec $msbfinal msbhex $msbhex lsbdec $lsbfinal lsbhex $lsbhex raw $raw]
} }
# #
#test_enc_equivalency \U1f400 \U1f600 #test_enc_equivalency \U1f400 \U1f600
proc test_enc_equivalency {c1 c2} { proc test_enc_equivalency {c1 c2} {
package require punk::ansi package require punk::ansi
namespace import ::punk::ansi::a+ ::punk::ansi::a namespace import ::punk::ansi::a+ ::punk::ansi::a
foreach enc [lsort [encoding names]] { foreach enc [lsort [encoding names]] {
puts stdout "testing $enc" puts stdout "testing $enc"
if {$enc in "iso2022 iso2022-jp iso2022-kr"} { if {$enc in "iso2022 iso2022-jp iso2022-kr"} {
@ -315,14 +364,14 @@ tcl::namespace::eval punk::cesu::lib {
tcl::namespace::path [tcl::namespace::parent] tcl::namespace::path [tcl::namespace::parent]
#*** !doctools #*** !doctools
#[subsection {Namespace punk::cesu::lib}] #[subsection {Namespace punk::cesu::lib}]
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} { #proc utility1 {p1 args} {
# #*** !doctools # #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1 # #[para]Description of utility1
# return 1 # return 1
#} #}
@ -340,15 +389,15 @@ tcl::namespace::eval punk::cesu::lib {
#tcl::namespace::eval punk::cesu::system { #tcl::namespace::eval punk::cesu::system {
#*** !doctools #*** !doctools
#[subsection {Namespace punk::cesu::system}] #[subsection {Namespace punk::cesu::system}]
#[para] Internal functions that are not part of the API #[para] Internal functions that are not part of the API
#} #}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
# Sample 'about' function with punk::args documentation # Sample 'about' function with punk::args documentation
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
tcl::namespace::eval punk::cesu { tcl::namespace::eval punk::cesu {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
variable PUNKARGS variable PUNKARGS
@ -371,7 +420,7 @@ tcl::namespace::eval punk::cesu {
set about_topics [list] set about_topics [list]
foreach f $topic_funs { foreach f $topic_funs {
set tail [namespace tail $f] set tail [namespace tail $f]
lappend about_topics [string range $tail [string length get_topic_] end] lappend about_topics [string range $tail [string length get_topic_] end]
} }
#Adjust this function or 'default_topics' if a different order is required #Adjust this function or 'default_topics' if a different order is required
return [lsort $about_topics] return [lsort $about_topics]
@ -379,12 +428,12 @@ tcl::namespace::eval punk::cesu {
proc default_topics {} {return [list Description *]} proc default_topics {} {return [list Description *]}
# ------------------------------------------------------------- # -------------------------------------------------------------
# get_topic_ functions add more to auto-include in about topics # get_topic_ functions add more to auto-include in about topics
# ------------------------------------------------------------- # -------------------------------------------------------------
proc get_topic_Description {} { proc get_topic_Description {} {
punk::args::lib::tstr [string trim { punk::args::lib::tstr [string trim {
package punk::cesu package punk::cesu
description to come.. cesu and surrogate-pair processing
} \n] } \n]
} }
proc get_topic_License {} { proc get_topic_License {} {
@ -406,7 +455,8 @@ tcl::namespace::eval punk::cesu {
} }
proc get_topic_custom-topic {} { proc get_topic_custom-topic {} {
punk::args::lib::tstr -return string { punk::args::lib::tstr -return string {
nothing to see here This library can be used for surrogate-pair handling.
cesu utilities are used internally in from_surrogatestring
} }
} }
# ------------------------------------------------------------- # -------------------------------------------------------------
@ -415,9 +465,9 @@ tcl::namespace::eval punk::cesu {
# we re-use the argument definition from punk::args::standard_about and override some items # we re-use the argument definition from punk::args::standard_about and override some items
set overrides [dict create] set overrides [dict create]
dict set overrides @id -id "::punk::cesu::about" dict set overrides @id -id "::punk::cesu::about"
dict set overrides @cmd -name "punk::cesu::about" dict set overrides @cmd -name "punk::cesu::about"
dict set overrides @cmd -help [string trim [punk::args::lib::tstr { dict set overrides @cmd -help [string trim [punk::args::lib::tstr {
About punk::cesu About punk::cesu
}] \n] }] \n]
dict set overrides topic -choices [list {*}[punk::cesu::argdoc::about_topics] *] dict set overrides topic -choices [list {*}[punk::cesu::argdoc::about_topics] *]
dict set overrides topic -choicerestricted 1 dict set overrides topic -choicerestricted 1
@ -433,7 +483,7 @@ tcl::namespace::eval punk::cesu {
} }
} }
# end of sample 'about' function # end of sample 'about' function
# == === === === === === === === === === === === === === === # == === === === === === === === === === === === === === ===
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -446,11 +496,11 @@ namespace eval ::punk::args::register {
lappend ::punk::args::register::NAMESPACES ::punk::cesu lappend ::punk::args::register::NAMESPACES ::punk::cesu
} }
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
## Ready ## Ready
package provide punk::cesu [tcl::namespace::eval punk::cesu { package provide punk::cesu [tcl::namespace::eval punk::cesu {
variable pkg punk::cesu variable pkg punk::cesu
variable version variable version
set version 0.1.0 set version 0.1.1
}] }]
return return

BIN
src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm

Binary file not shown.

BIN
src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm.x

Binary file not shown.

42
src/vfs/_vfscommon.vfs/modules/tomlish-1.1.5.tm

@ -531,17 +531,34 @@ namespace eval tomlish {
} else { } else {
#we have a table - but is it a tablearray? #we have a table - but is it a tablearray?
set ttype [dictn get $tablenames_info [list $norm_segments type]] set ttype [dictn get $tablenames_info [list $norm_segments type]]
#use a tabletype_unknown type for previous 'created' only tables? #we use a header_unknown type for previous 'created' only tables
if {$ttype ne "header_tablearray"} {
set msg "tablearray name $tablearrayname already appears to be already created as a table not a tablearray - invalid?" if {$ttype eq "header_unknown"} {
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] dictn set tablenames_info [list $norm_segments type] header_tablearray
#raise a specific type of error for tests to check set ttype header_tablearray
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg #assert - must not be 'defined'
#we have seen it before as a supertable ie 'created' only
#Not 'defined' but could still have subtables - treat it as a dict
set ARRAY_ELEMENTS [dict get $datastructure {*}$norm_segments]
} else {
if {$ttype ne "header_tablearray"} {
#header_table or itable
switch -- $ttype {
itable {set ttypename itable}
header_table {set ttypename table}
default {error "unrecognised type - expected header_table or itable"}
}
set msg "tablearray name $tablearrayname already appears to be already created as '$ttypename' not tablearray - invalid?"
append msg \n [tomlish::dict::_show_tablenames $tablenames_info]
#raise a specific type of error for tests to check
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg
}
#EXISTING tablearray
#add to array
#error "add_to_array not implemented"
#{type ARRAY value <list>}
set ARRAY_ELEMENTS [dict get $datastructure {*}$norm_segments value]
} }
#add to array
#error "add_to_array not implemented"
#{type ARRAY value <list>}
set ARRAY_ELEMENTS [dict get $datastructure {*}$norm_segments value]
} }
@ -748,7 +765,7 @@ namespace eval tomlish {
set norm_segments [::tomlish::to_dict::tablename_split $tablename true] ;#true to normalize set norm_segments [::tomlish::to_dict::tablename_split $tablename true] ;#true to normalize
set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL] set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL]
if {$T_DEFINED ne "NULL"} { if {$T_DEFINED ni [list NULL header_tablearray]} {
#our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path #our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path
set msg "Table name $tablename has already been directly defined in the toml data. Invalid" set msg "Table name $tablename has already been directly defined in the toml data. Invalid"
append msg \n [tomlish::dict::_show_tablenames $tablenames_info] append msg \n [tomlish::dict::_show_tablenames $tablenames_info]
@ -779,7 +796,8 @@ namespace eval tomlish {
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg
} }
#here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here #here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here
dictn set tablenames_info [list $supertable type] header_table #we also don't know whether it's a table or a tablearray
dictn set tablenames_info [list $supertable type] header_unknown
#ensure empty tables are still represented in the datastructure #ensure empty tables are still represented in the datastructure
dict set datastructure {*}$supertable [list] dict set datastructure {*}$supertable [list]
} else { } else {

8408
src/vfs/_vfscommon.vfs/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save