Browse Source

tomlish support for 1.1.0 decoding and basic encoding

master
Julian Noble 7 days ago
parent
commit
6ffa781fad
  1. 2
      punkproject.toml
  2. 4774
      src/bootsupport/modules/overtype-1.6.6.tm
  3. BIN
      src/bootsupport/modules/test/tomlish-1.1.5.tm
  4. 8408
      src/bootsupport/modules/tomlish-1.1.6.tm
  5. 68
      src/modules/punk/cesu-999999.0a1.0.tm
  6. 2
      src/modules/punk/cesu-buildversion.txt
  7. 4774
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.6.tm
  8. BIN
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm
  9. 8408
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.6.tm
  10. 4774
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.6.tm
  11. BIN
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm
  12. 8408
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.6.tm
  13. BIN
      src/vendormodules/test/tomlish-1.1.5.tm
  14. 10431
      src/vendormodules/tomlish-1.1.6.tm
  15. 4774
      src/vfs/_vfscommon.vfs/modules/overtype-1.6.6.tm
  16. 74
      src/vfs/_vfscommon.vfs/modules/punk/cesu-0.1.1.tm
  17. BIN
      src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm
  18. BIN
      src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm.x
  19. 26
      src/vfs/_vfscommon.vfs/modules/tomlish-1.1.5.tm
  20. 8408
      src/vfs/_vfscommon.vfs/modules/tomlish-1.1.6.tm

2
punkproject.toml

@ -0,0 +1,2 @@
[project]
name = "punkshell"

4774
src/bootsupport/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/bootsupport/modules/test/tomlish-1.1.5.tm

Binary file not shown.

8408
src/bootsupport/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

68
src/modules/punk/cesu-999999.0a1.0.tm

@ -52,9 +52,6 @@ package require Tcl 8.6-
#*** !doctools
#[item] [package {Tcl 8.6}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools
#[list_end]
@ -127,7 +124,7 @@ tcl::namespace::eval punk::cesu {
binary scan $1 c 1
binary scan $2 c 2
binary scan $3 c 3
puts [list $1 $2 $3]
#puts [list $1 $2 $3]
#binary scan $4 c 4
incr 1 ;#// Effectively adds 0x10000 to the codepoint ?
@ -180,23 +177,75 @@ tcl::namespace::eval punk::cesu {
#note: from_surrogatestring \U1f600 returns a mouse (\U1f400) instead of smiley
# but from_surrogatestring \U1f400 returns a mouse.
# Tcl bug - fixed some time in 9.x
# surrogated_string shouldn't include non BMP chars anyway (G.I.G.O?)
# surrogated_string theoretically shouldn't include non BMP chars anyway (but may in some contexts? mixed surrogate escapes and raw nonbmp?)
lappend PUNKARGS [list {
@id -id ::punk::cesu::from_surrogatestring
@cmd -name punk::cesu::from_surrogatestring -help\
"Convert a string containing surrogate pairs
to string with pairs converted to unicode non-BMP
to Tcl string with pairs converted to unicode non-BMP
characters"
@values
surrogated_string -help\
"May contain a mix of surrogate pairs and other
characters - only the surrogate pairs will be converted."
}]
proc from_surrogatestring {surrogated_string} {
proc from_surrogatestring {str} {
#high surrogate character rep followed by low surrogate character rep
if {[regexp {[\uD800-\uDBFF][\uDC00-\uDFFF]} $str]} {
set str [string map {\[ \\\[ \] \\\]} $str] ;#Make sure any existing commandlike structures aren't executed
return [subst -novariables -nobackslashes [regsub -all {([\uD800-\uDBFF])([\uDC00-\uDFFF])} $str {[surrogatepair_to_codepoint \1 \2]} ]]
} else {
return $str
}
}
proc surrogatepair_to_codepoint {highchar lowchar} {
if {[string length $highchar] != 1 || [string length $lowchar] !=1} {
error "surrogatepair_to_codepoint expected surrogate pair encoded as 2 characters"
}
#NOTE in tcl8 - we get oddity that 'split <surrogatepair> ""' returns a list of length 1 even though there are 2 chars
#fixed in tcl9
#lassign [split $2_surrogate_chars ""] highSurrogateChar lowSurrogateChar
scan $highchar %c highDecimal
scan $lowchar %c lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
#puts "->codepointDecimal $codepointDecimal"
#In tcl8 - we will get \uFFFD for non BMP codepoints - todo ?
return [format %c $codepointDecimal]
}
#e.g {\ud83d\ude00}
proc escaped_surrogatepair_to_codepoint {spair} {
set spair [string map {" " ""} $spair]
if {[string length $spair] != 12} {
error "escaped_surrogatepair_to_codepoint expected input of form \\uXXXX\\uXXXX"
}
set normalised [regsub -all {\\+u} $spair ""]
set highSurrogate [string range $normalised 0 3]
set lowSurrogate [string range $normalised 4 end]
scan $highSurrogate %x highDecimal
scan $lowSurrogate %x lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
return [format %c $codepointDecimal]
}
proc from_surrogatestring_via_cesu {surrogated_string} {
#we can do this without cesu (from_surrogatestring)
set cesu [encoding convertto cesu-8 $surrogated_string]
set x [cesu2utf $cesu]
encoding convertfrom utf-8 $x
}
proc _to_test {emoji} {
puts stderr "_to_test incomplete"
set cesu [encoding convertto cesu-8 $e]
@ -384,7 +433,7 @@ tcl::namespace::eval punk::cesu {
proc get_topic_Description {} {
punk::args::lib::tstr [string trim {
package punk::cesu
description to come..
cesu and surrogate-pair processing
} \n]
}
proc get_topic_License {} {
@ -406,7 +455,8 @@ tcl::namespace::eval punk::cesu {
}
proc get_topic_custom-topic {} {
punk::args::lib::tstr -return string {
nothing to see here
This library can be used for surrogate-pair handling.
cesu utilities are used internally in from_surrogatestring
}
}
# -------------------------------------------------------------

2
src/modules/punk/cesu-buildversion.txt

@ -1,3 +1,3 @@
0.1.0
0.1.1
#First line must be a semantic version number
#all other lines are ignored.

4774
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm

Binary file not shown.

8408
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

4774
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.5.tm

Binary file not shown.

8408
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

BIN
src/vendormodules/test/tomlish-1.1.5.tm

Binary file not shown.

10431
src/vendormodules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff

4774
src/vfs/_vfscommon.vfs/modules/overtype-1.6.6.tm

File diff suppressed because it is too large Load Diff

74
src/vfs/_vfscommon.vfs/modules/punk/cesu-0.1.0.tm → src/vfs/_vfscommon.vfs/modules/punk/cesu-0.1.1.tm

@ -9,7 +9,7 @@
# http://paste.tclers.tk/5977
#
# @@ Meta Begin
# Application punk::cesu 0.1.0
# Application punk::cesu 0.1.1
# Meta platform tcl
# Meta license <unknown>
# @@ Meta End
@ -19,7 +19,7 @@
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin punkshell_module_punk::cesu 0 0.1.0]
#[manpage_begin punkshell_module_punk::cesu 0 0.1.1]
#[copyright "2024"]
#[titledesc {CESU compatibility ehcoding scheme for utf-16: 8-Bit (CESU-8) ??}] [comment {-- Name section and table of contents description --}]
#[moddesc {CESU experimental}] [comment {-- Description at end of page heading --}]
@ -52,9 +52,6 @@ package require Tcl 8.6-
#*** !doctools
#[item] [package {Tcl 8.6}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools
#[list_end]
@ -127,7 +124,7 @@ tcl::namespace::eval punk::cesu {
binary scan $1 c 1
binary scan $2 c 2
binary scan $3 c 3
puts [list $1 $2 $3]
#puts [list $1 $2 $3]
#binary scan $4 c 4
incr 1 ;#// Effectively adds 0x10000 to the codepoint ?
@ -180,23 +177,75 @@ tcl::namespace::eval punk::cesu {
#note: from_surrogatestring \U1f600 returns a mouse (\U1f400) instead of smiley
# but from_surrogatestring \U1f400 returns a mouse.
# Tcl bug - fixed some time in 9.x
# surrogated_string shouldn't include non BMP chars anyway (G.I.G.O?)
# surrogated_string theoretically shouldn't include non BMP chars anyway (but may in some contexts? mixed surrogate escapes and raw nonbmp?)
lappend PUNKARGS [list {
@id -id ::punk::cesu::from_surrogatestring
@cmd -name punk::cesu::from_surrogatestring -help\
"Convert a string containing surrogate pairs
to string with pairs converted to unicode non-BMP
to Tcl string with pairs converted to unicode non-BMP
characters"
@values
surrogated_string -help\
"May contain a mix of surrogate pairs and other
characters - only the surrogate pairs will be converted."
}]
proc from_surrogatestring {surrogated_string} {
proc from_surrogatestring {str} {
#high surrogate character rep followed by low surrogate character rep
if {[regexp {[\uD800-\uDBFF][\uDC00-\uDFFF]} $str]} {
set str [string map {\[ \\\[ \] \\\]} $str] ;#Make sure any existing commandlike structures aren't executed
return [subst -novariables -nobackslashes [regsub -all {([\uD800-\uDBFF])([\uDC00-\uDFFF])} $str {[surrogatepair_to_codepoint \1 \2]} ]]
} else {
return $str
}
}
proc surrogatepair_to_codepoint {highchar lowchar} {
if {[string length $highchar] != 1 || [string length $lowchar] !=1} {
error "surrogatepair_to_codepoint expected surrogate pair encoded as 2 characters"
}
#NOTE in tcl8 - we get oddity that 'split <surrogatepair> ""' returns a list of length 1 even though there are 2 chars
#fixed in tcl9
#lassign [split $2_surrogate_chars ""] highSurrogateChar lowSurrogateChar
scan $highchar %c highDecimal
scan $lowchar %c lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
#puts "->codepointDecimal $codepointDecimal"
#In tcl8 - we will get \uFFFD for non BMP codepoints - todo ?
return [format %c $codepointDecimal]
}
#e.g {\ud83d\ude00}
proc escaped_surrogatepair_to_codepoint {spair} {
set spair [string map {" " ""} $spair]
if {[string length $spair] != 12} {
error "escaped_surrogatepair_to_codepoint expected input of form \\uXXXX\\uXXXX"
}
set normalised [regsub -all {\\+u} $spair ""]
set highSurrogate [string range $normalised 0 3]
set lowSurrogate [string range $normalised 4 end]
scan $highSurrogate %x highDecimal
scan $lowSurrogate %x lowDecimal
set highDecimal [expr {$highDecimal - 0xD800}]
set lowDecimal [expr {$lowDecimal - 0xDC00}]
# Combine the values and add 0x10000 to get the original code point
set codepointDecimal [expr {($highDecimal << 10) + $lowDecimal + 0x10000}]
return [format %c $codepointDecimal]
}
proc from_surrogatestring_via_cesu {surrogated_string} {
#we can do this without cesu (from_surrogatestring)
set cesu [encoding convertto cesu-8 $surrogated_string]
set x [cesu2utf $cesu]
encoding convertfrom utf-8 $x
}
proc _to_test {emoji} {
puts stderr "_to_test incomplete"
set cesu [encoding convertto cesu-8 $e]
@ -384,7 +433,7 @@ tcl::namespace::eval punk::cesu {
proc get_topic_Description {} {
punk::args::lib::tstr [string trim {
package punk::cesu
description to come..
cesu and surrogate-pair processing
} \n]
}
proc get_topic_License {} {
@ -406,7 +455,8 @@ tcl::namespace::eval punk::cesu {
}
proc get_topic_custom-topic {} {
punk::args::lib::tstr -return string {
nothing to see here
This library can be used for surrogate-pair handling.
cesu utilities are used internally in from_surrogatestring
}
}
# -------------------------------------------------------------
@ -450,7 +500,7 @@ namespace eval ::punk::args::register {
package provide punk::cesu [tcl::namespace::eval punk::cesu {
variable pkg punk::cesu
variable version
set version 0.1.0
set version 0.1.1
}]
return

BIN
src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm

Binary file not shown.

BIN
src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.5.tm.x

Binary file not shown.

26
src/vfs/_vfscommon.vfs/modules/tomlish-1.1.5.tm

@ -531,18 +531,35 @@ namespace eval tomlish {
} else {
#we have a table - but is it a tablearray?
set ttype [dictn get $tablenames_info [list $norm_segments type]]
#use a tabletype_unknown type for previous 'created' only tables?
#we use a header_unknown type for previous 'created' only tables
if {$ttype eq "header_unknown"} {
dictn set tablenames_info [list $norm_segments type] header_tablearray
set ttype header_tablearray
#assert - must not be 'defined'
#we have seen it before as a supertable ie 'created' only
#Not 'defined' but could still have subtables - treat it as a dict
set ARRAY_ELEMENTS [dict get $datastructure {*}$norm_segments]
} else {
if {$ttype ne "header_tablearray"} {
set msg "tablearray name $tablearrayname already appears to be already created as a table not a tablearray - invalid?"
#header_table or itable
switch -- $ttype {
itable {set ttypename itable}
header_table {set ttypename table}
default {error "unrecognised type - expected header_table or itable"}
}
set msg "tablearray name $tablearrayname already appears to be already created as '$ttypename' not tablearray - invalid?"
append msg \n [tomlish::dict::_show_tablenames $tablenames_info]
#raise a specific type of error for tests to check
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg
}
#EXISTING tablearray
#add to array
#error "add_to_array not implemented"
#{type ARRAY value <list>}
set ARRAY_ELEMENTS [dict get $datastructure {*}$norm_segments value]
}
}
set object [dict create] ;#array context equivalent of 'datastructure'
@ -748,7 +765,7 @@ namespace eval tomlish {
set norm_segments [::tomlish::to_dict::tablename_split $tablename true] ;#true to normalize
set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL]
if {$T_DEFINED ne "NULL"} {
if {$T_DEFINED ni [list NULL header_tablearray]} {
#our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path
set msg "Table name $tablename has already been directly defined in the toml data. Invalid"
append msg \n [tomlish::dict::_show_tablenames $tablenames_info]
@ -779,7 +796,8 @@ namespace eval tomlish {
return -code error -errorcode {TOMLISH STRUCTURE KEYCOLLISION} $msg
}
#here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here
dictn set tablenames_info [list $supertable type] header_table
#we also don't know whether it's a table or a tablearray
dictn set tablenames_info [list $supertable type] header_unknown
#ensure empty tables are still represented in the datastructure
dict set datastructure {*}$supertable [list]
} else {

8408
src/vfs/_vfscommon.vfs/modules/tomlish-1.1.6.tm

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save