From 779ee8bc4a15a95d497400ae3f34b613fb7b27fb Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Mon, 23 Sep 2024 05:12:21 +1000 Subject: [PATCH] fauxlink package, misc fixes --- src/bootsupport/modules/fauxlink-0.1.0.tm | 468 +++++++++++ .../modules/include_modules.config | 1 + src/bootsupport/modules/punk/du-0.1.0.tm | 1 + src/bootsupport/modules/punk/lib-0.1.1.tm | 45 +- src/bootsupport/modules/punk/mix/cli-0.3.1.tm | 24 +- src/modules/punk/du-999999.0a1.0.tm | 1 + src/modules/punk/lib-999999.0a1.0.tm | 45 +- src/modules/punk/mix/cli-999999.0a1.0.tm | 24 +- src/modules/punk/nav/fs-999999.0a1.0.tm | 120 ++- src/modules/punk/winlnk-999999.0a1.0.tm | 6 +- .../src/bootsupport/modules/fauxlink-0.1.0.tm | 468 +++++++++++ .../modules/include_modules.config | 1 + .../src/bootsupport/modules/punk/du-0.1.0.tm | 1 + .../src/bootsupport/modules/punk/lib-0.1.1.tm | 45 +- .../bootsupport/modules/punk/mix/cli-0.3.1.tm | 24 +- .../src/bootsupport/modules/fauxlink-0.1.0.tm | 468 +++++++++++ .../modules/include_modules.config | 1 + .../src/bootsupport/modules/punk/du-0.1.0.tm | 1 + .../src/bootsupport/modules/punk/lib-0.1.1.tm | 45 +- .../bootsupport/modules/punk/mix/cli-0.3.1.tm | 24 +- src/vendormodules/fauxlink-0.1.0.tm | 170 +++- src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm | 468 +++++++++++ src/vfs/_vfscommon/modules/punk/du-0.1.0.tm | 1 + src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm | 45 +- .../_vfscommon/modules/punk/mix/cli-0.3.1.tm | 24 +- .../punk/mix/commandset/project-0.1.0.tm | 6 +- .../_vfscommon/modules/punk/nav/fs-0.1.0.tm | 120 ++- src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm | 68 +- .../_vfscommon/modules/punk/winlnk-0.1.0.tm | 561 +++++++++++++ .../_vfscommon/modules/test/tomlish-1.1.1.tm | Bin 46243 -> 46279 bytes src/vfs/_vfscommon/modules/tomlish-1.1.1.tm | 745 ++++++++++++------ 31 files changed, 3647 insertions(+), 374 deletions(-) create mode 100644 src/bootsupport/modules/fauxlink-0.1.0.tm create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm create mode 100644 src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm create mode 100644 src/vfs/_vfscommon/modules/punk/winlnk-0.1.0.tm diff --git a/src/bootsupport/modules/fauxlink-0.1.0.tm b/src/bootsupport/modules/fauxlink-0.1.0.tm new file mode 100644 index 00000000..8424ce07 --- /dev/null +++ b/src/bootsupport/modules/fauxlink-0.1.0.tm @@ -0,0 +1,468 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application fauxlink 0.1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin fauxlink_module_fauxlink 0 0.1.0] +#[copyright "2024"] +#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}] +#[moddesc {fauxlink .fxlnk}] [comment {-- Description at end of page heading --}] +#[require fauxlink] +#[keywords symlink faux fake shortcut toml] +#[description] +#[para] A cross platform shortcut/symlink alternative. +#[para] Unapologetically ugly - but practical in certain circumstances. +#[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as +#[para] archiving and packaging systems. +#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable. +#[para] format of name #.fxlnk +#[para] where can be empty - then the effective nominal name is the tail of the +#[para] The + symbol substitutes for forward-slashes. +#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !) +#[para] We deliberately treat higher % sequences literally. +#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [heart]) can remain literal for linking to urls. +#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23 +#[para] e.g a link to a file file#A.txt in parent dir could be: +#[para] file%23A.txt#..+file%23A.txt.fxlnk +#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fxlnk +#[para] The can be unrelated to the actual target +#[para] e.g datafile.dat#..+file%23A.txt.fxlnk +#[para] This system has no filesystem support - and must be completely application driven. +#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform. +#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined +#[para] Extensions to behaviour should be added in the file as text data in Toml format, +#[para] with custom data being under a single application-chosen table name +#[para] The toplevel Toml table [fauxlink] is reserved for core extensions to this system. +#[para] Aside from the 2 used for delimiting (+ #) +#[para] certain characters which might normally be allowed in filesystems are required to be encoded +#[para] e.g space and tab are required to be %20 %09 +#[para] Others that require encoding are: * ? \ / | : ; " < > +#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it. +#[para] Control characters and other punctuation is optional to encode. +#[para] Generally utf-8 should be used where possible and unicode characters left as is where possible on modern systems. +#[para] Where encoding of unicode is desired in the nominalname or encodedtarget portions it can be specified as %UXXXXXXXX +#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character. +#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest +# +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded +# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it. +#Using fauxlink - a link would be: +# "my-program-files#++server+c+Program%20Files.fxlnk" +#If we needed the old-style literal %20 it would become +# "my-program-files#++server+c+Program%2520Files.fxlnk" + + +#*** !doctools +#[section Overview] +#[para] overview of fauxlink +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by fauxlink +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::class { + #*** !doctools + #[subsection {Namespace fauxlink::class}] + #[para] class definitions + if {[info commands [namespace current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink { + namespace export {[a-z]*}; # Convention: export all lowercase + + #todo - enforce utf-8 + + #literal unicode chars supported by modern filesystems - leave as is - REVIEW + + + variable encode_map + variable decode_map + #most filesystems don't allow NULL - map to empty string + + #Make sure % is not in encode_map + set encode_map [dict create\ + \x00 ""\ + { } %20\ + \t %09\ + + %2B\ + # %23\ + * %2A\ + ? %3F\ + \\ %5C\ + / %2F\ + | %7C\ + : %3A\ + {;} %3B\ + {"} %22\ + < %3C\ + > %3E\ + ] + #above have some overlap with ctrl codes below. + #no big deal as it's a dict + + #must_encode + # + # * ? \ / | : ; " < > \t + # also NUL to empty string + + # also ctrl chars 01 to 1F (1..31) + for {set i 1} {$i < 32} {incr i} { + set ch [format %c $i] + set enc "%[format %02X $i]" + set enc_lower [string tolower $enc] + dict set encode_map $ch $enc + dict set decode_map $enc $ch + dict set decode_map $enc_lower $ch + } + + variable must_encode + set must_encode [dict keys $encode_map] + + + #if they are in + + #decode map doesn't include + # %00 (nul) + # %2F "/" + # %2f "/" + # %7f (del) + #we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention. + # + set decode_map [dict merge $decode_map [dict create\ + %09 \t\ + %20 { }\ + %21 "!"\ + %22 {"}\ + %23 "#"\ + %24 "$"\ + %25 "%"\ + %26 "&"\ + %27 "'"\ + %28 "("\ + %29 ")"\ + %2A "*"\ + %2a "*"\ + %2B "+"\ + %2b "+"\ + %2C ","\ + %2c ","\ + %2D "-"\ + %2d "-"\ + %2E "."\ + %2e "."\ + %3A ":"\ + %3a ":"\ + %3B {;}\ + %3b {;}\ + %3D "="\ + %3C "<"\ + %3c "<"\ + %3d "="\ + %3E ">"\ + %3e ">"\ + %3F "?"\ + %3f "?"\ + %40 "@"\ + %5B "\["\ + %5b "\["\ + %5C "\\"\ + %5c "\\"\ + %5D "\]"\ + %5d "\]"\ + %5E "^"\ + %5e "^"\ + %60 "`"\ + %7B "{"\ + %7b "{"\ + %7C "|"\ + %7c "|"\ + %7D "}"\ + %7d "}"\ + %7E "~"\ + %7e "~"\ + ]] + #Don't go above 7f + #if we want to specify p + + + #*** !doctools + #[subsection {Namespace fauxlink}] + #[para] Core API functions for fauxlink + #[list_begin definitions] + + proc resolve {link} { + variable decode_map + variable encode_map + variable must_encode + set ftail [file tail $link] + if {[file extension $ftail] ni [list .fxlnk .fauxlink]} { + error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink" + } + set linkspec [file rootname $ftail] + # - any # or + within the target path or name should have been uri encoded as %23 and %2b + if {[tcl::string::first # $linkspec] < 0} { + error "fauxlink::resolve error. Link must contain a # (usually at start if name matches target)" + } + #only the 1st 2 parts of split on # are significant. + #if there are more # chars present - the subsequent parts are effectively a comment + + #check namepec already has required chars encoded + lassign [split $linkspec #] namespec targetspec + #puts stderr "-->namespec $namespec" + set nametest [tcl::string::map $encode_map $namespec] + #puts stderr "-->nametest $nametest" + #nothing should be changed - if there are unencoded chars that must be encoded it is an error + if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} { + set err "fauxlink::resolve invalid chars in name part (section prior to first #)" + set idx 0 + foreach ch [split $namespec ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #see comments below regarding 2 rounds and ordering. + set name [decode_unicode_escapes $namespec] + set name [tcl::string::map $decode_map $name] + #puts stderr "-->name: $name" + + set targetsegment [split $targetspec +] + #check each + delimited part of targetspec already has required chars encoded + set s 0 ;#segment index + set result_segments [list] + foreach segment $targetsegment { + set targettest [tcl::string::map $encode_map $segment] + if {[tcl::string::length $targettest] ne [tcl::string::length $segment]} { + set err "fauxlink::resolve invalid chars in targetpath (section following first #)" + set idx 0 + foreach ch [split $segment ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " segment $s char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #2 rounds of substitution is possibly asking for trouble.. + #We allow anything in the resultant segments anyway (as %UXXXX... allows all) + #so it's not so much about what can be encoded, + # - but it makes it harder to reason about for users + # In particular - if we map %XX first it makes %25 -> % substitution tricky + # if the user requires a literal %UXXX - they can't do %25UXXX + # the double sub would make it %UXXX -> somechar anyway. + #we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere. + #There is still the opportunity to use things like %U00000025 followed by hex-chars + # and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW + set segment [decode_unicode_escapes $segment] + set segment [tcl::string::map $decode_map $segment] + lappend result_segments $segment + + incr s + } + set targetpath [join $result_segments /] + if {$name eq ""} { + set name [lindex $result_segments end] + } + + return [dict create name $name targetpath $targetpath] + } + variable map + + #default exclusion of / (%U2f and equivs) + #this would allow obfuscation of intention - when we have + for that anyway + proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} { + variable map + set ucstart [string first %U $str 0] + if {$ucstart < 0} { + return $str + } + set max 8 + set map [list] + set strend [expr {[string length $str]-1}] + while {$ucstart >= 0} { + set s $ucstart + set i [expr {$s +2}] ;#skip the %U + set hex "" + while {[tcl::string::length $hex] < 8 && $i <= $strend} { + set in [string index $str $i] + if {[tcl::string::is xdigit -strict $in]} { + append hex $in + } else { + break + } + incr i + } + if {$hex ne ""} { + incr i -1 + lappend map $s $i $hex + } + set ucstart [tcl::string::first %U $str $i] + } + set out "" + set lastidx -1 + set e 0 + foreach {s e hex} $map { + append out [string range $str $lastidx+1 $s-1] + set sub [format %c 0x$hex] + if {$sub in $exclusions} { + append out %U$hex ;#put it back + } else { + append out $sub + } + set lastidx $e + } + if {$e < [tcl::string::length $str]-1} { + append out [string range $str $e+1 end] + } + return $out + } + proc link_as {name target} { + + } + + #proc sample1 {p1 args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [opt {?option value...?}]] + # #[para]Description of sample1 + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace fauxlink::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +namespace eval fauxlink::system { + #*** !doctools + #[subsection {Namespace fauxlink::system}] + #[para] Internal functions that are not part of the API + + + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide fauxlink [namespace eval fauxlink { + variable pkg fauxlink + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/bootsupport/modules/include_modules.config b/src/bootsupport/modules/include_modules.config index 3993e0c9..68c78f3b 100644 --- a/src/bootsupport/modules/include_modules.config +++ b/src/bootsupport/modules/include_modules.config @@ -7,6 +7,7 @@ set bootsupport_modules [list\ src/vendormodules cksum\ src/vendormodules modpod\ src/vendormodules overtype\ + src/vendormodules fauxlink\ src/vendormodules oolib\ src/vendormodules http\ src/vendormodules dictutils\ diff --git a/src/bootsupport/modules/punk/du-0.1.0.tm b/src/bootsupport/modules/punk/du-0.1.0.tm index 1eca1f47..f2ee38b5 100644 --- a/src/bootsupport/modules/punk/du-0.1.0.tm +++ b/src/bootsupport/modules/punk/du-0.1.0.tm @@ -942,6 +942,7 @@ namespace eval punk::du { #struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!) #relying on struct::set to remove dupes is somewhat risky. It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets' #remove links and . .. from directories, remove links from files + #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [struct::set difference [concat $hfiles $files[unset files]] $links] set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] diff --git a/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/bootsupport/modules/punk/lib-0.1.1.tm index 872e4807..8f51075e 100644 --- a/src/bootsupport/modules/punk/lib-0.1.1.tm +++ b/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -372,20 +372,40 @@ namespace eval punk::lib { proc lswap {lvar a z} { upvar $lvar l if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { - #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #lindex_resolve_basic returns only -1 if out of range + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred #(e.g using: lswap mylist end-2 end on a two element list) #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -3 and -2 special results being lower and upper bound breaches respectively (-1 never returned) set a_index [lindex_resolve $l $a] set a_msg "" switch -- $a_index { -2 { - "$a is greater th + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" } -3 { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" } } - error "lswap cannot indices $a and $z $a is out of range" + set z_index [lindex_resolve $l $z] + set z_msg "" + switch -- $z_index { + -2 { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + -3 { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg } set item2 [lindex $l $z] lset l $z [lindex $l $a] @@ -397,6 +417,7 @@ namespace eval punk::lib { # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] #} + proc lswap2 {lvar a z} { upvar $lvar l #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower @@ -3021,6 +3042,10 @@ namespace eval punk::lib { set localeid [twapi::get_system_default_lcid] } } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 set results [list] set nums [objclone $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list @@ -3036,15 +3061,21 @@ namespace eval punk::lib { lappend results [twapi::format_number $number $localeid -idigits -1] continue } else { - if {$delim eq ""} {set delim ","} - if {$groupsize eq ""} {set groupsize 3} + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] continue } } #todo - get configured user defaults - set delim "," - set groupsize 3 + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } lappend results [delimit_number $number $delim $groupsize] } diff --git a/src/bootsupport/modules/punk/mix/cli-0.3.1.tm b/src/bootsupport/modules/punk/mix/cli-0.3.1.tm index cd6f3025..39346d5d 100644 --- a/src/bootsupport/modules/punk/mix/cli-0.3.1.tm +++ b/src/bootsupport/modules/punk/mix/cli-0.3.1.tm @@ -692,14 +692,22 @@ namespace eval punk::mix::cli { #punk::zip::mkzip stores permissions - (unix style) which confuses zipfs when reading - it misidentifies dirs as files } #zipfs mkzip does exactly what we need anyway in this case - set wd [pwd] - cd $buildfolder - puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" - zipfs mkzip $zipfile #modpod-$basename-$module_build_version - cd $wd - - package require modpod - modpod::lib::make_zip_modpod $zipfile $modulefile + #unfortunately it's not available in all Tclsh versions we might be running.. + if {[llength [info commands zipfs]]} { + set wd [pwd] + cd $buildfolder + puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" + zipfs mkzip $zipfile #modpod-$basename-$module_build_version + cd $wd + + package require modpod + modpod::lib::make_zip_modpod $zipfile $modulefile + } else { + #TODO - review punk::zip::mkzip and/or external zip to provide a fallback? + set had_err 1 + lappend notest "zipfs_unavailable" + puts stderr "WARNING: zipfs unavailable can't build $modulefile" + } if {$had_error} { diff --git a/src/modules/punk/du-999999.0a1.0.tm b/src/modules/punk/du-999999.0a1.0.tm index 554c3eda..3a9332de 100644 --- a/src/modules/punk/du-999999.0a1.0.tm +++ b/src/modules/punk/du-999999.0a1.0.tm @@ -942,6 +942,7 @@ namespace eval punk::du { #struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!) #relying on struct::set to remove dupes is somewhat risky. It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets' #remove links and . .. from directories, remove links from files + #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [struct::set difference [concat $hfiles $files[unset files]] $links] set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] diff --git a/src/modules/punk/lib-999999.0a1.0.tm b/src/modules/punk/lib-999999.0a1.0.tm index 5463c32e..3e9884e8 100644 --- a/src/modules/punk/lib-999999.0a1.0.tm +++ b/src/modules/punk/lib-999999.0a1.0.tm @@ -372,20 +372,40 @@ namespace eval punk::lib { proc lswap {lvar a z} { upvar $lvar l if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { - #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #lindex_resolve_basic returns only -1 if out of range + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred #(e.g using: lswap mylist end-2 end on a two element list) #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -3 and -2 special results being lower and upper bound breaches respectively (-1 never returned) set a_index [lindex_resolve $l $a] set a_msg "" switch -- $a_index { -2 { - "$a is greater th + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" } -3 { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" } } - error "lswap cannot indices $a and $z $a is out of range" + set z_index [lindex_resolve $l $z] + set z_msg "" + switch -- $z_index { + -2 { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + -3 { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg } set item2 [lindex $l $z] lset l $z [lindex $l $a] @@ -397,6 +417,7 @@ namespace eval punk::lib { # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] #} + proc lswap2 {lvar a z} { upvar $lvar l #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower @@ -3021,6 +3042,10 @@ namespace eval punk::lib { set localeid [twapi::get_system_default_lcid] } } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 set results [list] set nums [objclone $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list @@ -3036,15 +3061,21 @@ namespace eval punk::lib { lappend results [twapi::format_number $number $localeid -idigits -1] continue } else { - if {$delim eq ""} {set delim ","} - if {$groupsize eq ""} {set groupsize 3} + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] continue } } #todo - get configured user defaults - set delim "," - set groupsize 3 + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } lappend results [delimit_number $number $delim $groupsize] } diff --git a/src/modules/punk/mix/cli-999999.0a1.0.tm b/src/modules/punk/mix/cli-999999.0a1.0.tm index daab2d76..0a0ddcef 100644 --- a/src/modules/punk/mix/cli-999999.0a1.0.tm +++ b/src/modules/punk/mix/cli-999999.0a1.0.tm @@ -692,14 +692,22 @@ namespace eval punk::mix::cli { #punk::zip::mkzip stores permissions - (unix style) which confuses zipfs when reading - it misidentifies dirs as files } #zipfs mkzip does exactly what we need anyway in this case - set wd [pwd] - cd $buildfolder - puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" - zipfs mkzip $zipfile #modpod-$basename-$module_build_version - cd $wd - - package require modpod - modpod::lib::make_zip_modpod $zipfile $modulefile + #unfortunately it's not available in all Tclsh versions we might be running.. + if {[llength [info commands zipfs]]} { + set wd [pwd] + cd $buildfolder + puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" + zipfs mkzip $zipfile #modpod-$basename-$module_build_version + cd $wd + + package require modpod + modpod::lib::make_zip_modpod $zipfile $modulefile + } else { + #TODO - review punk::zip::mkzip and/or external zip to provide a fallback? + set had_err 1 + lappend notest "zipfs_unavailable" + puts stderr "WARNING: zipfs unavailable can't build $modulefile" + } if {$had_error} { diff --git a/src/modules/punk/nav/fs-999999.0a1.0.tm b/src/modules/punk/nav/fs-999999.0a1.0.tm index c897eeee..3c609beb 100644 --- a/src/modules/punk/nav/fs-999999.0a1.0.tm +++ b/src/modules/punk/nav/fs-999999.0a1.0.tm @@ -203,14 +203,15 @@ tcl::namespace::eval punk::nav::fs { } set matchinfo [dirfiles_dict -searchbase [pwd]] } - set dircount [llength [dict get $matchinfo dirs]] - set filecount [llength [dict get $matchinfo files]] + set dircount [llength [dict get $matchinfo dirs]] + set filecount [llength [dict get $matchinfo files]] + set symlinkcount [llength [dict get $matchinfo links]] ;#doesn't include windows shelllinks (.lnk) #set location [file normalize [dict get $matchinfo location]] set location [dict get $matchinfo location] #result for glob is count of matches - use dirfiles etc for script access to results - set result [list location $location dircount $dircount filecount $filecount] + set result [list location $location dircount $dircount filecount $filecount symlinks $symlinkcount] set filesizes [dict get $matchinfo filesizes] if {[llength $filesizes]} { set filesizes [lsearch -all -inline -not $filesizes na] @@ -383,6 +384,7 @@ tcl::namespace::eval punk::nav::fs { set location [file normalize [dict get $matchinfo location]] if {[string match //xzipfs:/* $location] || $location ne $last_location} { + #REVIEW - zipfs test disabled with leading x #emit previous result if {[dict size $this_result]} { dict set this_result filebytes [punk::lib::format_number [dict get $this_result filebytes]] @@ -1035,9 +1037,6 @@ tcl::namespace::eval punk::nav::fs { lappend nonportable {*}[dict get $contents nonportable] ;# illegal file/folder names from windows perspective lappend vfsmounts {*}[dict get $contents vfsmounts] } - if {$opt_formatsizes} { - set filesizes [punk::lib::format_number $filesizes] - } if {$opt_stripbase && $common_base ne ""} { set filetails [list] @@ -1049,33 +1048,124 @@ tcl::namespace::eval punk::nav::fs { } set $fileset $stripped } + #Note: we need to remember to use common_base to rebuild the key when we need to query the dict-based elements: sizes & times - because we didn't strip those keys. } + + # -- --- --- --- --- --- --- --- --- --- --- + #assign symlinks to the dirs or files collection (the punk::du system doesn't sort this out + #As at 2024-09 for windows symlinks - Tcl can't do file readlink on symlinks created with mklink /D name target (SYMLINKD) or mklink name target (SYMLINK) + #We can't read the target information - best we can do is classify it as a file or a dir + #we can't use 'file type' as that will report just 'link' - but file isfile and file isdirectory work and should work for links on all platforms - REVIEW + set file_symlinks [list] + set dir_symlinks [list] + set dir_shortcuts [list] ;#windows shell links (.lnk) that have a target that is a directory + foreach s $links { + if {[file isfile $s]} { + lappend file_symlinks $s + #will be appended in finfo_plus later + } elseif {[file isdirectory $s]} { + lappend dir_symlinks $s + lappend dirs $s + } else { + #dunno - warn for now + puts stderr "Warning - cannot determine link type for link $s" + } + } + #we now have the issue that our symlinks aren't sorted within the dir/file categorisation - they currently will have to appear at beginning or end - TODO + # -- --- --- --- --- --- --- --- --- --- --- + + #todo - sort whilst maintaining order for metadata? + #we need to co-sort files only with filesizes (other info such as times is keyed on fname so cosorting not required) + + #we can't sort on filesize after format_number (unless we were to enforce delim _ which we don't want to do) + if {$opt_formatsizes} { + set filesizes [punk::lib::format_number $filesizes] ;#accepts a list and will process each + } #col2 with subcolumns #remove punk::pipedata dependency - allow use of punk::nav::fs without punk package #set widest2a [punk::pipedata [list {*}$files ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] #widest2a.= concat $files [list ""] |> .=>2 lmap v {string length $v} |> .=>* tcl::mathfunc::max - set widest2a [tcl::mathfunc::max {*}[lmap v [concat $files [list ""]] {string length $v}]] + set widest2a [tcl::mathfunc::max {*}[lmap v [list {*}$files {*}$file_symlinks ""] {string length $v}]] set c2a [string repeat " " [expr {$widest2a + 1}]] #set widest2b [punk::pipedata [list {*}$filesizes ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] - set widest2b [tcl::mathfunc::max {*}[lmap v [concat $filesizes [list ""]] {string length $v}]] + set widest2b [tcl::mathfunc::max {*}[lmap v [list {*}$filesizes ""] {string length $v}]] set c2b [string repeat " " [expr {$widest2b + 1}]] set finfo [list] foreach f $files s $filesizes { #note - the display entry isn't necessarily a valid tcl list e.g filename with unbalanced curly braces - #hence we need to keep the filename, as well properly protected as a list element + #hence we need to keep the filename as well, properly protected as a list element lappend finfo [list file $f display "[overtype::left $c2a $f] [overtype::right $c2b $s]"] } + set flink_style [punk::ansi::a+ undercurly underline undt-green] ;#curly green underline with fallback to normal underline + set dlink_style [punk::ansi::a+ undercurly underline undt-green] + #We use an underline so the visual styling of a link can coexist with fg/bg colors applied for other attributes such as hidden + foreach flink $file_symlinks { + lappend finfo [list file $flink display "$flink_style[overtype::left $c2a $flink] [overtype::right $c2b 0]"] + } + + set fshortcut_style [punk::ansi::a+ underdotted underline undt-hotpink] + set dshortcut_style [punk::ansi::a+ underdotted underline undt-hotpink] + #examine windows .lnk shell link files (shortcuts) - these could be encountered on other platforms too - we should still be able to read them + #review - symlink to shortcut? hopefully will just work + #classify as file or directory - fallback to file if unknown/undeterminable + set finfo_plus [list] + foreach fdict $finfo { + set fname [dict get $fdict file] + if {[file extension $fname] eq ".lnk"} { + if {![catch {package require punk::winlnk}]} { + set shortcutinfo [punk::winlnk::file_get_info $fname] + set target_type "file" ;#default/fallback + if {[dict exists $shortcutinfo link_target]} { + set tgt [dict get $shortcutinfo link_target] + if {[file exists $tgt]} { + #file type could return 'link' - we will use ifile/isdirectory + if {[file isfile $tgt]} { + set target_type file + } elseif {[file isdirectory $tgt]} { + set target_type directory + } else { + set target_type file ;## ? + } + } else { + #todo - see if punk::winlnk has info about the type at the time of linking + #for now - treat as file + } + } + switch -- $target_type { + file { + set display [dict get $fdict display] + set display $fshortcut_style$display ;# + dict set fdict display $display + lappend finfo_plus $fdict + } + directory { + #target of link is a dir - for display/categorisation purposes we want to see it as a dir + #will be styled later based on membership of dir_shortcuts + lappend dirs $fname + lappend dir_shortcuts $fname + } + } + } + #if we don't have punk::winlnk to read the .lnk - it will get no special highlighting and just appear as an ordinary file even if it points to a dir + } else { + lappend finfo_plus $fdict + } + } + unset finfo + + #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] set widest1 [tcl::mathfunc::max {*}[lmap v [concat $dirs [list ""]] {string length $v}]] set displaylist [list] set col1 [string repeat " " [expr {$widest1 + 2}]] - foreach d $dirs filerec $finfo { + set RST [punk::ansi::a] + foreach d $dirs filerec $finfo_plus { set d1 [punk::ansi::a+ cyan bold] set d2 [punk::ansi::a+ defaultfg defaultbg normal] #set f1 [punk::ansi::a+ white bold] @@ -1088,7 +1178,7 @@ tcl::namespace::eval punk::nav::fs { } if {$d in $vfsmounts} { if {$d in $flaggedhidden} { - #we could have a hidden dir which is also a vfs.. color will be overridden giving no indicatio of 'hidden' status - REVIEW + #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indicatio of 'hidden' status - REVIEW #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) #mark it differently for now.. (todo bug report?) if {$d in $nonportable} { @@ -1108,6 +1198,12 @@ tcl::namespace::eval punk::nav::fs { set d1 [punk::ansi::a+ red bold] } } + #dlink-style & dshortcut_style are for underlines - can be added with colours already set + if {$d in $dir_symlinks} { + append d1 $dlink_style + } elseif {$d in $dir_shortcuts} { + append d1 $dshortcut_style + } } if {[llength $filerec]} { set fname [dict get $filerec file] @@ -1120,7 +1216,7 @@ tcl::namespace::eval punk::nav::fs { } } } - lappend displaylist $d1[overtype::left $col1 $d]$d2$f1$fdisp$f2 + lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST } return [punk::lib::list_as_lines $displaylist] diff --git a/src/modules/punk/winlnk-999999.0a1.0.tm b/src/modules/punk/winlnk-999999.0a1.0.tm index a9fa21e3..2925f40e 100644 --- a/src/modules/punk/winlnk-999999.0a1.0.tm +++ b/src/modules/punk/winlnk-999999.0a1.0.tm @@ -440,16 +440,16 @@ tcl::namespace::eval punk::winlnk { set localbase_path "" set suffix_path "" set linkinfocontent [dict get $linkinfo_content_dict content] - set link_file "" + set link_target "" if {$linkinfocontent ne ""} { set linkfields [LinkInfo_get_fields $linkinfocontent] set localbase_path [dict get $linkfields localbasepath] set suffix_path [dict get $linkfields commonpathsuffix] - set link_file [file join $localbase_path $suffix_path] + set link_target [file join $localbase_path $suffix_path] } set result [dict create\ - link_file $link_file\ + link_target $link_target\ link_flags $flags_enabled\ file_attributes ""\ create_time ""\ diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm new file mode 100644 index 00000000..8424ce07 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm @@ -0,0 +1,468 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application fauxlink 0.1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin fauxlink_module_fauxlink 0 0.1.0] +#[copyright "2024"] +#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}] +#[moddesc {fauxlink .fxlnk}] [comment {-- Description at end of page heading --}] +#[require fauxlink] +#[keywords symlink faux fake shortcut toml] +#[description] +#[para] A cross platform shortcut/symlink alternative. +#[para] Unapologetically ugly - but practical in certain circumstances. +#[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as +#[para] archiving and packaging systems. +#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable. +#[para] format of name #.fxlnk +#[para] where can be empty - then the effective nominal name is the tail of the +#[para] The + symbol substitutes for forward-slashes. +#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !) +#[para] We deliberately treat higher % sequences literally. +#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [heart]) can remain literal for linking to urls. +#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23 +#[para] e.g a link to a file file#A.txt in parent dir could be: +#[para] file%23A.txt#..+file%23A.txt.fxlnk +#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fxlnk +#[para] The can be unrelated to the actual target +#[para] e.g datafile.dat#..+file%23A.txt.fxlnk +#[para] This system has no filesystem support - and must be completely application driven. +#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform. +#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined +#[para] Extensions to behaviour should be added in the file as text data in Toml format, +#[para] with custom data being under a single application-chosen table name +#[para] The toplevel Toml table [fauxlink] is reserved for core extensions to this system. +#[para] Aside from the 2 used for delimiting (+ #) +#[para] certain characters which might normally be allowed in filesystems are required to be encoded +#[para] e.g space and tab are required to be %20 %09 +#[para] Others that require encoding are: * ? \ / | : ; " < > +#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it. +#[para] Control characters and other punctuation is optional to encode. +#[para] Generally utf-8 should be used where possible and unicode characters left as is where possible on modern systems. +#[para] Where encoding of unicode is desired in the nominalname or encodedtarget portions it can be specified as %UXXXXXXXX +#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character. +#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest +# +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded +# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it. +#Using fauxlink - a link would be: +# "my-program-files#++server+c+Program%20Files.fxlnk" +#If we needed the old-style literal %20 it would become +# "my-program-files#++server+c+Program%2520Files.fxlnk" + + +#*** !doctools +#[section Overview] +#[para] overview of fauxlink +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by fauxlink +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::class { + #*** !doctools + #[subsection {Namespace fauxlink::class}] + #[para] class definitions + if {[info commands [namespace current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink { + namespace export {[a-z]*}; # Convention: export all lowercase + + #todo - enforce utf-8 + + #literal unicode chars supported by modern filesystems - leave as is - REVIEW + + + variable encode_map + variable decode_map + #most filesystems don't allow NULL - map to empty string + + #Make sure % is not in encode_map + set encode_map [dict create\ + \x00 ""\ + { } %20\ + \t %09\ + + %2B\ + # %23\ + * %2A\ + ? %3F\ + \\ %5C\ + / %2F\ + | %7C\ + : %3A\ + {;} %3B\ + {"} %22\ + < %3C\ + > %3E\ + ] + #above have some overlap with ctrl codes below. + #no big deal as it's a dict + + #must_encode + # + # * ? \ / | : ; " < > \t + # also NUL to empty string + + # also ctrl chars 01 to 1F (1..31) + for {set i 1} {$i < 32} {incr i} { + set ch [format %c $i] + set enc "%[format %02X $i]" + set enc_lower [string tolower $enc] + dict set encode_map $ch $enc + dict set decode_map $enc $ch + dict set decode_map $enc_lower $ch + } + + variable must_encode + set must_encode [dict keys $encode_map] + + + #if they are in + + #decode map doesn't include + # %00 (nul) + # %2F "/" + # %2f "/" + # %7f (del) + #we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention. + # + set decode_map [dict merge $decode_map [dict create\ + %09 \t\ + %20 { }\ + %21 "!"\ + %22 {"}\ + %23 "#"\ + %24 "$"\ + %25 "%"\ + %26 "&"\ + %27 "'"\ + %28 "("\ + %29 ")"\ + %2A "*"\ + %2a "*"\ + %2B "+"\ + %2b "+"\ + %2C ","\ + %2c ","\ + %2D "-"\ + %2d "-"\ + %2E "."\ + %2e "."\ + %3A ":"\ + %3a ":"\ + %3B {;}\ + %3b {;}\ + %3D "="\ + %3C "<"\ + %3c "<"\ + %3d "="\ + %3E ">"\ + %3e ">"\ + %3F "?"\ + %3f "?"\ + %40 "@"\ + %5B "\["\ + %5b "\["\ + %5C "\\"\ + %5c "\\"\ + %5D "\]"\ + %5d "\]"\ + %5E "^"\ + %5e "^"\ + %60 "`"\ + %7B "{"\ + %7b "{"\ + %7C "|"\ + %7c "|"\ + %7D "}"\ + %7d "}"\ + %7E "~"\ + %7e "~"\ + ]] + #Don't go above 7f + #if we want to specify p + + + #*** !doctools + #[subsection {Namespace fauxlink}] + #[para] Core API functions for fauxlink + #[list_begin definitions] + + proc resolve {link} { + variable decode_map + variable encode_map + variable must_encode + set ftail [file tail $link] + if {[file extension $ftail] ni [list .fxlnk .fauxlink]} { + error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink" + } + set linkspec [file rootname $ftail] + # - any # or + within the target path or name should have been uri encoded as %23 and %2b + if {[tcl::string::first # $linkspec] < 0} { + error "fauxlink::resolve error. Link must contain a # (usually at start if name matches target)" + } + #only the 1st 2 parts of split on # are significant. + #if there are more # chars present - the subsequent parts are effectively a comment + + #check namepec already has required chars encoded + lassign [split $linkspec #] namespec targetspec + #puts stderr "-->namespec $namespec" + set nametest [tcl::string::map $encode_map $namespec] + #puts stderr "-->nametest $nametest" + #nothing should be changed - if there are unencoded chars that must be encoded it is an error + if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} { + set err "fauxlink::resolve invalid chars in name part (section prior to first #)" + set idx 0 + foreach ch [split $namespec ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #see comments below regarding 2 rounds and ordering. + set name [decode_unicode_escapes $namespec] + set name [tcl::string::map $decode_map $name] + #puts stderr "-->name: $name" + + set targetsegment [split $targetspec +] + #check each + delimited part of targetspec already has required chars encoded + set s 0 ;#segment index + set result_segments [list] + foreach segment $targetsegment { + set targettest [tcl::string::map $encode_map $segment] + if {[tcl::string::length $targettest] ne [tcl::string::length $segment]} { + set err "fauxlink::resolve invalid chars in targetpath (section following first #)" + set idx 0 + foreach ch [split $segment ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " segment $s char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #2 rounds of substitution is possibly asking for trouble.. + #We allow anything in the resultant segments anyway (as %UXXXX... allows all) + #so it's not so much about what can be encoded, + # - but it makes it harder to reason about for users + # In particular - if we map %XX first it makes %25 -> % substitution tricky + # if the user requires a literal %UXXX - they can't do %25UXXX + # the double sub would make it %UXXX -> somechar anyway. + #we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere. + #There is still the opportunity to use things like %U00000025 followed by hex-chars + # and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW + set segment [decode_unicode_escapes $segment] + set segment [tcl::string::map $decode_map $segment] + lappend result_segments $segment + + incr s + } + set targetpath [join $result_segments /] + if {$name eq ""} { + set name [lindex $result_segments end] + } + + return [dict create name $name targetpath $targetpath] + } + variable map + + #default exclusion of / (%U2f and equivs) + #this would allow obfuscation of intention - when we have + for that anyway + proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} { + variable map + set ucstart [string first %U $str 0] + if {$ucstart < 0} { + return $str + } + set max 8 + set map [list] + set strend [expr {[string length $str]-1}] + while {$ucstart >= 0} { + set s $ucstart + set i [expr {$s +2}] ;#skip the %U + set hex "" + while {[tcl::string::length $hex] < 8 && $i <= $strend} { + set in [string index $str $i] + if {[tcl::string::is xdigit -strict $in]} { + append hex $in + } else { + break + } + incr i + } + if {$hex ne ""} { + incr i -1 + lappend map $s $i $hex + } + set ucstart [tcl::string::first %U $str $i] + } + set out "" + set lastidx -1 + set e 0 + foreach {s e hex} $map { + append out [string range $str $lastidx+1 $s-1] + set sub [format %c 0x$hex] + if {$sub in $exclusions} { + append out %U$hex ;#put it back + } else { + append out $sub + } + set lastidx $e + } + if {$e < [tcl::string::length $str]-1} { + append out [string range $str $e+1 end] + } + return $out + } + proc link_as {name target} { + + } + + #proc sample1 {p1 args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [opt {?option value...?}]] + # #[para]Description of sample1 + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace fauxlink::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +namespace eval fauxlink::system { + #*** !doctools + #[subsection {Namespace fauxlink::system}] + #[para] Internal functions that are not part of the API + + + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide fauxlink [namespace eval fauxlink { + variable pkg fauxlink + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config index 3993e0c9..68c78f3b 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config @@ -7,6 +7,7 @@ set bootsupport_modules [list\ src/vendormodules cksum\ src/vendormodules modpod\ src/vendormodules overtype\ + src/vendormodules fauxlink\ src/vendormodules oolib\ src/vendormodules http\ src/vendormodules dictutils\ diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm index 1eca1f47..f2ee38b5 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm @@ -942,6 +942,7 @@ namespace eval punk::du { #struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!) #relying on struct::set to remove dupes is somewhat risky. It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets' #remove links and . .. from directories, remove links from files + #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [struct::set difference [concat $hfiles $files[unset files]] $links] set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm index 872e4807..8f51075e 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -372,20 +372,40 @@ namespace eval punk::lib { proc lswap {lvar a z} { upvar $lvar l if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { - #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #lindex_resolve_basic returns only -1 if out of range + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred #(e.g using: lswap mylist end-2 end on a two element list) #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -3 and -2 special results being lower and upper bound breaches respectively (-1 never returned) set a_index [lindex_resolve $l $a] set a_msg "" switch -- $a_index { -2 { - "$a is greater th + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" } -3 { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" } } - error "lswap cannot indices $a and $z $a is out of range" + set z_index [lindex_resolve $l $z] + set z_msg "" + switch -- $z_index { + -2 { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + -3 { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg } set item2 [lindex $l $z] lset l $z [lindex $l $a] @@ -397,6 +417,7 @@ namespace eval punk::lib { # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] #} + proc lswap2 {lvar a z} { upvar $lvar l #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower @@ -3021,6 +3042,10 @@ namespace eval punk::lib { set localeid [twapi::get_system_default_lcid] } } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 set results [list] set nums [objclone $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list @@ -3036,15 +3061,21 @@ namespace eval punk::lib { lappend results [twapi::format_number $number $localeid -idigits -1] continue } else { - if {$delim eq ""} {set delim ","} - if {$groupsize eq ""} {set groupsize 3} + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] continue } } #todo - get configured user defaults - set delim "," - set groupsize 3 + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } lappend results [delimit_number $number $delim $groupsize] } diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm index cd6f3025..39346d5d 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm @@ -692,14 +692,22 @@ namespace eval punk::mix::cli { #punk::zip::mkzip stores permissions - (unix style) which confuses zipfs when reading - it misidentifies dirs as files } #zipfs mkzip does exactly what we need anyway in this case - set wd [pwd] - cd $buildfolder - puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" - zipfs mkzip $zipfile #modpod-$basename-$module_build_version - cd $wd - - package require modpod - modpod::lib::make_zip_modpod $zipfile $modulefile + #unfortunately it's not available in all Tclsh versions we might be running.. + if {[llength [info commands zipfs]]} { + set wd [pwd] + cd $buildfolder + puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" + zipfs mkzip $zipfile #modpod-$basename-$module_build_version + cd $wd + + package require modpod + modpod::lib::make_zip_modpod $zipfile $modulefile + } else { + #TODO - review punk::zip::mkzip and/or external zip to provide a fallback? + set had_err 1 + lappend notest "zipfs_unavailable" + puts stderr "WARNING: zipfs unavailable can't build $modulefile" + } if {$had_error} { diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm new file mode 100644 index 00000000..8424ce07 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/fauxlink-0.1.0.tm @@ -0,0 +1,468 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application fauxlink 0.1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin fauxlink_module_fauxlink 0 0.1.0] +#[copyright "2024"] +#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}] +#[moddesc {fauxlink .fxlnk}] [comment {-- Description at end of page heading --}] +#[require fauxlink] +#[keywords symlink faux fake shortcut toml] +#[description] +#[para] A cross platform shortcut/symlink alternative. +#[para] Unapologetically ugly - but practical in certain circumstances. +#[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as +#[para] archiving and packaging systems. +#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable. +#[para] format of name #.fxlnk +#[para] where can be empty - then the effective nominal name is the tail of the +#[para] The + symbol substitutes for forward-slashes. +#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !) +#[para] We deliberately treat higher % sequences literally. +#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [heart]) can remain literal for linking to urls. +#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23 +#[para] e.g a link to a file file#A.txt in parent dir could be: +#[para] file%23A.txt#..+file%23A.txt.fxlnk +#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fxlnk +#[para] The can be unrelated to the actual target +#[para] e.g datafile.dat#..+file%23A.txt.fxlnk +#[para] This system has no filesystem support - and must be completely application driven. +#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform. +#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined +#[para] Extensions to behaviour should be added in the file as text data in Toml format, +#[para] with custom data being under a single application-chosen table name +#[para] The toplevel Toml table [fauxlink] is reserved for core extensions to this system. +#[para] Aside from the 2 used for delimiting (+ #) +#[para] certain characters which might normally be allowed in filesystems are required to be encoded +#[para] e.g space and tab are required to be %20 %09 +#[para] Others that require encoding are: * ? \ / | : ; " < > +#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it. +#[para] Control characters and other punctuation is optional to encode. +#[para] Generally utf-8 should be used where possible and unicode characters left as is where possible on modern systems. +#[para] Where encoding of unicode is desired in the nominalname or encodedtarget portions it can be specified as %UXXXXXXXX +#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character. +#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest +# +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded +# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it. +#Using fauxlink - a link would be: +# "my-program-files#++server+c+Program%20Files.fxlnk" +#If we needed the old-style literal %20 it would become +# "my-program-files#++server+c+Program%2520Files.fxlnk" + + +#*** !doctools +#[section Overview] +#[para] overview of fauxlink +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by fauxlink +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::class { + #*** !doctools + #[subsection {Namespace fauxlink::class}] + #[para] class definitions + if {[info commands [namespace current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink { + namespace export {[a-z]*}; # Convention: export all lowercase + + #todo - enforce utf-8 + + #literal unicode chars supported by modern filesystems - leave as is - REVIEW + + + variable encode_map + variable decode_map + #most filesystems don't allow NULL - map to empty string + + #Make sure % is not in encode_map + set encode_map [dict create\ + \x00 ""\ + { } %20\ + \t %09\ + + %2B\ + # %23\ + * %2A\ + ? %3F\ + \\ %5C\ + / %2F\ + | %7C\ + : %3A\ + {;} %3B\ + {"} %22\ + < %3C\ + > %3E\ + ] + #above have some overlap with ctrl codes below. + #no big deal as it's a dict + + #must_encode + # + # * ? \ / | : ; " < > \t + # also NUL to empty string + + # also ctrl chars 01 to 1F (1..31) + for {set i 1} {$i < 32} {incr i} { + set ch [format %c $i] + set enc "%[format %02X $i]" + set enc_lower [string tolower $enc] + dict set encode_map $ch $enc + dict set decode_map $enc $ch + dict set decode_map $enc_lower $ch + } + + variable must_encode + set must_encode [dict keys $encode_map] + + + #if they are in + + #decode map doesn't include + # %00 (nul) + # %2F "/" + # %2f "/" + # %7f (del) + #we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention. + # + set decode_map [dict merge $decode_map [dict create\ + %09 \t\ + %20 { }\ + %21 "!"\ + %22 {"}\ + %23 "#"\ + %24 "$"\ + %25 "%"\ + %26 "&"\ + %27 "'"\ + %28 "("\ + %29 ")"\ + %2A "*"\ + %2a "*"\ + %2B "+"\ + %2b "+"\ + %2C ","\ + %2c ","\ + %2D "-"\ + %2d "-"\ + %2E "."\ + %2e "."\ + %3A ":"\ + %3a ":"\ + %3B {;}\ + %3b {;}\ + %3D "="\ + %3C "<"\ + %3c "<"\ + %3d "="\ + %3E ">"\ + %3e ">"\ + %3F "?"\ + %3f "?"\ + %40 "@"\ + %5B "\["\ + %5b "\["\ + %5C "\\"\ + %5c "\\"\ + %5D "\]"\ + %5d "\]"\ + %5E "^"\ + %5e "^"\ + %60 "`"\ + %7B "{"\ + %7b "{"\ + %7C "|"\ + %7c "|"\ + %7D "}"\ + %7d "}"\ + %7E "~"\ + %7e "~"\ + ]] + #Don't go above 7f + #if we want to specify p + + + #*** !doctools + #[subsection {Namespace fauxlink}] + #[para] Core API functions for fauxlink + #[list_begin definitions] + + proc resolve {link} { + variable decode_map + variable encode_map + variable must_encode + set ftail [file tail $link] + if {[file extension $ftail] ni [list .fxlnk .fauxlink]} { + error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink" + } + set linkspec [file rootname $ftail] + # - any # or + within the target path or name should have been uri encoded as %23 and %2b + if {[tcl::string::first # $linkspec] < 0} { + error "fauxlink::resolve error. Link must contain a # (usually at start if name matches target)" + } + #only the 1st 2 parts of split on # are significant. + #if there are more # chars present - the subsequent parts are effectively a comment + + #check namepec already has required chars encoded + lassign [split $linkspec #] namespec targetspec + #puts stderr "-->namespec $namespec" + set nametest [tcl::string::map $encode_map $namespec] + #puts stderr "-->nametest $nametest" + #nothing should be changed - if there are unencoded chars that must be encoded it is an error + if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} { + set err "fauxlink::resolve invalid chars in name part (section prior to first #)" + set idx 0 + foreach ch [split $namespec ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #see comments below regarding 2 rounds and ordering. + set name [decode_unicode_escapes $namespec] + set name [tcl::string::map $decode_map $name] + #puts stderr "-->name: $name" + + set targetsegment [split $targetspec +] + #check each + delimited part of targetspec already has required chars encoded + set s 0 ;#segment index + set result_segments [list] + foreach segment $targetsegment { + set targettest [tcl::string::map $encode_map $segment] + if {[tcl::string::length $targettest] ne [tcl::string::length $segment]} { + set err "fauxlink::resolve invalid chars in targetpath (section following first #)" + set idx 0 + foreach ch [split $segment ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " segment $s char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #2 rounds of substitution is possibly asking for trouble.. + #We allow anything in the resultant segments anyway (as %UXXXX... allows all) + #so it's not so much about what can be encoded, + # - but it makes it harder to reason about for users + # In particular - if we map %XX first it makes %25 -> % substitution tricky + # if the user requires a literal %UXXX - they can't do %25UXXX + # the double sub would make it %UXXX -> somechar anyway. + #we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere. + #There is still the opportunity to use things like %U00000025 followed by hex-chars + # and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW + set segment [decode_unicode_escapes $segment] + set segment [tcl::string::map $decode_map $segment] + lappend result_segments $segment + + incr s + } + set targetpath [join $result_segments /] + if {$name eq ""} { + set name [lindex $result_segments end] + } + + return [dict create name $name targetpath $targetpath] + } + variable map + + #default exclusion of / (%U2f and equivs) + #this would allow obfuscation of intention - when we have + for that anyway + proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} { + variable map + set ucstart [string first %U $str 0] + if {$ucstart < 0} { + return $str + } + set max 8 + set map [list] + set strend [expr {[string length $str]-1}] + while {$ucstart >= 0} { + set s $ucstart + set i [expr {$s +2}] ;#skip the %U + set hex "" + while {[tcl::string::length $hex] < 8 && $i <= $strend} { + set in [string index $str $i] + if {[tcl::string::is xdigit -strict $in]} { + append hex $in + } else { + break + } + incr i + } + if {$hex ne ""} { + incr i -1 + lappend map $s $i $hex + } + set ucstart [tcl::string::first %U $str $i] + } + set out "" + set lastidx -1 + set e 0 + foreach {s e hex} $map { + append out [string range $str $lastidx+1 $s-1] + set sub [format %c 0x$hex] + if {$sub in $exclusions} { + append out %U$hex ;#put it back + } else { + append out $sub + } + set lastidx $e + } + if {$e < [tcl::string::length $str]-1} { + append out [string range $str $e+1 end] + } + return $out + } + proc link_as {name target} { + + } + + #proc sample1 {p1 args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [opt {?option value...?}]] + # #[para]Description of sample1 + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace fauxlink::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +namespace eval fauxlink::system { + #*** !doctools + #[subsection {Namespace fauxlink::system}] + #[para] Internal functions that are not part of the API + + + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide fauxlink [namespace eval fauxlink { + variable pkg fauxlink + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config index 3993e0c9..68c78f3b 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config @@ -7,6 +7,7 @@ set bootsupport_modules [list\ src/vendormodules cksum\ src/vendormodules modpod\ src/vendormodules overtype\ + src/vendormodules fauxlink\ src/vendormodules oolib\ src/vendormodules http\ src/vendormodules dictutils\ diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm index 1eca1f47..f2ee38b5 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm @@ -942,6 +942,7 @@ namespace eval punk::du { #struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!) #relying on struct::set to remove dupes is somewhat risky. It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets' #remove links and . .. from directories, remove links from files + #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [struct::set difference [concat $hfiles $files[unset files]] $links] set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm index 872e4807..8f51075e 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -372,20 +372,40 @@ namespace eval punk::lib { proc lswap {lvar a z} { upvar $lvar l if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { - #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #lindex_resolve_basic returns only -1 if out of range + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred #(e.g using: lswap mylist end-2 end on a two element list) #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -3 and -2 special results being lower and upper bound breaches respectively (-1 never returned) set a_index [lindex_resolve $l $a] set a_msg "" switch -- $a_index { -2 { - "$a is greater th + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" } -3 { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" } } - error "lswap cannot indices $a and $z $a is out of range" + set z_index [lindex_resolve $l $z] + set z_msg "" + switch -- $z_index { + -2 { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + -3 { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg } set item2 [lindex $l $z] lset l $z [lindex $l $a] @@ -397,6 +417,7 @@ namespace eval punk::lib { # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] #} + proc lswap2 {lvar a z} { upvar $lvar l #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower @@ -3021,6 +3042,10 @@ namespace eval punk::lib { set localeid [twapi::get_system_default_lcid] } } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 set results [list] set nums [objclone $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list @@ -3036,15 +3061,21 @@ namespace eval punk::lib { lappend results [twapi::format_number $number $localeid -idigits -1] continue } else { - if {$delim eq ""} {set delim ","} - if {$groupsize eq ""} {set groupsize 3} + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] continue } } #todo - get configured user defaults - set delim "," - set groupsize 3 + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } lappend results [delimit_number $number $delim $groupsize] } diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm index cd6f3025..39346d5d 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm @@ -692,14 +692,22 @@ namespace eval punk::mix::cli { #punk::zip::mkzip stores permissions - (unix style) which confuses zipfs when reading - it misidentifies dirs as files } #zipfs mkzip does exactly what we need anyway in this case - set wd [pwd] - cd $buildfolder - puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" - zipfs mkzip $zipfile #modpod-$basename-$module_build_version - cd $wd - - package require modpod - modpod::lib::make_zip_modpod $zipfile $modulefile + #unfortunately it's not available in all Tclsh versions we might be running.. + if {[llength [info commands zipfs]]} { + set wd [pwd] + cd $buildfolder + puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" + zipfs mkzip $zipfile #modpod-$basename-$module_build_version + cd $wd + + package require modpod + modpod::lib::make_zip_modpod $zipfile $modulefile + } else { + #TODO - review punk::zip::mkzip and/or external zip to provide a fallback? + set had_err 1 + lappend notest "zipfs_unavailable" + puts stderr "WARNING: zipfs unavailable can't build $modulefile" + } if {$had_error} { diff --git a/src/vendormodules/fauxlink-0.1.0.tm b/src/vendormodules/fauxlink-0.1.0.tm index a7b1e264..8424ce07 100644 --- a/src/vendormodules/fauxlink-0.1.0.tm +++ b/src/vendormodules/fauxlink-0.1.0.tm @@ -9,7 +9,7 @@ # @@ Meta Begin # Application fauxlink 0.1.0 # Meta platform tcl -# Meta license +# Meta license MIT # @@ Meta End @@ -19,15 +19,55 @@ #*** !doctools #[manpage_begin fauxlink_module_fauxlink 0 0.1.0] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] -#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}] +#[moddesc {fauxlink .fxlnk}] [comment {-- Description at end of page heading --}] #[require fauxlink] -#[keywords module] +#[keywords symlink faux fake shortcut toml] #[description] -#[para] - - +#[para] A cross platform shortcut/symlink alternative. +#[para] Unapologetically ugly - but practical in certain circumstances. +#[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as +#[para] archiving and packaging systems. +#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable. +#[para] format of name #.fxlnk +#[para] where can be empty - then the effective nominal name is the tail of the +#[para] The + symbol substitutes for forward-slashes. +#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !) +#[para] We deliberately treat higher % sequences literally. +#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [heart]) can remain literal for linking to urls. +#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23 +#[para] e.g a link to a file file#A.txt in parent dir could be: +#[para] file%23A.txt#..+file%23A.txt.fxlnk +#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fxlnk +#[para] The can be unrelated to the actual target +#[para] e.g datafile.dat#..+file%23A.txt.fxlnk +#[para] This system has no filesystem support - and must be completely application driven. +#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform. +#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined +#[para] Extensions to behaviour should be added in the file as text data in Toml format, +#[para] with custom data being under a single application-chosen table name +#[para] The toplevel Toml table [fauxlink] is reserved for core extensions to this system. +#[para] Aside from the 2 used for delimiting (+ #) +#[para] certain characters which might normally be allowed in filesystems are required to be encoded +#[para] e.g space and tab are required to be %20 %09 +#[para] Others that require encoding are: * ? \ / | : ; " < > +#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it. +#[para] Control characters and other punctuation is optional to encode. +#[para] Generally utf-8 should be used where possible and unicode characters left as is where possible on modern systems. +#[para] Where encoding of unicode is desired in the nominalname or encodedtarget portions it can be specified as %UXXXXXXXX +#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character. +#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest +# # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded +# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it. +#Using fauxlink - a link would be: +# "my-program-files#++server+c+Program%20Files.fxlnk" +#If we needed the old-style literal %20 it would become +# "my-program-files#++server+c+Program%2520Files.fxlnk" + + #*** !doctools #[section Overview] #[para] overview of fauxlink @@ -126,10 +166,14 @@ namespace eval fauxlink { < %3C\ > %3E\ ] + #above have some overlap with ctrl codes below. + #no big deal as it's a dict + #must_encode # + # * ? \ / | : ; " < > \t # also NUL to empty string - # also ctrl chars 01 to 1F (1..31) + + # also ctrl chars 01 to 1F (1..31) for {set i 1} {$i < 32} {incr i} { set ch [format %c $i] set enc "%[format %02X $i]" @@ -143,7 +187,17 @@ namespace eval fauxlink { set must_encode [dict keys $encode_map] - set decode_map [dict create\ + #if they are in + + #decode map doesn't include + # %00 (nul) + # %2F "/" + # %2f "/" + # %7f (del) + #we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention. + # + set decode_map [dict merge $decode_map [dict create\ + %09 \t\ %20 { }\ %21 "!"\ %22 {"}\ @@ -160,8 +214,10 @@ namespace eval fauxlink { %2b "+"\ %2C ","\ %2c ","\ - %2F "/"\ - %2f "/"\ + %2D "-"\ + %2d "-"\ + %2E "."\ + %2e "."\ %3A ":"\ %3a ":"\ %3B {;}\ @@ -192,8 +248,9 @@ namespace eval fauxlink { %7d "}"\ %7E "~"\ %7e "~"\ - ] - + ]] + #Don't go above 7f + #if we want to specify p #*** !doctools @@ -206,8 +263,8 @@ namespace eval fauxlink { variable encode_map variable must_encode set ftail [file tail $link] - if {[file extension $ftail] ne ".fauxlink"} { - error "fauxlink::resolve refusing to process link $link - file extension must be .fauxlink" + if {[file extension $ftail] ni [list .fxlnk .fauxlink]} { + error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink" } set linkspec [file rootname $ftail] # - any # or + within the target path or name should have been uri encoded as %23 and %2b @@ -229,13 +286,19 @@ namespace eval fauxlink { foreach ch [split $namespec ""] { if {$ch in $must_encode} { set enc [dict get $encode_map $ch] - append err " char $idx should be encoded as $enc" \n + if {[dict exists $decode_map $enc]} { + append err " char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } } incr idx } error $err } - set name [tcl::string::map $decode_map $namespec] + #see comments below regarding 2 rounds and ordering. + set name [decode_unicode_escapes $namespec] + set name [tcl::string::map $decode_map $name] #puts stderr "-->name: $name" set targetsegment [split $targetspec +] @@ -250,20 +313,89 @@ namespace eval fauxlink { foreach ch [split $segment ""] { if {$ch in $must_encode} { set enc [dict get $encode_map $ch] - append err " segment $s char $idx should be encoded as $enc" \n + if {[dict exists $decode_map $enc]} { + append err " segment $s char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } } incr idx } error $err } - lappend result_segments [tcl::string::map $decode_map $segment] + #2 rounds of substitution is possibly asking for trouble.. + #We allow anything in the resultant segments anyway (as %UXXXX... allows all) + #so it's not so much about what can be encoded, + # - but it makes it harder to reason about for users + # In particular - if we map %XX first it makes %25 -> % substitution tricky + # if the user requires a literal %UXXX - they can't do %25UXXX + # the double sub would make it %UXXX -> somechar anyway. + #we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere. + #There is still the opportunity to use things like %U00000025 followed by hex-chars + # and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW + set segment [decode_unicode_escapes $segment] + set segment [tcl::string::map $decode_map $segment] + lappend result_segments $segment + incr s } set targetpath [join $result_segments /] + if {$name eq ""} { + set name [lindex $result_segments end] + } return [dict create name $name targetpath $targetpath] } - + variable map + + #default exclusion of / (%U2f and equivs) + #this would allow obfuscation of intention - when we have + for that anyway + proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} { + variable map + set ucstart [string first %U $str 0] + if {$ucstart < 0} { + return $str + } + set max 8 + set map [list] + set strend [expr {[string length $str]-1}] + while {$ucstart >= 0} { + set s $ucstart + set i [expr {$s +2}] ;#skip the %U + set hex "" + while {[tcl::string::length $hex] < 8 && $i <= $strend} { + set in [string index $str $i] + if {[tcl::string::is xdigit -strict $in]} { + append hex $in + } else { + break + } + incr i + } + if {$hex ne ""} { + incr i -1 + lappend map $s $i $hex + } + set ucstart [tcl::string::first %U $str $i] + } + set out "" + set lastidx -1 + set e 0 + foreach {s e hex} $map { + append out [string range $str $lastidx+1 $s-1] + set sub [format %c 0x$hex] + if {$sub in $exclusions} { + append out %U$hex ;#put it back + } else { + append out $sub + } + set lastidx $e + } + if {$e < [tcl::string::length $str]-1} { + append out [string range $str $e+1 end] + } + return $out + } proc link_as {name target} { } diff --git a/src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm b/src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm new file mode 100644 index 00000000..8424ce07 --- /dev/null +++ b/src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm @@ -0,0 +1,468 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application fauxlink 0.1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin fauxlink_module_fauxlink 0 0.1.0] +#[copyright "2024"] +#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}] +#[moddesc {fauxlink .fxlnk}] [comment {-- Description at end of page heading --}] +#[require fauxlink] +#[keywords symlink faux fake shortcut toml] +#[description] +#[para] A cross platform shortcut/symlink alternative. +#[para] Unapologetically ugly - but practical in certain circumstances. +#[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as +#[para] archiving and packaging systems. +#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable. +#[para] format of name #.fxlnk +#[para] where can be empty - then the effective nominal name is the tail of the +#[para] The + symbol substitutes for forward-slashes. +#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !) +#[para] We deliberately treat higher % sequences literally. +#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [heart]) can remain literal for linking to urls. +#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23 +#[para] e.g a link to a file file#A.txt in parent dir could be: +#[para] file%23A.txt#..+file%23A.txt.fxlnk +#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fxlnk +#[para] The can be unrelated to the actual target +#[para] e.g datafile.dat#..+file%23A.txt.fxlnk +#[para] This system has no filesystem support - and must be completely application driven. +#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform. +#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined +#[para] Extensions to behaviour should be added in the file as text data in Toml format, +#[para] with custom data being under a single application-chosen table name +#[para] The toplevel Toml table [fauxlink] is reserved for core extensions to this system. +#[para] Aside from the 2 used for delimiting (+ #) +#[para] certain characters which might normally be allowed in filesystems are required to be encoded +#[para] e.g space and tab are required to be %20 %09 +#[para] Others that require encoding are: * ? \ / | : ; " < > +#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it. +#[para] Control characters and other punctuation is optional to encode. +#[para] Generally utf-8 should be used where possible and unicode characters left as is where possible on modern systems. +#[para] Where encoding of unicode is desired in the nominalname or encodedtarget portions it can be specified as %UXXXXXXXX +#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character. +#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest +# +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded +# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it. +#Using fauxlink - a link would be: +# "my-program-files#++server+c+Program%20Files.fxlnk" +#If we needed the old-style literal %20 it would become +# "my-program-files#++server+c+Program%2520Files.fxlnk" + + +#*** !doctools +#[section Overview] +#[para] overview of fauxlink +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by fauxlink +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::class { + #*** !doctools + #[subsection {Namespace fauxlink::class}] + #[para] class definitions + if {[info commands [namespace current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink { + namespace export {[a-z]*}; # Convention: export all lowercase + + #todo - enforce utf-8 + + #literal unicode chars supported by modern filesystems - leave as is - REVIEW + + + variable encode_map + variable decode_map + #most filesystems don't allow NULL - map to empty string + + #Make sure % is not in encode_map + set encode_map [dict create\ + \x00 ""\ + { } %20\ + \t %09\ + + %2B\ + # %23\ + * %2A\ + ? %3F\ + \\ %5C\ + / %2F\ + | %7C\ + : %3A\ + {;} %3B\ + {"} %22\ + < %3C\ + > %3E\ + ] + #above have some overlap with ctrl codes below. + #no big deal as it's a dict + + #must_encode + # + # * ? \ / | : ; " < > \t + # also NUL to empty string + + # also ctrl chars 01 to 1F (1..31) + for {set i 1} {$i < 32} {incr i} { + set ch [format %c $i] + set enc "%[format %02X $i]" + set enc_lower [string tolower $enc] + dict set encode_map $ch $enc + dict set decode_map $enc $ch + dict set decode_map $enc_lower $ch + } + + variable must_encode + set must_encode [dict keys $encode_map] + + + #if they are in + + #decode map doesn't include + # %00 (nul) + # %2F "/" + # %2f "/" + # %7f (del) + #we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention. + # + set decode_map [dict merge $decode_map [dict create\ + %09 \t\ + %20 { }\ + %21 "!"\ + %22 {"}\ + %23 "#"\ + %24 "$"\ + %25 "%"\ + %26 "&"\ + %27 "'"\ + %28 "("\ + %29 ")"\ + %2A "*"\ + %2a "*"\ + %2B "+"\ + %2b "+"\ + %2C ","\ + %2c ","\ + %2D "-"\ + %2d "-"\ + %2E "."\ + %2e "."\ + %3A ":"\ + %3a ":"\ + %3B {;}\ + %3b {;}\ + %3D "="\ + %3C "<"\ + %3c "<"\ + %3d "="\ + %3E ">"\ + %3e ">"\ + %3F "?"\ + %3f "?"\ + %40 "@"\ + %5B "\["\ + %5b "\["\ + %5C "\\"\ + %5c "\\"\ + %5D "\]"\ + %5d "\]"\ + %5E "^"\ + %5e "^"\ + %60 "`"\ + %7B "{"\ + %7b "{"\ + %7C "|"\ + %7c "|"\ + %7D "}"\ + %7d "}"\ + %7E "~"\ + %7e "~"\ + ]] + #Don't go above 7f + #if we want to specify p + + + #*** !doctools + #[subsection {Namespace fauxlink}] + #[para] Core API functions for fauxlink + #[list_begin definitions] + + proc resolve {link} { + variable decode_map + variable encode_map + variable must_encode + set ftail [file tail $link] + if {[file extension $ftail] ni [list .fxlnk .fauxlink]} { + error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink" + } + set linkspec [file rootname $ftail] + # - any # or + within the target path or name should have been uri encoded as %23 and %2b + if {[tcl::string::first # $linkspec] < 0} { + error "fauxlink::resolve error. Link must contain a # (usually at start if name matches target)" + } + #only the 1st 2 parts of split on # are significant. + #if there are more # chars present - the subsequent parts are effectively a comment + + #check namepec already has required chars encoded + lassign [split $linkspec #] namespec targetspec + #puts stderr "-->namespec $namespec" + set nametest [tcl::string::map $encode_map $namespec] + #puts stderr "-->nametest $nametest" + #nothing should be changed - if there are unencoded chars that must be encoded it is an error + if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} { + set err "fauxlink::resolve invalid chars in name part (section prior to first #)" + set idx 0 + foreach ch [split $namespec ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #see comments below regarding 2 rounds and ordering. + set name [decode_unicode_escapes $namespec] + set name [tcl::string::map $decode_map $name] + #puts stderr "-->name: $name" + + set targetsegment [split $targetspec +] + #check each + delimited part of targetspec already has required chars encoded + set s 0 ;#segment index + set result_segments [list] + foreach segment $targetsegment { + set targettest [tcl::string::map $encode_map $segment] + if {[tcl::string::length $targettest] ne [tcl::string::length $segment]} { + set err "fauxlink::resolve invalid chars in targetpath (section following first #)" + set idx 0 + foreach ch [split $segment ""] { + if {$ch in $must_encode} { + set enc [dict get $encode_map $ch] + if {[dict exists $decode_map $enc]} { + append err " segment $s char $idx should be encoded as $enc" \n + } else { + append err " no %xx encoding available. Use %UXX if really required" \n + } + } + incr idx + } + error $err + } + #2 rounds of substitution is possibly asking for trouble.. + #We allow anything in the resultant segments anyway (as %UXXXX... allows all) + #so it's not so much about what can be encoded, + # - but it makes it harder to reason about for users + # In particular - if we map %XX first it makes %25 -> % substitution tricky + # if the user requires a literal %UXXX - they can't do %25UXXX + # the double sub would make it %UXXX -> somechar anyway. + #we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere. + #There is still the opportunity to use things like %U00000025 followed by hex-chars + # and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW + set segment [decode_unicode_escapes $segment] + set segment [tcl::string::map $decode_map $segment] + lappend result_segments $segment + + incr s + } + set targetpath [join $result_segments /] + if {$name eq ""} { + set name [lindex $result_segments end] + } + + return [dict create name $name targetpath $targetpath] + } + variable map + + #default exclusion of / (%U2f and equivs) + #this would allow obfuscation of intention - when we have + for that anyway + proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} { + variable map + set ucstart [string first %U $str 0] + if {$ucstart < 0} { + return $str + } + set max 8 + set map [list] + set strend [expr {[string length $str]-1}] + while {$ucstart >= 0} { + set s $ucstart + set i [expr {$s +2}] ;#skip the %U + set hex "" + while {[tcl::string::length $hex] < 8 && $i <= $strend} { + set in [string index $str $i] + if {[tcl::string::is xdigit -strict $in]} { + append hex $in + } else { + break + } + incr i + } + if {$hex ne ""} { + incr i -1 + lappend map $s $i $hex + } + set ucstart [tcl::string::first %U $str $i] + } + set out "" + set lastidx -1 + set e 0 + foreach {s e hex} $map { + append out [string range $str $lastidx+1 $s-1] + set sub [format %c 0x$hex] + if {$sub in $exclusions} { + append out %U$hex ;#put it back + } else { + append out $sub + } + set lastidx $e + } + if {$e < [tcl::string::length $str]-1} { + append out [string range $str $e+1 end] + } + return $out + } + proc link_as {name target} { + + } + + #proc sample1 {p1 args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [opt {?option value...?}]] + # #[para]Description of sample1 + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval fauxlink::lib { + namespace export {[a-z]*}; # Convention: export all lowercase + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace fauxlink::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace fauxlink::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +namespace eval fauxlink::system { + #*** !doctools + #[subsection {Namespace fauxlink::system}] + #[para] Internal functions that are not part of the API + + + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide fauxlink [namespace eval fauxlink { + variable pkg fauxlink + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/vfs/_vfscommon/modules/punk/du-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/du-0.1.0.tm index 1eca1f47..f2ee38b5 100644 --- a/src/vfs/_vfscommon/modules/punk/du-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/du-0.1.0.tm @@ -942,6 +942,7 @@ namespace eval punk::du { #struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!) #relying on struct::set to remove dupes is somewhat risky. It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets' #remove links and . .. from directories, remove links from files + #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [struct::set difference [concat $hfiles $files[unset files]] $links] set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] diff --git a/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm b/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm index 872e4807..8f51075e 100644 --- a/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm +++ b/src/vfs/_vfscommon/modules/punk/lib-0.1.1.tm @@ -372,20 +372,40 @@ namespace eval punk::lib { proc lswap {lvar a z} { upvar $lvar l if {[lindex_resolve_basic $l $a] < 0 || [lindex_resolve_basic $l $z] < 0} { - #if we didn't do this check - we could raise an error on second lset - leaving list corrupted because only one lset occurred + #lindex_resolve_basic returns only -1 if out of range + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred #(e.g using: lswap mylist end-2 end on a two element list) #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -3 and -2 special results being lower and upper bound breaches respectively (-1 never returned) set a_index [lindex_resolve $l $a] set a_msg "" switch -- $a_index { -2 { - "$a is greater th + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" } -3 { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" } } - error "lswap cannot indices $a and $z $a is out of range" + set z_index [lindex_resolve $l $z] + set z_msg "" + switch -- $z_index { + -2 { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + -3 { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg } set item2 [lindex $l $z] lset l $z [lindex $l $a] @@ -397,6 +417,7 @@ namespace eval punk::lib { # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] #} + proc lswap2 {lvar a z} { upvar $lvar l #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower @@ -3021,6 +3042,10 @@ namespace eval punk::lib { set localeid [twapi::get_system_default_lcid] } } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 set results [list] set nums [objclone $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list @@ -3036,15 +3061,21 @@ namespace eval punk::lib { lappend results [twapi::format_number $number $localeid -idigits -1] continue } else { - if {$delim eq ""} {set delim ","} - if {$groupsize eq ""} {set groupsize 3} + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] continue } } #todo - get configured user defaults - set delim "," - set groupsize 3 + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } lappend results [delimit_number $number $delim $groupsize] } diff --git a/src/vfs/_vfscommon/modules/punk/mix/cli-0.3.1.tm b/src/vfs/_vfscommon/modules/punk/mix/cli-0.3.1.tm index cd6f3025..39346d5d 100644 --- a/src/vfs/_vfscommon/modules/punk/mix/cli-0.3.1.tm +++ b/src/vfs/_vfscommon/modules/punk/mix/cli-0.3.1.tm @@ -692,14 +692,22 @@ namespace eval punk::mix::cli { #punk::zip::mkzip stores permissions - (unix style) which confuses zipfs when reading - it misidentifies dirs as files } #zipfs mkzip does exactly what we need anyway in this case - set wd [pwd] - cd $buildfolder - puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" - zipfs mkzip $zipfile #modpod-$basename-$module_build_version - cd $wd - - package require modpod - modpod::lib::make_zip_modpod $zipfile $modulefile + #unfortunately it's not available in all Tclsh versions we might be running.. + if {[llength [info commands zipfs]]} { + set wd [pwd] + cd $buildfolder + puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" + zipfs mkzip $zipfile #modpod-$basename-$module_build_version + cd $wd + + package require modpod + modpod::lib::make_zip_modpod $zipfile $modulefile + } else { + #TODO - review punk::zip::mkzip and/or external zip to provide a fallback? + set had_err 1 + lappend notest "zipfs_unavailable" + puts stderr "WARNING: zipfs unavailable can't build $modulefile" + } if {$had_error} { diff --git a/src/vfs/_vfscommon/modules/punk/mix/commandset/project-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/mix/commandset/project-0.1.0.tm index 9afc685c..80cab2a7 100644 --- a/src/vfs/_vfscommon/modules/punk/mix/commandset/project-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/mix/commandset/project-0.1.0.tm @@ -157,6 +157,9 @@ namespace eval punk::mix::commandset::project { set opt_force [dict get $opts -force] set opt_confirm [string tolower [dict get $opts -confirm]] # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_layout [dict get $opts -layout] + set opt_update [dict get $opts -update] + # -- --- --- --- --- --- --- --- --- --- --- --- --- set opt_modules [dict get $opts -modules] if {[llength $opt_modules] == 1 && [lindex $opt_modules 0] eq "\uFFFF"} { #if not specified - add a single module matching project name @@ -169,9 +172,6 @@ namespace eval punk::mix::commandset::project { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_layout [dict get $opts -layout] - set opt_update [dict get $opts -update] - # -- --- --- --- --- --- --- --- --- --- --- --- --- #todo - install support binaries on a per-project basis in a way that doesn't impact machine (e.g not added to path) - cache in user config dir if possible, supply mechanism to clear cache diff --git a/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm index f1934d3c..fdffa091 100644 --- a/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/nav/fs-0.1.0.tm @@ -203,14 +203,15 @@ tcl::namespace::eval punk::nav::fs { } set matchinfo [dirfiles_dict -searchbase [pwd]] } - set dircount [llength [dict get $matchinfo dirs]] - set filecount [llength [dict get $matchinfo files]] + set dircount [llength [dict get $matchinfo dirs]] + set filecount [llength [dict get $matchinfo files]] + set symlinkcount [llength [dict get $matchinfo links]] ;#doesn't include windows shelllinks (.lnk) #set location [file normalize [dict get $matchinfo location]] set location [dict get $matchinfo location] #result for glob is count of matches - use dirfiles etc for script access to results - set result [list location $location dircount $dircount filecount $filecount] + set result [list location $location dircount $dircount filecount $filecount symlinks $symlinkcount] set filesizes [dict get $matchinfo filesizes] if {[llength $filesizes]} { set filesizes [lsearch -all -inline -not $filesizes na] @@ -383,6 +384,7 @@ tcl::namespace::eval punk::nav::fs { set location [file normalize [dict get $matchinfo location]] if {[string match //xzipfs:/* $location] || $location ne $last_location} { + #REVIEW - zipfs test disabled with leading x #emit previous result if {[dict size $this_result]} { dict set this_result filebytes [punk::lib::format_number [dict get $this_result filebytes]] @@ -1035,9 +1037,6 @@ tcl::namespace::eval punk::nav::fs { lappend nonportable {*}[dict get $contents nonportable] ;# illegal file/folder names from windows perspective lappend vfsmounts {*}[dict get $contents vfsmounts] } - if {$opt_formatsizes} { - set filesizes [punk::lib::format_number $filesizes] - } if {$opt_stripbase && $common_base ne ""} { set filetails [list] @@ -1049,33 +1048,124 @@ tcl::namespace::eval punk::nav::fs { } set $fileset $stripped } + #Note: we need to remember to use common_base to rebuild the key when we need to query the dict-based elements: sizes & times - because we didn't strip those keys. } + + # -- --- --- --- --- --- --- --- --- --- --- + #assign symlinks to the dirs or files collection (the punk::du system doesn't sort this out + #As at 2024-09 for windows symlinks - Tcl can't do file readlink on symlinks created with mklink /D name target (SYMLINKD) or mklink name target (SYMLINK) + #We can't read the target information - best we can do is classify it as a file or a dir + #we can't use 'file type' as that will report just 'link' - but file isfile and file isdirectory work and should work for links on all platforms - REVIEW + set file_symlinks [list] + set dir_symlinks [list] + set dir_shortcuts [list] ;#windows shell links (.lnk) that have a target that is a directory + foreach s $links { + if {[file isfile $s]} { + lappend file_symlinks $s + #will be appended in finfo_plus later + } elseif {[file isdirectory $s]} { + lappend dir_symlinks $s + lappend dirs $s + } else { + #dunno - warn for now + puts stderr "Warning - cannot determine link type for link $s" + } + } + #we now have the issue that our symlinks aren't sorted within the dir/file categorisation - they currently will have to appear at beginning or end - TODO + # -- --- --- --- --- --- --- --- --- --- --- + + #todo - sort whilst maintaining order for metadata? + #we need to co-sort files only with filesizes (other info such as times is keyed on fname so cosorting not required) + + #we can't sort on filesize after format_number (unless we were to enforce delim _ which we don't want to do) + if {$opt_formatsizes} { + set filesizes [punk::lib::format_number $filesizes] ;#accepts a list and will process each + } #col2 with subcolumns #remove punk::pipedata dependency - allow use of punk::nav::fs without punk package #set widest2a [punk::pipedata [list {*}$files ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] #widest2a.= concat $files [list ""] |> .=>2 lmap v {string length $v} |> .=>* tcl::mathfunc::max - set widest2a [tcl::mathfunc::max {*}[lmap v [concat $files [list ""]] {string length $v}]] + set widest2a [tcl::mathfunc::max {*}[lmap v [list {*}$files {*}$file_symlinks ""] {string length $v}]] set c2a [string repeat " " [expr {$widest2a + 1}]] #set widest2b [punk::pipedata [list {*}$filesizes ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] - set widest2b [tcl::mathfunc::max {*}[lmap v [concat $filesizes [list ""]] {string length $v}]] + set widest2b [tcl::mathfunc::max {*}[lmap v [list {*}$filesizes ""] {string length $v}]] set c2b [string repeat " " [expr {$widest2b + 1}]] set finfo [list] foreach f $files s $filesizes { #note - the display entry isn't necessarily a valid tcl list e.g filename with unbalanced curly braces - #hence we need to keep the filename, as well properly protected as a list element + #hence we need to keep the filename as well, properly protected as a list element lappend finfo [list file $f display "[overtype::left $c2a $f] [overtype::right $c2b $s]"] } + set flink_style [punk::ansi::a+ undercurly underline undt-green] ;#curly green underline with fallback to normal underline + set dlink_style [punk::ansi::a+ undercurly underline undt-green] + #We use an underline so the visual styling of a link can coexist with fg/bg colors applied for other attributes such as hidden + foreach flink $file_symlinks { + lappend finfo [list file $flink display "$flink_style[overtype::left $c2a $flink] [overtype::right $c2b 0]"] + } + + set fshortcut_style [punk::ansi::a+ underdotted underline undt-hotpink] + set dshortcut_style [punk::ansi::a+ underdotted underline undt-hotpink] + #examine windows .lnk shell link files (shortcuts) - these could be encountered on other platforms too - we should still be able to read them + #review - symlink to shortcut? hopefully will just work + #classify as file or directory - fallback to file if unknown/undeterminable + set finfo_plus [list] + foreach fdict $finfo { + set fname [dict get $fdict file] + if {[file extension $fname] eq ".lnk"} { + if {![catch {package require punk::winlnk}]} { + set shortcutinfo [punk::winlnk::file_get_info $fname] + set target_type "file" ;#default/fallback + if {[dict exists $shortcutinfo link_target]} { + set tgt [dict get $shortcutinfo link_target] + if {[file exists $tgt]} { + #file type could return 'link' - we will use ifile/isdirectory + if {[file isfile $tgt]} { + set target_type file + } elseif {[file isdirectory $tgt]} { + set target_type directory + } else { + set target_type file ;## ? + } + } else { + #todo - see if punk::winlnk has info about the type at the time of linking + #for now - treat as file + } + } + switch -- $target_type { + file { + set display [dict get $fdict display] + set display $fshortcut_style$display ;# + dict set fdict display $display + lappend finfo_plus $fdict + } + directory { + #target of link is a dir - for display/categorisation purposes we want to see it as a dir + #will be styled later based on membership of dir_shortcuts + lappend dirs $fname + lappend dir_shortcuts $fname + } + } + } + #if we don't have punk::winlnk to read the .lnk - it will get no special highlighting and just appear as an ordinary file even if it points to a dir + } else { + lappend finfo_plus $fdict + } + } + unset finfo + + #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] set widest1 [tcl::mathfunc::max {*}[lmap v [concat $dirs [list ""]] {string length $v}]] set displaylist [list] set col1 [string repeat " " [expr {$widest1 + 2}]] - foreach d $dirs filerec $finfo { + set RST [punk::ansi::a] + foreach d $dirs filerec $finfo_plus { set d1 [punk::ansi::a+ cyan bold] set d2 [punk::ansi::a+ defaultfg defaultbg normal] #set f1 [punk::ansi::a+ white bold] @@ -1088,7 +1178,7 @@ tcl::namespace::eval punk::nav::fs { } if {$d in $vfsmounts} { if {$d in $flaggedhidden} { - #we could have a hidden dir which is also a vfs.. color will be overridden giving no indicatio of 'hidden' status - REVIEW + #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indicatio of 'hidden' status - REVIEW #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) #mark it differently for now.. (todo bug report?) if {$d in $nonportable} { @@ -1108,6 +1198,12 @@ tcl::namespace::eval punk::nav::fs { set d1 [punk::ansi::a+ red bold] } } + #dlink-style & dshortcut_style are for underlines - can be added with colours already set + if {$d in $dir_symlinks} { + append d1 $dlink_style + } elseif {$d in $dir_shortcuts} { + append d1 $dshortcut_style + } } if {[llength $filerec]} { set fname [dict get $filerec file] @@ -1120,7 +1216,7 @@ tcl::namespace::eval punk::nav::fs { } } } - lappend displaylist $d1[overtype::left $col1 $d]$d2$f1$fdisp$f2 + lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST } return [punk::lib::list_as_lines $displaylist] diff --git a/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm index cf0bf70c..10250a9b 100644 --- a/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/ns-0.1.0.tm @@ -1707,6 +1707,7 @@ tcl::namespace::eval punk::ns { lassign [internal::get_run_opts {-vars -nowarnings} {} $args] _r runopts _c cmdargs set use_vars [expr {"-vars" in $runopts}] set no_warnings [expr {"-nowarnings" in $runopts}] + set ver "" #todo support leading solo flags such as -capture to control whether we do a static capture of local vars in the ns @@ -1717,15 +1718,68 @@ tcl::namespace::eval punk::ns { } default { if {[string match ::* $pkg_or_existing_ns]} { + set pkg_unqualified [string range $pkg_or_existing_ns 2 end] if {![tcl::namespace::exists $pkg_or_existing_ns]} { - set ver [package require [string range $pkg_or_existing_ns 2 end]] + set ver [package require $pkg_unqualified] } else { set ver "" } set ns $pkg_or_existing_ns } else { - set ver [package require $pkg_or_existing_ns] - set ns ::$pkg_or_existing_ns + set pkg_unqualified $pkg_or_existing_ns + set ver [package require $pkg_unqualified] + set ns ::$pkg_unqualified + } + #some packages don't create their namespace immediately and/or don't populate it with commands and instead put entries in ::auto_index + set previous_command_count 0 + if {[namespace exists $ns]} { + set previous_command_count [llength [info commands ${ns}::*]] + } + + + #also if a sub package was loaded first - then the namespace for the base or lower package may exist but have no commands + #for the purposes of pkguse - which most commonly interactive - we want the namespace populated + #It may still not be *fully* populated because we stop at first source that adds commands - REVIEW + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + + if {!$ns_populated} { + #we will catch-run an auto_index entry if any + #auto_index entry may or may not be prefixed with :: + set keys [list] + #first look for exact pkg_unqualified and ::pkg_unqualified + #leave these at beginning of keys list + if {[array exists ::auto_index($pkg_unqualified)]} { + lappend keys $pkg_unqualified + } + if {[array exists ::auto_index(::$pkg_unqualified)]} { + lappend keys ::$pkg_unqualified + } + #as auto_index is an array - we could get keys in arbitrary order + set matches [lsort [array names ::auto_index ${pkg_unqualified}::*]] + lappend keys {*}$matches + set matches [lsort [array names ::auto_index ::${pkg_unqualified}::*]] + lappend keys {*}$matches + set ns_populated 0 + set i 0 + set already_sourced [list] ;#often multiple triggers for the same source - don't waste time re-sourcing + set ns_depth [llength [punk::ns::nsparts [string trimleft $ns :]]] + while {!$ns_populated && $i < [llength $keys]} { + #todo - skip sourcing deeper entries from a subpkg which may have been loaded earlier than the base + #e.g if we are loading ::x::y + #only source for keys the same depth, or one deeper ie ::x::y, x::y, ::x::y::z not ::x or ::x::y::z::etc + set k [lindex $keys $i] + set k_depth [llength [punk::ns::nsparts [string trimleft $k :]]] + if {$k_depth == $ns_depth || $k_depth == $ns_depth + 1} { + set auto_source [set ::auto_index($k)] + if {$auto_source ni $already_sourced} { + uplevel 1 $auto_source + lappend already_sourced $auto_source + set ns_populated [expr {[tcl::namespace::exists $ns] && [llength [info commands ${ns}::*]] > $previous_command_count}] + } + } + incr i + } + } } } @@ -1799,7 +1853,13 @@ tcl::namespace::eval punk::ns { return $out } } else { - error "Namespace $ns not found." + if {$ver eq ""} { + error "Namespace $ns not found. No package version found." + } else { + set out "(no package namespace found) remaining in [uplevel 1 {namespace current}]" + append out \n $ver + return $out + } } return $out } diff --git a/src/vfs/_vfscommon/modules/punk/winlnk-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/winlnk-0.1.0.tm new file mode 100644 index 00000000..6a5d4a6d --- /dev/null +++ b/src/vfs/_vfscommon/modules/punk/winlnk-0.1.0.tm @@ -0,0 +1,561 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::winlnk 0.1.0 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin shellspy_module_punk::winlnk 0 0.1.0] +#[copyright "2024"] +#[titledesc {windows shortcut .lnk library}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk::winlnk}] [comment {-- Description at end of page heading --}] +#[require punk::winlnk] +#[keywords module shortcut lnk parse windows crossplatform] +#[description] +#[para] Tools for reading windows shortcuts (.lnk files) on any platform + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::winlnk +#[subsection Concepts] +#[para] Windows shortcuts are a binary format file with a .lnk extension +#[para] Shell Link (.LNK) Binary File Format is documented in [MS_SHLLINK].pdf published by Microsoft. +#[para] Revision 8.0 published 2024-04-23 + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::winlnk +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +#TODO - logger + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#tcl::namespace::eval punk::winlnk::class { + #*** !doctools + #[subsection {Namespace punk::winlnk::class}] + #[para] class definitions + #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + #} +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::winlnk}] + #[para] Core API functions for punk::winlnk + #[list_begin definitions] + + + variable magic_HeaderSize "0000004C" ;#HeaderSize MUST equal this + variable magic_LinkCLSID "00021401-0000-0000-C000-000000000046" ;#LinkCLSID MUST equal this + + proc Get_contents {path {bytes all}} { + if {![file exists $path] || [file type $path] ne "file"} { + error "punk::winlnk::get_contents cannot find a filesystem object of type 'file' at location: $path" + } + set fd [open $path r] + chan configure $fd -translation binary -encoding iso8859-1 + if {$bytes eq "all"} { + set data [read $fd] + } else { + set data [read $fd $bytes] + } + close $fd + return $data + } + proc Get_HeaderSize {contents} { + set 4bytes [split [string range $contents 0 3] ""] + set hex4 "" + foreach b [lreverse $4bytes] { + set dec [scan $b %c] ;# 0-255 decimal + set HH [format %2.2llX $dec] + append hex4 $HH + } + return $hex4 + } + proc Get_LinkCLSID {contents} { + set 16bytes [string range $contents 4 19] + #CLSID hex textual representation is split as 4-2-2-2-6 bytes(hex pairs) + #e.g We expect 00021401-0000-0000-C000-000000000046 for .lnk files + #for endianness - it is little endian all the way but the split is 4-2-2-1-1-1-1-1-1-1-1 REVIEW + #(so it can appear as mixed endianness if you don't know the splits) + #https://devblogs.microsoft.com/oldnewthing/20220928-00/?p=107221 + #This is based on COM textual representation of GUIDS + #Apparently a CLSID is a GUID that identifies a COM object + set clsid "" + set s1 [tcl::string::range $16bytes 0 3] + set declist [scan [string reverse $s1] %c%c%c%c] + set fmt "%02X%02X%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s2 [tcl::string::range $16bytes 4 5] + set declist [scan [string reverse $s2] %c%c] + set fmt "%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s3 [tcl::string::range $16bytes 6 7] + set declist [scan [string reverse $s3] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + #now treat bytes individually - so no endianness conversion + set declist [scan [tcl::string::range $16bytes 8 9] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + set scan [string repeat %c 6] + set fmt [string repeat %02X 6] + set declist [scan [tcl::string::range $16bytes 10 15] $scan] + append clsid [format $fmt {*}$declist] + + return $clsid + } + proc Contents_check_header {contents} { + variable magic_HeaderSize + variable magic_LinkCLSID + expr {[Get_HeaderSize $contents] eq $magic_HeaderSize && [Get_LinkCLSID $contents] eq $magic_LinkCLSID} + } + + #LinkFlags - 4 bytes - specifies information about the shell link and the presence of optional portions of the structure. + proc Show_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + puts "val: $val" + set declist [scan [string reverse $4bytes] %c%c%c%c] + set fmt [string repeat %08b 4] + puts "LinkFlags:[format $fmt {*}$declist]" + + set r [binary scan $4bytes b32 val] + puts "bscan-le: $val" + set r [binary scan [string reverse $4bytes] b32 val] + puts "bscan-2 : $val" + } + proc Get_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + variable LinkFlags + set LinkFlags [dict create\ + hasLinkTargetIDList 1\ + HasLinkInfo 2\ + HasName 4\ + HasRelativePath 8\ + HasWorkingDir 16\ + HasArguments 32\ + HasIconLocation 64\ + IsUnicode 128\ + ForceNoLinkInfo 256\ + HasExpString 512\ + RunInSeparateProcess 1024\ + Unused1 2048\ + HasDarwinID 4096\ + RunAsUser 8192\ + HasExpIcon 16394\ + NoPidlAlias 32768\ + Unused2 65536\ + RunWithShimLayer 131072\ + ForceNoLinkTrack 262144\ + EnableTargetMetadata 524288\ + DisableLinkPathTracking 1048576\ + DisableKnownFolderTracking 2097152\ + DisableKnownFolderAlias 4194304\ + AllowLinkToLink 8388608\ + UnaliasOnSave 16777216\ + PreferEnvironmentPath 33554432\ + KeepLocalIDListForUNCTarget 67108864\ + ] + variable LinkFlagLetters [list A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA] + proc Has_LinkFlag {contents flagname} { + variable LinkFlags + variable LinkFlagLetters + if {[string length $flagname] <= 2} { + set idx [lsearch $LinkFlagLetters $flagname] + if {$idx < 0} { + error "punk::winlnk::Has_LinkFlag error - flagname $flagname not known" + } + set binflag [expr {2**$idx}] + set allflags [Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } + if {[dict exists $LinkFlags $flagname]} { + set binflag [dict get $LinkFlags $flagname] + set allflags [Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } else { + error "punk::winlnk::Has_LinkFlag error - flagname $flagname not known" + } + } + + + + #https://github.com/libyal/liblnk/blob/main/documentation/Windows%20Shortcut%20File%20(LNK)%20format.asciidoc + + #offset 24 4 bytes + #File attribute flags + + #offset 28 8 bytes + #creation date and time + + #offset 36 8 bytes + #last access date and time + + #offset 44 8 bytes + #last modification date and time + + #offset 52 4 bytes - unsigned int + #file size in bytes (of target) + proc Get_FileSize {contents} { + set 4bytes [string range $contents 52 55] + set r [binary scan $4bytes i val] + return $val + } + + #offset 56 4 bytes signed integer + #icon index value + + #offset 60 4 bytes - unsigned integer + #SW_SHOWNORMAL 0x00000001 + #SW_SHOWMAXIMIZED 0x00000001 + #SW_SHOWMINNOACTIVE 0x00000007 + # - all other values MUST be treated as SW_SHOWNORMAL + proc Get_ShowCommand {contents} { + set 4bytes [string range $contents 60 63] + set r [binary scan $4bytes i val] + return $val + } + + #offset 64 Bytes 2 + #Hot key + + #offset 66 2 bytes - reserved + + #offset 68 4 bytes - reserved + + #offset 72 4 bytes - reserved + + #next 76 + + proc Get_LinkTargetIDList_size {contents} { + if {[Has_LinkFlag $contents "A"]} { + set 2bytes [string range $contents 76 77] + set r [binary scan $2bytes s val] ;#short + #logger + #puts stderr "LinkTargetIDList_size: $val" + return $val + } else { + return 0 + } + } + proc Get_LinkInfo_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + set offset 0 + } else { + set offset [expr {2 + $idlist_size}] ;#LinkTargetIdList IDListSize field + value + } + set linkinfo_start [expr {76 + $offset}] + if {[Has_LinkFlag $contents B]} { + #puts stderr "linkinfo_start: $linkinfo_start" + set 4bytes [string range $contents $linkinfo_start $linkinfo_start+3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set linkinfo_content [string range $contents $linkinfo_start [expr {$linkinfo_start + $val -1}]] + return [dict create linkinfo_start $linkinfo_start size $val next_start [expr {$linkinfo_start + $val}] content $linkinfo_content] + } else { + return [dict create linkinfo_start $linkinfo_start size 0 next_start $linkinfo_start content ""] + } + } + + proc LinkInfo_get_fields {linkinfocontent} { + set 4bytes [string range $linkinfocontent 0 3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set bytes_linkinfoheadersize [string range $linkinfocontent 4 7] + set bytes_linkinfoflags [string range $linkinfocontent 8 11] + set r [binary scan $4bytes i flags] ;# i for little endian 32-bit signed int + #puts "linkinfoflags: $flags" + + set localbasepath "" + set commonpathsuffix "" + + #REVIEW - flags problem? + if {$flags & 1} { + #VolumeIDAndLocalBasePath + #logger + #puts stderr "VolumeIDAndLocalBasePath" + } + if {$flags & 2} { + #logger + #puts stderr "CommonNetworkRelativeLinkAndPathSuffix" + } + set bytes_volumeid_offset [string range $linkinfocontent 12 15] + set bytes_localbasepath_offset [string range $linkinfocontent 16 19] ;# a + set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] + set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] ;# a + + binary scan $bytes_localbasepath_offset i bp_offset + if {$bp_offset > 0} { + set tail [string range $linkinfocontent $bp_offset end] + set stringterminator 0 + set i 0 + set localbasepath "" + #TODO + while {!$stringterminator & $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append localbasepath $c + } + incr i + } + } + binary scan $bytes_commonpathsuffix_offset i cps_offset + if {$cps_offset > 0} { + set tail [string range $linkinfocontent $cps_offset end] + set stringterminator 0 + set i 0 + set commonpathsuffix "" + #TODO + while {!$stringterminator && $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append commonpathsuffix $c + } + incr i + } + } + + + return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix] + } + + proc contents_get_info {contents} { + + #todo - return something like the perl lnk-parse-1.0.pl script? + + #Link File: C:/repo/jn/tclmodules/tomlish/src/modules/test/#modpod-tomlish-0.1.0/suites/all/arrays_1.toml#roundtrip+roundtrip_files+arrays_1.toml.fauxlink.lnk + #Link Flags: HAS SHELLIDLIST | POINTS TO FILE/DIR | NO DESCRIPTION | HAS RELATIVE PATH STRING | HAS WORKING DIRECTORY | NO CMD LINE ARGS | NO CUSTOM ICON | + #File Attributes: ARCHIVE + #Create Time: Sun Jul 14 2024 10:41:34 + #Last Accessed time: Sat Sept 21 2024 02:46:10 + #Last Modified Time: Tue Sept 10 2024 17:16:07 + #Target Length: 479 + #Icon Index: 0 + #ShowWnd: 1 SW_NORMAL + #HotKey: 0 + #(App Path:) Remaining Path: repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.0\suites\roundtrip\roundtrip_files\arrays_1.toml + #Relative Path: ..\roundtrip\roundtrip_files\arrays_1.toml + #Working Dir: C:\repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.0\suites\roundtrip\roundtrip_files + + variable LinkFlags + set flags_enabled [list] + dict for {k v} $LinkFlags { + if {[Has_LinkFlag $contents $k] > 0} { + lappend flags_enabled $k + } + } + + set showcommand_val [Get_ShowCommand $contents] + switch -- $showcommand_val { + 1 { + set showwnd [list 1 SW_SHOWNORMAL] + } + 3 { + set showwnd [list 3 SW_SHOWMAXIMIZED] + } + 7 { + set showwnd [list 7 SW_SHOWMINNOACTIVE] + } + default { + set showwnd [list $showcommand_val SW_SHOWNORMAL-effective] + } + } + + set linkinfo_content_dict [Get_LinkInfo_content $contents] + set localbase_path "" + set suffix_path "" + set linkinfocontent [dict get $linkinfo_content_dict content] + set link_target "" + if {$linkinfocontent ne ""} { + set linkfields [LinkInfo_get_fields $linkinfocontent] + set localbase_path [dict get $linkfields localbasepath] + set suffix_path [dict get $linkfields commonpathsuffix] + set link_target [file join $localbase_path $suffix_path] + } + + set result [dict create\ + link_target $link_target\ + link_flags $flags_enabled\ + file_attributes ""\ + create_time ""\ + last_accessed_time ""\ + target_length [Get_FileSize $contents]\ + icon_index ""\ + showwnd "$showwnd"\ + hotkey ""\ + relative_path "?"\ + ] + } + + proc file_check_header {path} { + #*** !doctools + #[call [fun file_check_header] [arg path] ] + #[para]Return 0|1 + #[para]Determines if the .lnk file specified in path has a valid header for a windows shortcut + set c [Get_contents $path 20] + return [Contents_check_header $c] + } + proc file_get_info {path} { + #*** !doctools + #[call [fun file_get_info] [arg path] ] + #[para] Return a dict of info obtained by parsing the binary data in a windows .lnk file + #[para] If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and the dictionary will contain an 'error' key + set c [Get_contents $path] + if {[Contents_check_header $c]} { + return [contents_get_info $c] + } else { + return [dict create error "lnk_header_check_failed"] + } + } + proc file_show_info {path} { + package require punk::lib + punk::lib::showdict [file_get_info $path] * + } + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::winlnk::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::winlnk::system { + #*** !doctools + #[subsection {Namespace punk::winlnk::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::winlnk [tcl::namespace::eval punk::winlnk { + variable pkg punk::winlnk + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm b/src/vfs/_vfscommon/modules/test/tomlish-1.1.1.tm index bad6de441c761d692f326991fdc29ce99febd07c..8405fae74a50ec468daaff5adfc86c3c68353219 100644 GIT binary patch delta 5155 zcmZWsbzD?i*Pc0a52fS)(nCl|UW5TD2?wM@M3KILfYb;hp-3YKfdK|->5vBLRuK_F zLO@cGk`QUWk;~_M-`{)w*k?V@e%3mB@3VhvubsUN^ehAAJxsJWD<#d+2u+Q#gb;}O z1qehDq(yRqf>=(>=_vsO0tMDg|8|>@hMl+^M}(x@fJ$)gFL7}+Tp0~sxaXkApz6X@ zx}DdoNH1X(4sb=os1+>d16hDvAd-pfY z;n{7yM$$Su@+YddyeaO9jjq4pxq@8?cqzvQCkc7po0BYv`Dpc$$t>yy7iH5zFHiWO zi$h#?ZpXIDn7J*CvXC>wt%fKucfYtm+U!!o5u(_(%RkAAZyE)%+EFKKT_PF4@op^ecOYt=Lp7c>&nNl zm`%Trdb7Y>ktaS_{K@WP`(pLr8dcFIE={#hAwj;@v>s-~sFQg&wa;sO%NskQDrOqY7&JW!K^!lqaWtP&5<1)tp{CyE%i7e zXa}h*`ZIWZAjNQooG~)EH?u^r_u+)0rIh>Qzj6LsvNaDGWa3e^&jWPsdprf$O;<~3 zIRZ7mDCOx^4B32urd=a{%tx7kqi z-6@wC=9iPZd$cxGkKTZPGHN{Wz`~+{LZ-+z{EV(e{A&8~robYexBT&$Kj_cdE4?Bl#2lb|dauiPkaJM$-+SviM?D8dwfi@9-E zZy;l!G6T9XbSdl>^U*>c{7T-)Hyh1&5+egzm{3XH#5r;0y()#C~e#7_Im3o%C8Y`k{|M4==r9 zZkS|VdmuKKA8hUBrj80c>5MBlpbuzV^=XbCFL@LFW2D|HeWjs^FDC0V$B)S(Yu5~0 zd!`C@w5Xe|tNM}Z6EvM{0>a*b16Hk(D0s8|*sy4$4XboPxB1yTs}{aliJPan5#5$i ziP~2p=?PVfJfUfCYXOVc*+4oZ$M38K6LIr!w0Cs1aXC+4m5gu>0Y`F56&9q$;}>Bem#TOXg>KpRgLV&CV8fdVOojf-qK?yBt8;VyOff+OLK2mQF2M+76=2+&?9x**>-lmZsdZ7}Kdu$CUm3 zQJc&?Z85>!G8`fCc9zm|wv%y3zvE;WjXqgiJ?21Jr3LzWdfzACcb_`gno%?qvmPEd z8zWy^41$-gu}(^VXuXnI`6wVqk_yABFtMs{oE=)j<~}NCO2RG|UN);dE}F9z^6Il2 zx@qV$CjErKD0iAH%L5oboNBC$#7xgPH3vOM>*ee8CGbV@`=W<^CZvUoAxufa*|#5i zU((w)_ScnLl)V=2 zDS{51Ypw6;-B7sp&97wZsJj&;NT11vnh7w`DG9hq!kItBt^>?U${4t$F_WJf_KdS zI~?-aS@uql>)9uRq6a_W30V)Ns3|W;@*qy?65h)vM4Uc7;V2Bw9>1FVbK;Zrg5E;tv3n{TzWBmved1L)6vTGx%c)P?LOi}25XChDW=&7nH_{mY-(^d z+OD-sW?3d$i`Qa)dUq!HVJDt11rI$>YFO{MocD%@?ZSBZ2VxUU!&cmAgt#aAq5vt_ z5IC>A$!Do-{C!(6qt3seRC1eHCpp1fs{ir66fyjwFcYuc=H&QFUY#3;`bmoJ@gB4w zW%6x8zSnBSul(MW6)oJ`!3<>M-3L5|1{@B&c3U*4^?AueUbY8OJ#;?pBOi9Sy@eu@ zE_bz7P_UF5H1=e>?%knvl-g9*8rf?f zd&|8k#U^_{wC7c-p3;FcrZyFq-)|Zs(zKIML~!%Lr`QJwYC}bKqn0K`C8ee%E+sob zXtxcWs*Pzu~R=J|#`-`_pdz_CIkWfhRv# z8q>|x&=Vz8m&5cXB}bVQ`&C;?fWEZ8uk#ipL5*D(Lm?DU?@@?Fn*197$DPK;Pw9aU zS?vCSAI4hSaV*+u>X--1Yr$^jPZP!!>AEu`mH6|tx#_)3VSe4-;WD`Q2Me+DNiS4h zKlVLz*{qR{HM-Ec|1DhXa@7~NQtj|kX6L{-=EsZ$RJ-(kFqO-pQ*Ekl&(?R@LY#wP zxM!s4Pc71Y!=n4^B)9T>kjCQ3-~@q3noO?%BL}^HedwEPaSKe`I!i$Rm2Ewu!=p@i zfN@J+qlB@lKp5&ONHA*z% zyx*r(1j(f_e+lEz^&-%x*AcFf#gkaeQ;OXC!ZV$mibWPm^*q!5}2gjs()0sJ)) z1oB1%0+9z_2yudX5gY_i2&D3UL^ly_8U7g4WOCuJ!84Kg>Rs_n03 z((Vvr2;f*%vh=#q4*znuO(u-Gmdu=1U(gI))K(R)TPVbJI+@d76SxPR3X5O-f_~rM z$2(D(E#IB()WQ-=7(U%gp%S#gD*`io$8dciBh6MoB<2xEC=w={G4^4Oc2w5(gU%0? z`n0^OqtI#Edb?X~FdC%HjhO=^L6>nuaypW)jr{-h;(a&g(x`@ChMG zt&&tSvrv=KPz()sT~PkKN9jZ=p{>Q-PdioqioKkU_X8U#!_m-C$EP<*0AF+2@)Lfe z#zw2KJ&$q60U}ypJLk#H$f3(u!BaVBAJ)87i3CskoGnK9@g>ra!S%oKuy@G4gP z{R~e+afTC46UwlF=`tod)s$cgKgRh?n5yn1A&aVjmY9&E?PW8^dusyKB0C$n;jYBu zZn7~FN35LZqW7q$iuMJatXdeB(&4f4A`yKxn4<$E&eOF7sXUdSue>~RRB-a9^y^z| zH6f5*7XDlq$!9^ztEFuQN2PN)V7OIxEy=kIm|u&r+t$5jY!vt)s`vC-x5HTEqcVe# zZ$|fwo~S@x5GZ#Q^z|aY-yS$L=el(-FQ*J^1Kv?)y45yjHcg)_yO`*t?jub1`HrWq z$O!Jc0eMABqm)q(MdN#_nf4@OB2<-Xxh4IO=UZ36oYv0Zv#{7An;&^L-!IIl%uLNq zqy>>^Sp#n!_YebEzQvHbLgA<1esfE@-3KNcK6a}EEv1kzuGpF}UbES=IP0g!7I~eG zd(m%QZHW&lxvWp`CI5K9VEG5=ILfN6)Zv#>`P6Fh6w7WjP(!FGp!dL0Rt8x@NE3BgVY0?jHvb$CLOg_$BMQ@n>-F zQV|^4bap5*pqg15O1OV5#qv+G$G=w7ze_QAbmG*HKmFOJVjXRrz@ z3Gi|Kf2|C1rXYZQ-g8I|Heo5jDH_l>l@f$y6M}Ou zSOEVke`t*iNnlg@3|^I?1luwYfVu2BBmqlhiNV}V7^s@b0-Pv=gPH1o;_M+|6!>I( z{%nCCGW~%Kl{3%3RLQ`YECldW^$Z@X)1C#XQKzQP3~BJAqy%3Sz=5taD!xz*+69UiqJg)@AXkwFNL&a5jZLVj{yj$&$XSF2W=zgv zOcf~sbEe?4;(u20elZ;S4vZ`I2YSu%q%zoCLVOm)*8)cQpCBF<=bduEB}@GD1i)LC z7eU!l1fXg22d3JPo$bN8QaBK4ho?EfW;-U3rVIg$IsSoFSVl0c3=WkAq2*%WNEre; zg17Dh7}s+NF&J4c3k-Vv3sQr7<&uEB&mU^lml<@fKme7#f2h|LI)JSI8C(yf0~KB& zfX2Xc$ohL+od12Rgh0gqGe5j_GvrLH7r_u;i2x80=ZO8x^?y$C|8iB0I}`tNu)~B= u+E9Q2@=ulUzsJMBJeW2{5_}Mhswo_x>OKu{!Vo delta 5073 zcmZWs2{=^k`<^-WeXQ9F5o0Io*vUGRv1N}W*|+S<$i63s?6M2VSW6^DhU`n$lqG9a zGG(dwP15)Of7f@;b)D{IX7>$y*+m zS`}TE<(UuOBT6zhGx9#4<4XXo3!mvf8Fj{Drjz=QKD>!){rD8eF3-K1Mr zXiIEEMN&d9-Wm<34rrAcKzathcS+Gc-DxlCE3}e;@snfgW&s)Fc)b*4U0ly|P*OJf z1sTC-QnaM`t{o+N|&Uz~SoKDFR8faLfuHrVwEpXuQ}OvaV*;Aj zM-I+XVq7H2S4QZ11Gdjoy4g+UnD#o5Jl^VbZtOuRF3HLBcABu5^7f_Xd5!suzdi_9 zF%=t%lc|XhD3b&;nw^tAUA9Pm{{eu%D;jTkH50v7Wv`z# z=6KkIBjw}+3nne7ElQoGwS2Mowy^u^cnPj6w!6+Q7^a%JJP=ysR=q6 zF0&8vhvFNsRI4yh(X*v_l-lbiO)_Q=l|^7cq}PMam;@P_npJvmH8 zbJ^jtQtf>KSmkVY0JL>fX zuUw4lk(4z+EwpmTP?V8OF~1Z#e`*6b|4tY{HPrZAeT?84Uey~8-(64DG{<0CYIGti z2P~QBGm&@`h7p_iH;w>Ina>njn2foNidN31dqOW{OQygm;NJK7j}fdhWr&Z5E%b#ubu9O-BKmnX5S{)wynb#W_IJamEu&%pyW4= z*7&&6-eqIgBJBb8(4Sf3&1LJtQ#~UFcAG}uZ@*YY3w-r&Mu%0_i7J14sBDc%R(=_x zy=VD!W~ZTy?iBitx|IZtZm9`(zYljkM*-FxW0N92BRPA&^YW0Cuj}6a&NtSoS~=>p z5c#ISzOdTaC$m*^!=K(xUvF+N;c(zTSOnTb2PPRFOtE!44;A#8<|a!X5id9$8T9(3 zeCIX_#mn=R4mBQ#TYUAPUqXm-wvR0$L=Ua4Ztgg;l0J)`Ia-(H!CT@nZ8S$v@Cg?u zloaGaa{mrhpol94>Cq8D2v-e!+^2~M7M~p1ze+IAhb#24gfLeizXJM^LLjZG5QrqW z?ZE?jH!y<5kt`SpmxQ}SDqSbc{Y8$SvlNr2BYC6Ntd`>_eWgIb!cqHDBBy>p-jsjd z!!p$#`e3C5+-i`Hl>*unP7!g+gf)&~JT8J^GQ9y4?+_7b>Zp4s+=w;ZA<2IU5MRmd zrN0^4H?C_+*~r};tG37FTO4P4Eu*rk6iuFXbs*4)35C%ONZu$cm1FLNXD~mtW%7|K zXK)i5HY^SIoK(6elyxEhhm~wWobh_dZ2jynm5rOc{CvNF*>@=Ca_2*q#VO&zuAn47 z^$T2j%mHH68fq_yMk(SBdPR7gcwq7MFKiLN!I3 zk1f~t3o0@gt{m?5FQPWRRd;UnGZc7tB#znzW$(IHK7A~_tMj;8w17SFxjM5p_6$hz zYH;q%J9$f6vybZ~ca{0|<(VIKJy6IpmH%i}9azD!VJ#b;{RjxTO0^mwI1y63*aDwE zTeB5p7tx?kS-F)UP!wHi=$_f|BK+Q?g({$Z=Mlw>7GXGlZsZiS?$bk=e<#D zBKDJhRSRBMuO}+Mc1=fT6445Wek|m}`KZ#CdT7pw3I5pmaQcilbwgqwUB&8iP0S%V z*RCTv`!0q8#b)TE{^p}wF0C*m$>5#|R>Qikm`Bf4i7vH`R@W&$$EzcrwbZfDIeTS;Yh*)(*G48_q{MbK5tuu`6s@D|rLrvU8iP`sLg}d9s`E24 zQ3yS32QeZ%)#@3iZsG?Qg|98I16-R58pvSM)r{v}u{>~T-&o-e%q@=5?95{G4mEos z`svQE@t-^7cnIfGgM?$>UBS=U)owXo^rn*F4PpF0KgkyWVI~<@)YY|p)HY?xzMz60 zcI9JED^(aoS@Pte?dS*Ht=(zjx5BM`TA@8A9-)>nh0$CRFm2g$s1Vf5q}<@7@j`0r z%8OWa7Rx-b87cGaatdsjwN%r~8gJnyOGtE~2re5+3}4K1R6HkAF5zVCI`R;B?Nulcgjs-lbn+>V}cA>$@Y_I zQ*l3USqLZF?NEu>c5TG}^5&DxJpHPpj)q+f_Y_wmw&y_a!nCc1I%nm-h^1x6SXW*T zj;HI+EVO6P&bq;DkrrRy6)RY7Sm3Rkd&Oig0$191ow0YO=E^x&lQ)HWhO*19L!20{ zvyw6II!35`@v}b~8o(Wc4>;tjtHjW-n^9-`+`rFR#3IS1G72LpR^|l4+0=;)2U13M z&%ozzvGm((oWj+85|~&u_Q=MueYI912E`l3UzG&y(@|YS|RzD|` z%^#KrCfjaIzyB;|__66t*wMF-u?Es!0Yx=tq6h=qvCwh5935YdmJ|im7YofP>&-iY z~NXj)(Wi5CRvy_*0 z5tEdAWpqhsw!71>N5ATHued4+9Tj)837s`#wG&Ir{?14$;Xfm}_<>%F( z5wo$`J5E0$T1mou8k3OXpPTxrz8uu}80qzBm!4&dsUIp-ohDi2F!_}+zl@pP7i0}l zjXC7`{Y;50+P%7l)&AT&tV*VNRO?%lSv&pZp+-H=b)tLpkBsx5z0tGFA6X}jEr40@ zWRl4;yr_|GufAM(Q@pR&otyd@LbJ93?<0Y~9H&AaaG0>PlwcvZdnBq-L1H$3gxgzF zMH>(iGeIB_atK`l+;SidR7{{Id-B<#_+5bI=KLJ`W{T5+<{Mz4FwCx%B8f3VQ>bBjSrVU6nSk* zLzT_VF`b=lm*?s<-qNx!PoL4SXSj#<@xC>I)em#xswm1$H$a5yz*XnxM3uz#gEuj= zT==B%r@>JQjFY$m#J9}8Zl$=K=*7W~YgPh}c}?p5p65JvK!t0gEfnY5Bm7$%D)x6o zD7FNDEg8sP@AswvYbM;(1h!Rud6IdN;`p%7*R8R0Q+*Q?!R&>nCCLQyj!d)1|`TPY`TO#ygw%EDEq-UD(3g&I(Tb@_1~4QePdn@ z@N#34^EKZZ4Fl#bJL$mkO;K*MYOrUYYgrjIXCkOtD1Fynpc&<&yJ7ka{rc?F2A8ZX5&EK3 zzjg;VI`277MsKQHIlXJ!580F{;-?ZAA<+^bnmLmi+^Mz2F9e43w{+JQ!ixL5HWOtq z8_(-G_V79*-5-ckbokvcsd~_&vo1n?M6acZJsf)qhBV?#NxhI{Y@Rn>iBUEtGV~Gmr#MH4$j`net@=X<(+G!t;3(OW;cXOI5V}->$_V$;-6APrYl*Np2 zC3ZDU*_g!oSG1dIaDEa)&27K-yrKEzYnd4|eGVov*@b^wpBsJ&6O}zqLxoh(DR@~r zp!^MGfrqbEb$s)9P8{f%`dz|Gzd@iz9D+`AFF zt#mHMOf-dJr;V1^FSjrsUrO^d@8$JM{9bS4X3e#%1;qi&F0mbjh7$HEGvYELB2Nx4 znVBAS4go`rcdk!2Fs8nZ*r>AR&J45gnMJZ54dx43$o*-|MpbsaDK1}Rci!x~ugbE6`;2}!6xNwiW-PYSH(l#Rm8$eIc6!X=P=bid%#3Oq zmsO!7SfofL#`%5zIkHWacZkfawn8~6Fi`8ftLyc8Kf^l@4g~KQ$FWoV7)pa-{@-yy z5h6i5#`=cO;J_@LASr-%2#H@=7zV;~;Xst|i6jTmh@LmC=&yD3KYRxdGbK1_z9#1VG_mZz+OV51f;t1$PVJfSmL{G~~WCKzaUnwILZgaQ8kO@Rd2CtYB&>H5gvV4Xn$8 z^hIjme4#kh7=#u10N>S)eZ-1sz}_M_u%UiJ)qquvV^~=%4TNhR!#PcQP_P6JJkUO- z$t7CAyxuWX)~6y2hk@rx*@0F4e&*6B;M zG=%i?&(#TzRQUqB=Eu|2nE#oksG1u(4R%!D24*b?1_kibLmDvtAspzkJfUhpht&xr z1HEe`psrw1O$fklLl6Z)?4whl%_BIVcIAZX1CdUkQ5`?n@5BIlWK)BxwcNmn%RdsK zi90@?)dX3><5>Pudmb443n@V}tTYhld2H$Mqy?X0;Q*oK{ypVy`seUcgIa+M|9U$q z5MAd2R0k0pRY02HKgW``p0G&B@mOp{8E1zU`znF z>!rcAdge3#N!0&x7fmHNiV<=)y$IR>3NS(br2hY;fPW2AOgkQA)W}Li`1wKiMh+k` c9UMs)WI-E{5PDb$M1^qIazG&OvVODw0p79*T>t<8 diff --git a/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm b/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm index 25ecc083..3e13e75d 100644 --- a/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm +++ b/src/vfs/_vfscommon/modules/tomlish-1.1.1.tm @@ -19,12 +19,20 @@ #*** !doctools #[manpage_begin tomlish_module_tomlish 0 1.1.1] #[copyright "2024"] -#[titledesc {Module API}] [comment {-- Name section and table of contents description --}] -#[moddesc {-}] [comment {-- Description at end of page heading --}] +#[titledesc {tomlish toml parser}] [comment {-- Name section and table of contents description --}] +#[moddesc {tomlish}] [comment {-- Description at end of page heading --}] #[require tomlish] -#[keywords module] +#[keywords module parsing toml configuration] #[description] -#[para] - +#[para] tomlish is an intermediate representation of toml data in a tree structure (tagged lists representing type information) +#[para] The design goals are for tomlish to be whitespace and comment preserving ie byte-for byte preservation during roundtrips from toml to tomlish and back to toml +#[para] The tomlish representation can then be converted to a Tcl dict structure or to other formats such as json, +#[para] although these other formats are generally unlikely to retain whitespace or comments +#[para] A further goal is to allow at least a useful subset of in-place editing operations which also preserve whitespace and comments. +#[para] e.g leaf key value editing, and table reordering/sorting, key-renaming at any level, key insertions/deletions +#[para] The API for editing (tomldoc object?) may require explicit setting of type if accessing an existing key +#[para] e.g setting a key that already exists and is a different type (especially if nested structure such as a table or array) +#[para] will need a -type option (-force ?) to force overriding with another type such as an int. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ @@ -138,6 +146,9 @@ namespace eval tomlish { #find the value # 3 is the earliest index at which the value could occur (depending on whitespace) set found_sub [list] + if {[lindex $keyval_element 2] ne "="} { + error ">>>_get_keyval_value doesn't seem to be a properly structured { = } list" + } foreach sub [lrange $keyval_element 2 end] { #note that a barekey/quotedkey won't occur directly inside a barekey/quotedkey switch -exact -- [lindex $sub 0] { @@ -169,12 +180,24 @@ namespace eval tomlish { #REVIEW set result [list type $type value $value] } - TABLE - ITABLE - ARRAY - MULTISTRING { - #jmn2024 - added ITABLE - review + TABLE { + #invalid? + error "_get_keyval_value invalid to have type TABLE on rhs of =" + } + ITABLE { + set result [::tomlish::get_dict [list $found_sub]] + } + ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! set result [list type $type value [::tomlish::get_dict [list $found_sub]]] } + MULTISTRING - MULTILITERAL { + #review - mapping these to STRING might make some conversions harder? + #if we keep the MULTI - we know we have to look for newlines for example when converting to json + #without specific types we'd have to check every STRING - and lose info about how best to map chars within it + set result [list type $type value [::tomlish::get_dict [list $found_sub]]] + } default { error "Unexpected value type '$type' found in keyval '$keyval_element'" } @@ -182,6 +205,48 @@ namespace eval tomlish { return $result } + proc _get_dottedkey_info {dottedkeyrecord} { + set key_hierarchy [list] + set key_hierarchy_raw [list] + if {[lindex $dottedkeyrecord 0] ne "DOTTEDKEY"} { + error "_get_dottedkey_info error. Supplied list doesn't appear to be a DOTTEDKEY (tag: [lindex $dottedkeyrecord 0])" + } + set compoundkeylist [lindex $dottedkeyrecord 1] + set expect_sep 0 + foreach part $compoundkeylist { + set parttag [lindex $part 0] + if {$parttag eq "WS"} { + continue + } + if {$expect_sep} { + if {$parttag ne "DOTSEP"} { + error "DOTTEDKEY missing dot separator between parts. '$dottedkeyrecord'" + } + set expect_sep 0 + } else { + set val [lindex $part 1] + switch -exact -- $parttag { + KEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw $val + } + QKEY { + lappend key_hierarchy [::tomlish::utils::unescape_string $val] + lappend key_hierarchy_raw \"$val\" + } + SQKEY { + lappend key_hierarchy $val + lappend key_hierarchy_raw "'$val'" + } + default { + error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" + } + } + set expect_sep 1 + } + } + return [dict create keys $key_hierarchy keys_raw $key_hierarchy_raw] + } #get_dict is a *basic* programmatic datastructure for accessing the data. # produce a dictionary of keys and values from a tomlish tagged list. # get_dict is primarily for reading toml data. @@ -235,68 +300,40 @@ namespace eval tomlish { } DOTTEDKEY { log::debug "--> processing $tag: $item" - set compoundkey [lindex $item 1] ;#sequence of KEY|QKEY|SQKEY,DOTSEP,KEY|QKEY|SQKEY with possible WS - #if more than one KEY,and DOTSEP is missing then invalid toml - set name_segments [list] - set key_hierarchy [list] - set key_hierarchy_raw [list] - - set expect_sep 0 - foreach part $compoundkey { - set parttag [lindex $part 0] - if {$parttag eq "WS"} { - continue - } - if {$expect_sep} { - if {$parttag ne "DOTSEP"} { - error "DOTTEDKEY missing dot separator between parts. '$item'" - } - set expect_sep 0 - } else { - set val [lindex $part 1] - switch -exact -- $parttag { - KEY { - lappend key_hierarchy $val - lappend key_hierarchy_raw $val - } - QKEY { - lappend key_hierarchy [::tomlish::utils::unescape_string $val] - lappend key_hierarchy_raw \"$val\" - } - SQKEY { - lappend key_hierarchy $val - lappend key_hierarchy_raw "'$val'" - } - default { - error "DOTTED key unexpected part '$parttag' - ensure dot separator is between key parts. '$item'" - } - } - set expect_sep 1 - } - } - if {[llength $key_hierarchy] == 0} { + set dkey_info [_get_dottedkey_info $item] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + + #a.b.c = 1 + #table_key_hierarchy -> a b + #leafkey -> c + if {[llength $dotted_key_hierarchy] == 0} { #empty?? probably invalid. review #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively error "DOTTED key has no parts - invalid? '$item'" - } elseif {[llength $key_hierarchy] == 1} { + } elseif {[llength $dotted_key_hierarchy] == 1} { #dottedkey is only a key - no table component set table_hierarchy [list] + set leafkey [lindex $dotted_key_hierarchy 0] } else { - set table_hierarchy [lrange $key_hierarchy 0 end-1] - set table_hierarchy_raw [lrange $key_hierarchy_raw 0 end-1] - + set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] + set table_hierarchy_raw [lrange $dotted_key_hierarchy_raw 0 end-1] + set leafkey [lindex $dotted_key_hierarchy end] } #ensure empty tables are still represented in the datastructure - set subkey [list] + set pathkeys [list] foreach k $table_hierarchy { - lappend subkey $k - if {![dict exists $datastructure {*}$subkey]} { - dict set datastructure {*}$subkey [list] + lappend pathkeys $k + if {![dict exists $datastructure {*}$pathkeys]} { + dict set datastructure {*}$pathkeys [list] } else { - tomlish::log::notice "get_dict datastructure at subkey $subkey already had data: [dict get $datastructure {*}$subkey]" + tomlish::log::notice "get_dict datastructure at key path $pathkeys already had data: [dict get $datastructure {*}$pathkeys]" } } + + set keyval_dict [_get_keyval_value $item] + dict set datastructure {*}$pathkeys $leafkey $keyval_dict } TABLE { set tablename [lindex $item 1] @@ -312,8 +349,8 @@ namespace eval tomlish { #toml spec rule - all segments mst be non-empty #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - set key_hierarchy [list] - set key_hierarchy_raw [list] + set table_key_hierarchy [list] + set table_key_hierarchy_raw [list] foreach rawseg $name_segments { @@ -334,16 +371,16 @@ namespace eval tomlish { #if {$rawseg eq ""} { # error "Table name '[lindex $item 1]' is not valid. All segments (parts between dots) must be non-empty" #} - lappend key_hierarchy $seg - lappend key_hierarchy_raw $rawseg + lappend table_key_hierarchy $seg + lappend table_key_hierarchy_raw $rawseg - if {[dict exists $datastructure {*}$key_hierarchy]} { + if {[dict exists $datastructure {*}$table_key_hierarchy]} { #It's ok for this key to already exist *if* it was defined by a previous tablename, # but not if it was defined as a key/qkey/skey ? - set testkey [join $key_hierarchy_raw .] + set testkey [join $table_key_hierarchy_raw .] - set testkey_length [llength $key_hierarchy_raw] + set testkey_length [llength $table_key_hierarchy_raw] set found_testkey 0 if {$testkey in $tablenames_seen} { set found_testkey 1 @@ -372,8 +409,8 @@ namespace eval tomlish { } if {$found_testkey == 0} { - #the raw key_hierarchy is better to display in the error message, although it's not the actual dict keyset - set msg "key [join $key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." + #the raw table_key_hierarchy is better to display in the error message, although it's not the actual dict keyset + set msg "key [join $table_key_hierarchy_raw .] already exists in datastructure, but wasn't defined by a supertable." append msg "tablenames_seen:" foreach ts $tablenames_seen { append msg " " $ts \n @@ -385,13 +422,13 @@ namespace eval tomlish { } #ensure empty tables are still represented in the datastructure - set subkey [list] - foreach k $key_hierarchy { - lappend subkey $k - if {![dict exists $datastructure {*}$subkey]} { - dict set datastructure {*}$subkey [list] + set table_keys [list] + foreach k $table_key_hierarchy { + lappend table_keys $k + if {![dict exists $datastructure {*}$table_keys]} { + dict set datastructure {*}$table_keys [list] } else { - tomlish::log::notice "get_dict datastructure at subkey $subkey already had data: [dict get $datastructure {*}$subkey]" + tomlish::log::notice "get_dict datastructure at (TABLE) subkey $table_keys already had data: [dict get $datastructure {*}$table_keys]" } } @@ -399,26 +436,48 @@ namespace eval tomlish { lappend tablenames_seen $tablename - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" - log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy : $table_key_hierarchy" + log::debug ">>>>>>>>>>>>>>>>>>>>table_key_hierarchy_raw: $table_key_hierarchy_raw" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] switch -exact -- $type { + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } + } + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict + } KEY - QKEY - SQKEY { + #obsolete ? set keyval_key [lindex $element 1] if {$type eq "QKEY"} { set keyval_key [::tomlish::utils::unescape_string $keyval_key] } - if {[dict exists $datastructure {*}$key_hierarchy $keyval_key]} { - error "Duplicate key '$key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." + if {[dict exists $datastructure {*}$dotted_key_hierarchy $keyval_key]} { + error "Duplicate key '$dotted_key_hierarchy $key'. The key already exists at this level in the toml data. The toml data is not valid." } set keyval_dict [_get_keyval_value $element] - dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict - } - DOTTEDKEY { - error "todo dotted key in table context" + dict set datastructure {*}$dotted_key_hierarchy $keyval_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore @@ -437,13 +496,30 @@ namespace eval tomlish { foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - KEY - QKEY - SQKEY { - set keyval_key [lindex $element 1] - set keyval_dict [_get_keyval_value $element] - if {$type eq "QKEY"} { - set keyval_key [::tomlish::utils::unescape_string $keyval_key] + DOTTEDKEY { + set dkey_info [_get_dottedkey_info $element] + set dotted_key_hierarchy [dict get $dkey_info keys] + set dotted_key_hierarchy_raw [dict get $dkey_info keys_raw] + set leaf_key [lindex $dotted_key_hierarchy end] + set dkeys [lrange $dotted_key_hierarchy 0 end-1] + + #ensure empty keys are still represented in the datastructure + set table_keys [list] ;#We don't know the context - next level up will have to check for key collisions? + set test_keys $table_keys + foreach k $dkeys { + lappend test_keys $k + if {![dict exists $datastructure {*}$test_keys]} { + dict set datastructure {*}$test_keys [list] + } else { + tomlish::log::notice "get_dict datastructure at (DOTTEDKEY) subkey $test_keys already had data: [dict get $datastructure {*}$test_keys]" + } } - dict set datastructure $keyval_key $keyval_dict + + if {[dict exists $datastructure {*}$table_keys {*}$dkeys $leaf_key]} { + error "Duplicate key '$table_keys $dkeys $leaf_key'. The key already exists at this level in the toml data. The toml data is not valid." + } + set keyval_dict [_get_keyval_value $element] + dict set datastructure {*}$table_keys {*}$dkeys $leaf_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore @@ -474,7 +550,7 @@ namespace eval tomlish { set value [lindex $element 1] lappend datastructure [list type $type value $value] } - ITABLE - TABLE - ARRAY - MULTISTRING { + ITABLE - TABLE - ARRAY - MULTISTRING - MULTILITERAL { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } @@ -487,6 +563,49 @@ namespace eval tomlish { } } } + MULTILITERAL { + #triple squoted string + #first newline stripped only if it is the very first element + #(ie *immediately* following the opening delims) + #All whitespace other than newlines is within LITERALPARTS + # ------------------------------------------------------------------------- + #todo - consider extension to toml to allow indent-aware multiline literals + # how - propose as issue in toml github? Use different delim? e.g ^^^ ? + #e.g + # xxx=?'''abc + # def + # etc + # ''' + # - we would like to trimleft each line to the column following the opening delim + # ------------------------------------------------------------------------- + + log::debug "--> processing multiliteral: $item" + set parts [lrange $item 1 end] + if {[lindex $parts 0 0] eq "NEWLINE"} { + set parts [lrange $parts 1 end] ;#skip it + } + for {set idx 0} {$idx < [llength $parts]} {incr idx} { + set element [lindex $parts $idx] + set type [lindex $element 0] + switch -exact -- $type { + LITERALPART { + append stringvalue [lindex $element 1] + } + NEWLINE { + set val [lindex $element 1] + if {$val eq "nl"} { + append stringvalue \n + } else { + append stringvalue \r\n + } + } + default { + error "Unexpected value type '$type' found in multistring" + } + } + } + set datastructure $stringvalue + } MULTISTRING { #triple dquoted string log::debug "--> processing multistring: $item" @@ -496,9 +615,15 @@ namespace eval tomlish { for {set idx 0} {$idx < [llength $parts]} {incr idx} { set element [lindex $parts $idx] set type [lindex $element 0] - #todo - do away with STRINGPART + #We use STRINGPART in the tomlish representation as a distinct element to STRING - which would imply wrapping quotes to be reinserted switch -exact -- $type { - STRING - STRINGPART { + STRING { + #todo - do away with STRING ? + #we don't build MULTISTRINGS containing STRING - but should we accept it? + tomlish::log::warn "doulbe quoting a STRING found in MULTISTRING - should be STRINGPART?" + append stringvalue "\"[::tomlish::utils::unescape_string [lindex $element 1]]\"" + } + STRINGPART { append stringvalue [::tomlish::utils::unescape_string [lindex $element 1]] } CONT { @@ -755,7 +880,7 @@ namespace eval tomlish::encode { } MULTISTRING { #explicitly list the valid child tags - set ms_parts {STRINGPART WS NEWLINE CONT} + set ms_parts {STRING STRINGPART WS NEWLINE CONT} if {$tag ni $ms_parts} { error "Invalid tag '$tag' encountered within a MULTISTRING must belong to: $ms_parts" } @@ -884,8 +1009,9 @@ namespace eval tomlish::encode { #multiliteral could be handled as a single literal if we allowed literal to contain newlines #- except that the first newline must be retained for roundtripping tomlish <-> toml but # the first newline is not part of the data. - # we elect instead to maintain a basic LITERAL that must not contain newlines.. - # and to compose MULTILITERAL of multiple NEWLINE LITERAL parts, with the datastructure representation dropping the first one when building the value. + # we elect instead to maintain a basic LITERALPART that must not contain newlines.. + # and to compose MULTILITERAL of multiple NEWLINE LITERALPART parts, + #with the datastructure representation dropping the first newline (if immediately following opening delim) when building the value. set literal "" foreach part [lrange $item 1 end] { append literal [::tomlish::encode::tomlish [list $part] $nextcontext] @@ -953,6 +1079,7 @@ namespace eval tomlish::decode { # - we have no way of knowing if it was originally specified as \t \u0009 or \U00000009 or directly as a tab character. # For this reason, we also do absolutely no line-ending transformations based on platform. # All line-endings are maintained as is, and even a file with mixed cr crlf line-endings will be correctly interpreted and can be 'roundtripped' + proc toml {s} { #*** !doctools #[call [fun toml] [arg s]] @@ -1038,7 +1165,12 @@ namespace eval tomlish::decode { lappend v(0) [list ERROR tokentype $tokenType state $prevstate to $state leveldata [set v($nest)]] return $v(0) } - + # --------------------------------------------------------- + #NOTE there may already be a token_waiting at this point + #set_token_waiting can raise an error here, + # in which case the space_action branch needs to be rewritten to handle the existing token_waiting + # --------------------------------------------------------- + if {$space_action eq "pop"} { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. @@ -1053,15 +1185,17 @@ namespace eval tomlish::decode { # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop switch -- $tok { ' { - tomlish::parse::set_token_waiting type startsquote value $tok complete 1 + tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] } '' { #review - we should perhaps return double_squote instead? #tomlish::parse::set_token_waiting type literal value "" complete 1 - tomlish::parse::set_token_waiting type double_squote value "" complete 1 + tomlish::parse::set_token_waiting type double_squote value "" complete 1 startindex [expr {$i - 2}] } ''' { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] } '''' { switch -exact -- $prevstate { @@ -1070,7 +1204,7 @@ namespace eval tomlish::decode { #we should have emitted the triple and left the last for next loop } trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] #todo integrate left squote with nest data at this level set lastpart [lindex $v($parentlevel) end] switch -- [lindex $lastpart 0] { @@ -1100,7 +1234,7 @@ namespace eval tomlish::decode { #we should have emitted the triple and left the following squotes for next loop } trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] #todo integrate left 2 squotes with nest data at this level set lastpart [lindex $v($parentlevel) end] switch -- [lindex $lastpart 0] { @@ -1162,7 +1296,7 @@ namespace eval tomlish::decode { if {$prevstate eq "dottedkey-space"} { tomlish::log::debug "---- equal ending dottedkey-space for last_space_action pop" #re-emit for parent space - tomlish::parse::set_token_waiting type equal value = complete 1 + tomlish::parse::set_token_waiting type equal value = complete 1 startindex [expr {$i-1}] } } newline { @@ -1219,13 +1353,30 @@ namespace eval tomlish::decode { set tok $starttok_val } } - barekey - squotedkey { - #set v($nest) [list KEY $tok] ;#$tok is the keyname - if {$prevstate eq "table-space"} { - set v($nest) [list DOTTEDKEY] + squotedkey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } + } + #todo - check not something already waiting? + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } + barekey { + switch -exact -- $prevstate { + table-space - itable-space { + set v($nest) [list DOTTEDKEY] + } } #todo - check not something already waiting? - tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 ;#re-submit token in the newly pushed space + set waiting [tomlish::parse::get_token_waiting] + if {[llength $waiting]} { + set i [dict get $waiting startindex] + tomlish::parse::clear_token_waiting + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } else { + tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space + } } startsquote { set next_tokenType_known 1 @@ -2086,6 +2237,7 @@ namespace eval tomlish::utils { } } + #review - we proc is_datetime {str} { #e.g 1979-05-27 #e.g 1979-05-27T00:32:00Z @@ -2093,6 +2245,15 @@ namespace eval tomlish::utils { #e.g 1979-05-27 00:32:00+10:00 #e.g 1979-05-27 00:32:00.999999-07:00 + #review + #minimal datetimes? + # 2024 ok - shortest valid 4 digit year? + # 02:00 ok + # 05-17 ok + if {[string length $str] < 4} { + return 0 + } + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] if {[tcl::string::length $str] == $matches} { #all characters in legal range @@ -2125,21 +2286,25 @@ namespace eval tomlish::parse { #[para] #[list_begin definitions] - #This is a very curly mix of a half-baked statemachine littered with special cases. - #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are not so trivial to parse using more standard methods either: - #a) some kind of backtracking required if using an ABNF parser - #b) Some of the rules of context-free grammars are violated by the spec. + #This is a somewhat curly mix of a statemachine and toml-nesting-stack littered with special cases. + #The code is a pig's-nest - but it should be noted that for example trailing single double quotes in multiline strings are perhaps not so trivial to parse using more standard methods either: + # - e.g some kind of backtracking required if using an ABNF parser? + #I don't know the precise technical name for this sort of parser; probably something like "Dog's Breakfast" + #More seriously, we don't have distinct lex/parse steps - so it is basically a 'fused lexer' or 'scannerless parser' + + #It is also desirable for this system to be useful in 'interactive' use. review - would a separate lexer make this easier or harder? - #I don't know what the technical name for this sort of parser is. probably something like "Dog's Breakfast" + #A possible alternative more structured approach might be to use a PEG (Parsing Expression Grammar) + variable is_parsing 0 ;#whether we are in the middle of parsing tomlish text variable state # states: - # table-space, curly-space, array-space + # table-space, itable-space, array-space # value-expected, keyval-syntax, # quoted-key, squoted-key - # string, literal-state, multistring... + # string-state, literal-state, multistring... # # notes: # only the -space states are also 'spaces' ie a container which is pushed/popped on the spacestack @@ -2153,7 +2318,7 @@ namespace eval tomlish::parse { # - PUSHSPACE is a simple push onto the spacestack, zeropoppushspace also pushes, but will first do a pop *if* the current space level is greater than zero (ie if only if not already in root table-space) # -- --- --- --- --- --- - #token/state naming guide (todo - make implementation match!) + #token/state naming guide # -- --- --- --- --- --- #tokens : underscore separated or bare name e.g newline, start_quote, start_squote #private tokens: always have a leading underscore (These are private 'temporary state' tokens that are never returned as actual tokens e.g _start_squote_sequence @@ -2170,7 +2335,7 @@ namespace eval tomlish::parse { #e.g {PUSHSPACE } or POPSPACE or SAMESPACE - #SAMESPACE - got to same space as parent without popping a level, but use transition as if we popped there? strange concept - review usecases + #SAMESPACE - got to same space as parent without popping a level, but has it's own autotransition lookup - strange concept - review usecases variable stateMatrix set stateMatrix [dict create] @@ -2188,13 +2353,13 @@ namespace eval tomlish::parse { dict set stateMatrix\ table-space { + bom "table-space"\ whitespace "table-space"\ newline "table-space"\ - bom "table-space"\ barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ startquote "quoted-key"\ - startsquote "squoted-key"\ + XXXstartsquote "squoted-key"\ comment "table-space"\ starttablename "tablename-state"\ starttablearrayname "tablearrayname-state"\ @@ -2205,6 +2370,22 @@ namespace eval tomlish::parse { equal "err-state"\ } + #itable-space/ curly-syntax : itables + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}}\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + startquote "quoted-key"\ + startsquote {TOSTATE "squoted-key" comment "jn-ok"}\ + comma "itable-space"\ + comment "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ keyval-space {\ @@ -2228,14 +2409,14 @@ namespace eval tomlish::parse { keyval-value-expected {\ whitespace "keyval-value-expected"\ untyped_value {TOSTATE "keyval-tail" note ""}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ - startquote {TOSTATE "string-state" returnstate keyval-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ - startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ - double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ - triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - startinlinetable {PUSHSPACE curly-space returnstate keyval-tail}\ - startarray {PUSHSPACE array-space returnstate keyval-tail}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate keyval-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} dict set stateMatrix\ @@ -2256,6 +2437,60 @@ namespace eval tomlish::parse { eof "end-state"\ } + dict set stateMatrix\ + itable-keyval-syntax {\ + whitespace "itable-keyval-syntax"\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + equal "itable-keyval-value-expected"\ + newline "err-state"\ + eof "err-state"\ + } + dict set stateMatrix\ + itable-keyval-value-expected {\ + whitespace "itable-keyval-value-expected"\ + untyped_value {TOSTATE "itable-val-tail" note ""}\ + squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ + startquote {TOSTATE "string-state" returnstate itable-val-tail}\ + startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ + double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ + startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ + startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ + } + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + + dict set stateMatrix\ + itable-val-tail {\ + whitespace "itable-val-tail"\ + endinlinetable "POPSPACE"\ + comma "POPSPACE"\ + Xnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ + newline "err-state"\ + comment "itable-val-tail"\ + eof "err-state"\ + } + #dict set stateMatrix\ + # itable-quoted-key {\ + # whitespace "NA"\ + # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endquote "itable-keyval-syntax"\ + # } + #dict set stateMatrix\ + # itable-squoted-key {\ + # whitespace "NA"\ + # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ + # newline "err-state"\ + # endsquote "itable-keyval-syntax"\ + # } + + @@ -2267,7 +2502,7 @@ namespace eval tomlish::parse { startsquote "literal-state"\ startmultiquote {PUSHSPACE "multistring-space"}\ triple_squote {PUSHSPACE "multiliteral-space"}\ - startinlinetable {PUSHSPACE curly-space}\ + startinlinetable {PUSHSPACE itable-space}\ startarray {PUSHSPACE array-space}\ comment "err-state-value-expected-got-comment"\ comma "err-state"\ @@ -2293,21 +2528,6 @@ namespace eval tomlish::parse { - #curly-space/ curly-syntax : itables - dict set stateMatrix\ - curly-space {\ - whitespace "curly-space"\ - newline "curly-space"\ - barekey {PUSHSPACE "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - itablesquotedkey "itablekeyval-space"\ - endinlinetable "POPSPACE"\ - startquote "itable-quoted-key"\ - startsquote "itable-squoted-key"\ - comma "curly-space"\ - comment "err-state"\ - eof "err-state"\ - } #REVIEW #toml spec looks like heading towards allowing newlines within inline tables @@ -2316,15 +2536,15 @@ namespace eval tomlish::parse { curly-syntax {\ whitespace "curly-syntax"\ newline "curly-syntax"\ - barekey {PUSHSPACE "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ + barekey {PUSHSPACE "itable-keyval-space"}\ + itablequotedkey "itable-keyval-space"\ endinlinetable "POPSPACE"\ startquote "itable-quoted-key"\ - comma "curly-space"\ - comment "curly-space"\ + comma "itable-space"\ + comment "itable-space"\ eof "err-state"\ } - #review comment "err-state" vs comment "curly-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? @@ -2338,7 +2558,7 @@ namespace eval tomlish::parse { startarray {PUSHSPACE "array-space"}\ endarray "POPSPACE"\ startmultiquote {PUSHSPACE multistring-space}\ - startinlinetable {PUSHSPACE curly-space}\ + startinlinetable {PUSHSPACE itable-space}\ startquote "string-state"\ startsquote "literal-state"\ triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ @@ -2360,43 +2580,6 @@ namespace eval tomlish::parse { comment "err-state"\ } - dict set stateMatrix\ - itablekeyval-syntax {\ - whitespace "itablekeyval-syntax"\ - endquote "itablekeyval-syntax"\ - endsquote "itablekeyval-syntax"\ - newline "err-state"\ - equal "value-expected"\ - eof "err-state"\ - } - dict set stateMatrix\ - itablekeyval-space {} - - dict set stateMatrix\ - itablevaltail {\ - whitespace "itablevaltail"\ - endinlinetable "POPSPACE"\ - comma "POPSPACE"\ - newline "itablevaltail"\ - comment "itablevaltail"\ - eof "err-state"\ - } - dict set stateMatrix\ - itable-quoted-key {\ - whitespace "NA"\ - itablequotedkey {PUSHSPACE "itablekeyval-space"}\ - newline "err-state"\ - endquote "itablekeyval-syntax"\ - } - dict set stateMatrix\ - itable-squoted-key {\ - whitespace "NA"\ - itablesquotedkey {PUSHSPACE "itablekeyval-space"}\ - newline "err-state"\ - endsquote "itablekeyval-syntax"\ - } - - #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space @@ -2412,8 +2595,8 @@ namespace eval tomlish::parse { whitespace "NA"\ squotedkey "squoted-key"\ newline "err-state"\ - endsquote {PUSHSPACE "keyval-space"}\ } + # endsquote {PUSHSPACE "keyval-space"} dict set stateMatrix\ string-state {\ @@ -2519,9 +2702,10 @@ namespace eval tomlish::parse { dict set stateMatrix nostate $tok "err-nostate-received-token-$tok" } - #This seems hacky... - #see also spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #purpose - debugging? remove? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- #build a list of 'push triggers' from the stateMatrix # ie tokens which can push a new space onto spacestack set push_trigger_tokens [list] @@ -2538,8 +2722,12 @@ namespace eval tomlish::parse { } } ::tomlish::log::debug "push_trigger_tokens: $push_trigger_tokens" - #!todo - hard code once stateMatrix finalised? + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + #This seems hacky... (deprecate in favour of explicit arguments to the instructions in stateMatrix?) + #spacePopTransitions, spacePushTransitions, spaceSameTransitions below for auto state redirections on POPSPACE,PUSHSPACE,SAMESPACE #mainly for the -space states: #redirect to another state $c based on a state transition from $whatever to $b @@ -2549,28 +2737,28 @@ namespace eval tomlish::parse { #Push to, next #default first states when we push to these spaces variable spacePushTransitions { - keyval-space keyval-syntax - itablekeyval-space itablekeyval-syntax - array-space array-space - curly-space curly-space - table-space tablename-state + keyval-space keyval-syntax + itable-keyval-space itable-keyval-syntax + array-space array-space + table-space tablename-state } + #itable-space itable-space #Pop to, next variable spacePopTransitions { - array-space array-syntax - curly-space curly-syntax - itablekeyval-space itablevaltail + array-space array-syntax } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail #review #we pop to keyval-space from dottedkey-space or from value-expected? we don't always want to go to keyval-tail #leave it out and make the POPSPACE caller explicitly specify it #keyval-space keyval-tail variable spaceSameTransitions { - array-space array-syntax - curly-space curly-syntax - itablekeyval-space itablevaltail + array-space array-syntax } + #itable-space curly-syntax + #itable-keyval-space itable-val-tail variable state_list ;#reset every tomlish::decode::toml @@ -2816,26 +3004,71 @@ namespace eval tomlish::parse { } } + proc get_token_waiting {} { + variable token_waiting + return [lindex $token_waiting 0] + } + proc clear_token_waiting {} { + variable token_waiting + set token_waiting [list] + } + + #token_waiting is a list - but our standard case is to have only one + #in certain circumstances such as near eof we may have 2 + #the set_token_waiting function only allows setting when there is not already one waiting. + #we want to catch cases of inadvertently trying to set multiple + # - the reason being that the state transition triggered by the previous token may have invalidated the assumptions made when a token was added as waiting. proc set_token_waiting {args} { if {[llength $args] %2 != 0} { error "set_token_waiting must have args of form: type value complete 0|1" } variable token_waiting + + if {[llength $token_waiting] && [dict get [lindex $token_waiting end] type] ne "eof"} { + #tokloop already set a token_waiting - but something (post tokloop processing?) is trying to set another + #we may need to remove the existing token_waiting and reset the tokloop index to the previous char so it's reprocessed in the possibly new context + #rather than attempt to make the right decision here - we raise an error and require the caller to check/handle it + set err "set_token_waiting already has token_waiting: [lindex $token_waiting 0]" + append err \n " - cannot add token_waiting: $args" + error $err + #set tomlish::parse::i [expr {[dict get $token_waiting startindex] -1}] + #set token_waiting [list] + } + + set waiting [dict create] dict for {k v} $args { switch -exact $k { type - complete { - dict set token_waiting $k $v + dict set waiting $k $v } value { - dict set token_waiting tok $v + dict set waiting tok $v + } + startindex { + dict set waiting startindex $v } default { error "set_token_waiting error - unrecognised key $k. known keys: [dict keys $args]" } } } - if {![tcl::string::is boolean -strict [dict get $token_waiting complete]]} { - error "set_token_waiting error - 'complete' must be a boolean. got [dict get $token_waiting complete]" + if {![tcl::string::is boolean -strict [dict get $waiting complete]]} { + error "set_token_waiting error - 'complete' must be a boolean. got [dict get $waiting complete]" + } + if {![llength $token_waiting]} { + set token_waiting [list $waiting] + } else { + #an extra sanity-check that we don't have more than just the eof.. + if {[llength $token_waiting] > 1} { + set err "Unexpected. Existing token_waiting count > 1.\n" + foreach tw $token_waiting { + append err " $tw" \n + } + append err " - cannot add token_waiting: $waiting" + error $err + } + #last entry must be a waiting eof + set token_waiting [list $waiting [lindex $token_waiting end]] } return } @@ -2844,6 +3077,8 @@ namespace eval tomlish::parse { #tomlish::parse::tok #we attempt to do this without lookahead (potential use in streaming toml? for what benefit?) todo -final flag # - the possible benefit is being able to more easily process in arbitrarily split chunks (although we would still have to watch crlf splitting ?) + # - interactive use? + proc tok {s} { variable nest variable v @@ -2867,14 +3102,16 @@ namespace eval tomlish::parse { #------------------------------ #Previous run found another (presumably single-char) token - #This token_waiting mechanism only allows for a single *completed* token to be specified. + #The normal case is for there to be only one dict in the list + #multiple is an exception - primarily for eof variable token_waiting - if {[dict size $token_waiting]} { - set tokenType [dict get $token_waiting type] - set tok [dict get $token_waiting tok] - dict unset token_waiting type - dict unset token_waiting tok - dict unset token_waiting complete + if {[llength $token_waiting]} { + set waiting [lindex $token_waiting 0] + + set tokenType [dict get $waiting type] + set tok [dict get $waiting tok] + #todo: dict get $token_waiting complete + set token_waiting [lrange $token_waiting 1 end] return 1 } #------------------------------ @@ -2894,9 +3131,10 @@ namespace eval tomlish::parse { } set c [tcl::string::index $s $i] + set cindex $i tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" #puts "got char $c during tokenType '$tokenType'" - incr i ;#must incr here because we do'returns'inside the loop + incr i ;#must incr here because we do returns inside the loop set ctest [tcl::string::map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { @@ -3025,7 +3263,7 @@ namespace eval tomlish::parse { } } else { switch -exact -- $state { - keyval-value-expected - value-expected { + itable-keyval-value-expected - keyval-value-expected - value-expected { #switch last key to tablename?? set_tokenType "startinlinetable" set tok "\{" @@ -3097,18 +3335,18 @@ namespace eval tomlish::parse { starttablename - tablename { if {$had_slash} {append tok "\\"} #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endinlinetable value "" complete 1 + set_token_waiting type endinlinetable value "" complete 1 startindex $cindex return 1 } starttablearrayname - tablearrayname { if {$had_slash} {append tok "\\"} #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endtablearrayname value "" complete 1 + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex return 1 } - itablevaltail { + itable-val-tail { #review - error "right-curly in itablevaltail" + error "right-curly in itable-val-tail" } default { #end any other token @@ -3131,6 +3369,11 @@ namespace eval tomlish::parse { set tok "\}" return 1 } + itable-space { + set_tokenType "endinlinetable" + set tok "\}" + return 1 + } tablename-state { #e.g [] - empty tablename - allowed or not? #empty tablename/tablearrayname ? @@ -3146,25 +3389,25 @@ namespace eval tomlish::parse { set tok "" ;#no output into the tomlish list for this token return 1 } - curly-syntax - curly-space { + array-syntax - array-space { + #invalid set_tokenType "endinlinetable" set tok "\}" return 1 } - array-syntax - array-space { - #invalid + curly-syntax { set_tokenType "endinlinetable" set tok "\}" return 1 } - itablevaltail { + itable-val-tail { set_tokenType "endinlinetable" set tok "" #we need to pop the keyval - and then reprocess to pop the inlinetable - so we incr -1 incr i -1 return 1 } - itablekeyval-syntax { + itable-keyval-syntax { error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" } multistring-space { @@ -3234,7 +3477,7 @@ namespace eval tomlish::parse { } else { if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { #invalid at this point - state machine should disallow table -> starttablearrayname - set_token_waiting type starttablearrayname value "" complete 1 + set_token_waiting type starttablearrayname value "" complete 1 startindex $cindex return 1 } else { #we appear to still be in single or double quoted section @@ -3255,7 +3498,7 @@ namespace eval tomlish::parse { } else { #$slash_active not relevant when no tokenType switch -exact -- $state { - keyval-value-expected - value-expected { + keyval-value-expected - itable-keyval-value-expected - value-expected { set_tokenType "startarray" set tok "\[" return 1 @@ -3348,7 +3591,7 @@ namespace eval tomlish::parse { append tok "\\]" } else { if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { - set_token_waiting type endtablename value "" complete 1 + set_token_waiting type endtablename value "" complete 1 startindex $cindex return 1 } else { #we appear to still be in single or double quoted section @@ -3360,7 +3603,7 @@ namespace eval tomlish::parse { #todo? if {$had_slash} {append tok "\\"} #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endtablearrayname value "" complete 1 + set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex return 1 } default { @@ -3576,7 +3819,7 @@ namespace eval tomlish::parse { literal { #slash_active always false #terminate the literal - set_token_waiting type endsquote value "'" complete 1 + set_token_waiting type endsquote value "'" complete 1 startindex $cindex return 1 } literalpart { @@ -3589,7 +3832,7 @@ namespace eval tomlish::parse { return 1 } itablesquotedkey { - set_token_waiting type endsquote value "'" complete 1 + set_token_waiting type endsquote value "'" complete 1 startindex $cindex return 1 } squotedkey { @@ -3615,7 +3858,7 @@ namespace eval tomlish::parse { set_tokenType "_start_squote_sequence" set tok "'" } - keyval-value-expected { + itable-keyval-value-expected - keyval-value-expected { set_tokenType "squote_seq_begin" set tok "'" return 1 @@ -3625,9 +3868,9 @@ namespace eval tomlish::parse { set_tokenType "squotedkey" set tok "" } - curly-space { - set_tokenType "startsquote" - set tok $c + itable-space { + set_tokenType "squote_seq_begin" + set tok "'" return 1 } tablename-state { @@ -3718,7 +3961,7 @@ namespace eval tomlish::parse { append tok "\\" $c } else { #unescaped quote always terminates a string? - set_token_waiting type endquote value "\"" complete 1 + set_token_waiting type endquote value "\"" complete 1 startindex $cindex return 1 } } @@ -3730,7 +3973,7 @@ namespace eval tomlish::parse { #incr i -1 if {$multi_dquote eq "\"\""} { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] set multi_dquote "" return 1 } else { @@ -3748,7 +3991,7 @@ namespace eval tomlish::parse { } else { switch -- [tcl::string::length $multi_dquote] { 2 { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 + set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] set multi_dquote "" return 1 } @@ -3779,7 +4022,7 @@ namespace eval tomlish::parse { #} } default { - set_token_waiting type startquote value "\"" complete 1 + set_token_waiting type startquote value "\"" complete 1 startindex $cindex return 1 } } @@ -3793,7 +4036,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - set_token_waiting type endquote value "\"" complete 1 + set_token_waiting type endquote value "\"" complete 1 startindex $cindex return 1 } } @@ -3854,7 +4097,7 @@ namespace eval tomlish::parse { set tok $c return 1 } - curly-space { + itable-space { set_tokenType "startquote" set tok $c return 1 @@ -3916,7 +4159,7 @@ namespace eval tomlish::parse { incr i -$backlen return 1 } else { - set_token_waiting type equal value = complete 1 + set_token_waiting type equal value = complete 1 startindex $cindex return 1 } } @@ -4040,13 +4283,13 @@ namespace eval tomlish::parse { literal { #nl is not allowed *within* a literal - require multiliteral syntax for any literal containing a newline ''' ''' #even though we terminate the literal without the closing quote here - the token_waiting newline should trigger a state error - set_token_waiting type newline value lf complete 1 + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } literalpart { #we allow newlines - but store them within the multiliteral as their own element #This is a legitimate end to the literalpart - but not the whole multiliteral - set_token_waiting type newline value lf complete 1 + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } newline { @@ -4063,11 +4306,11 @@ namespace eval tomlish::parse { } else { if {$had_slash} { #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - set_token_waiting type continuation value \\ complete 1 + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] incr i -1 return 1 } else { - set_token_waiting type newline value lf complete 1 + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } } @@ -4083,7 +4326,7 @@ namespace eval tomlish::parse { # ie whitespace is split into separate whitespace tokens at each newline #puts "-------------- newline lf during tokenType $tokenType" - set_token_waiting type newline value lf complete 1 + set_token_waiting type newline value lf complete 1 startindex $cindex return 1 } } @@ -4182,12 +4425,12 @@ namespace eval tomlish::parse { incr i -$backlen return 1 } else { - set_token_waiting type comma value "," complete 1 + set_token_waiting type comma value "," complete 1 startindex $cindex return 1 } } default { - set_token_waiting type comma value "," complete 1 + set_token_waiting type comma value "," complete 1 startindex $cindex if {$had_slash} {append tok "\\"} return 1 } @@ -4285,7 +4528,7 @@ namespace eval tomlish::parse { #e.g x.y = 1 #we need to transition the barekey to become a structured table name ??? review #x is the tablename y is the key - set_token_waiting type dotsep value "." complete 1 + set_token_waiting type dotsep value "." complete 1 startindex $cindex return 1 } default { @@ -4591,7 +4834,7 @@ namespace eval tomlish::parse { append tok $c } default { - set_token_waiting type bom value "\uFEFF" complete 1 + set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex return 1 } } @@ -4675,7 +4918,7 @@ namespace eval tomlish::parse { set had_slash $slash_active set slash_active 0 switch -exact -- $state { - table-space - curly-space - curly-syntax { + table-space - itable-space { #if no currently active token - assume another key value pair if {[tomlish::utils::is_barekey $c]} { set_tokenType "barekey" @@ -4684,6 +4927,15 @@ namespace eval tomlish::parse { error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } + curly-syntax { + puts stderr "curly-syntax - review" + if {[tomlish::utils::is_barekey $c]} { + set_tokenType "barekey" + append tok $c + } else { + error "Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" + } + } multistring-space { set_tokenType "stringpart" if {$had_slash} { @@ -4751,7 +5003,8 @@ namespace eval tomlish::parse { error "eof reached without closing single quote for string literal. [tomlish::parse::report_line]" } 2 { - set_token_waiting type endsquote value "'" complete 1 + #review + set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] set_tokenType "literal" set tok "" return 1 @@ -4759,7 +5012,7 @@ namespace eval tomlish::parse { } } } - set_token_waiting type eof value eof complete 1 + set_token_waiting type eof value eof complete 1 startindex $i ;#review return 1 } else { ::tomlish::log::debug "- No current tokenType, ran out of characters, setting tokenType to 'eof' [tomlish::parse::report_line]"