# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) 2024 # # @@ Meta Begin # Application fauxlink 0.1.1 # Meta platform tcl # Meta license MIT # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin fauxlink_module_fauxlink 0 0.1.1] #[copyright "2024"] #[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}] #[moddesc {.fauxlink .fxlnk}] [comment {-- Description at end of page heading --}] #[require fauxlink] #[keywords symlink faux fake shortcut toml] #[description] #[para] A cross platform shortcut/symlink alternative. #[para] Unapologetically ugly - but practical in certain circumstances. #[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as #[para] archiving and packaging systems. #[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable. #[para] format of name #.fauxlink #[para] where can be empty - then the effective nominal name is the tail of the #[para] The file extension must be .fauxlink or .fxlnk #[para] The + symbol substitutes for forward-slashes. #[para] Other chars can be encoded using url-like encoding - (but only up to %7E !) #[para] We deliberately treat higher % sequences literally. #[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [lb]heart[rb]) can remain literal for linking to urls. #[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23 #[para] e.g a link to a file file#A.txt in parent dir could be: #[para] file%23A.txt#..+file%23A.txt.fauxlink #[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fauxlink #[para] The can be unrelated to the actual target #[para] e.g datafile.dat#..+file%23A.txt.fauxlink #[para] This system has no filesystem support - and must be completely application driven. #[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform. #[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined #[para] Extensions to behaviour should be added in the file as text data in Toml format, #[para] with custom data being under a single application-chosen table name #[para] The toplevel Toml table [lb]fauxlink[rb] is reserved for core extensions to this system. #[para] Aside from the 2 used for delimiting (+ #) #[para] certain characters which might normally be allowed in filesystems are required to be encoded #[para] e.g space and tab are required to be %20 %09 #[para] Others that require encoding are: * ? \ / | : ; " < > #[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it. #[para] Control characters and other punctuation is optional to encode. #[para] Generally utf-8 should be used where possible and unicode characters can often be left unencoded on modern systems. #[para] Where encoding of unicode is desired in the nominalname,encodedtarget,tag or comment portions it can be specified as %UXXXXXXXX #[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character. #[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest # # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded # ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it. #Using fauxlink - a link would be: # "my-program-files#++server+c+Program%20Files.fauxlink" #If we needed the old-style literal %20 it would become # "my-program-files#++server+c+Program%2520Files.fauxlink" # # The file:// scheme on windows supposedly *does* decode %xx (for use in a browser) # e.g # pfiles#file%3a++++localhost+c+Program%2520files # The browser will work with literal spaces too though - so it could just as well be: # pfiles#file%3a++++localhost+c+Program%20files #windows may default to using explorer.exe instead of a browser for file:// urls though #and explorer doesn't want the literal %20. It probably depends what API the file:// url is to be passed to? #in a .url shortcut either literal space or %20 will work ie %xx values are decoded #*** !doctools #[section Overview] #[para] overview of fauxlink #[subsection Concepts] #[para] - # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages used by fauxlink #[list_begin itemized] package require Tcl 8.6- #*** !doctools #[item] [package {Tcl 8.6-}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # oo::class namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval fauxlink::class { #*** !doctools #[subsection {Namespace fauxlink::class}] #[para] class definitions if {[info commands [namespace current]::interface_sample1] eq ""} { #*** !doctools #[list_begin enumerated] # oo::class create interface_sample1 { # #*** !doctools # #[enum] CLASS [class interface_sample1] # #[list_begin definitions] # method test {arg1} { # #*** !doctools # #[call class::interface_sample1 [method test] [arg arg1]] # #[para] test method # puts "test: $arg1" # } # #*** !doctools # #[list_end] [comment {-- end definitions interface_sample1}] # } #*** !doctools #[list_end] [comment {--- end class enumeration ---}] } } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Base namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval fauxlink { namespace export {[a-z]*}; # Convention: export all lowercase #todo - enforce utf-8 #literal unicode chars supported by modern filesystems - leave as is - REVIEW variable encode_map variable decode_map #most filesystems don't allow NULL - map to empty string #Make sure % is not in encode_map set encode_map [dict create\ \x00 ""\ { } %20\ \t %09\ + %2B\ # %23\ * %2A\ ? %3F\ \\ %5C\ / %2F\ | %7C\ : %3A\ {;} %3B\ {"} %22\ < %3C\ > %3E\ ] #above have some overlap with ctrl codes below. #no big deal as it's a dict #must_encode # + # * ? \ / | : ; " < > \t # also NUL to empty string # also ctrl chars 01 to 1F (1..31) for {set i 1} {$i < 32} {incr i} { set ch [format %c $i] set enc "%[format %02X $i]" set enc_lower [string tolower $enc] dict set encode_map $ch $enc dict set decode_map $enc $ch dict set decode_map $enc_lower $ch } variable must_encode set must_encode [dict keys $encode_map] #if they are in #decode map doesn't include # %00 (nul) # %2F "/" # %2f "/" # %7f (del) #we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention. # set decode_map [dict merge $decode_map [dict create\ %09 \t\ %20 { }\ %21 "!"\ %22 {"}\ %23 "#"\ %24 "$"\ %25 "%"\ %26 "&"\ %27 "'"\ %28 "("\ %29 ")"\ %2A "*"\ %2a "*"\ %2B "+"\ %2b "+"\ %2C ","\ %2c ","\ %2D "-"\ %2d "-"\ %2E "."\ %2e "."\ %3A ":"\ %3a ":"\ %3B {;}\ %3b {;}\ %3D "="\ %3C "<"\ %3c "<"\ %3d "="\ %3E ">"\ %3e ">"\ %3F "?"\ %3f "?"\ %40 "@"\ %5B "\["\ %5b "\["\ %5C "\\"\ %5c "\\"\ %5D "\]"\ %5d "\]"\ %5E "^"\ %5e "^"\ %60 "`"\ %7B "{"\ %7b "{"\ %7C "|"\ %7c "|"\ %7D "}"\ %7d "}"\ %7E "~"\ %7e "~"\ ]] #Don't go above 7f #if we want to specify p #*** !doctools #[subsection {Namespace fauxlink}] #[para] Core API functions for fauxlink #[list_begin definitions] proc Segment_mustencode_check {str} { variable decode_map variable encode_map ;#must_encode set idx 0 set err "" foreach ch [split $str ""] { if {[dict exists $encode_map $ch]} { set enc [dict get $encode_map $ch] if {[dict exists $decode_map $enc]} { append err " char $idx should be encoded as $enc" \n } else { append err " no %xx encoding available. Use %UXX if really required" \n } } incr idx } return $err ;#empty string if ok } proc resolve {link} { variable decode_map variable encode_map variable must_encode set ftail [file tail $link] set extension_name [string range [file extension $ftail] 1 end] if {$extension_name ni [list fxlnk fauxlink]} { set is_fauxlink 0 #we'll process anyway - but return the result wrapped #This should allow deliberate erroring for the calling dict user if the extension difference is inadvertent #(e.g blindly processing all files in a folder that is normally only .fauxlink files - but then something added that happens # to have # characters in it) #It also means if someone really wants to use the fauxlink semantics on a different file type # - they can - but just have to access the results differently and take that (minor) risk. #error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink" set err_extra "\nnonstandard extension '$extension_name' for fauxlink. (expected .fxlnk or .fauxlink) Check that the call to fauxlink::resolve was deliberate" } else { set is_fauxlink 1 set err_extra "" } set linkspec [file rootname $ftail] # - any # or + within the target path or name should have been uri encoded as %23 and %2b if {[tcl::string::first # $linkspec] < 0} { set err "fauxlink::resolve '$link'. Link must contain a # (usually at start if name matches target)" append err $err_extra error $err } #The 1st 2 parts of split on # are name and target file/dir #If there are only 3 parts the 3rd part is a comment and there are no 'tags' #if there are 4 parts - the 3rd part is a tagset where each tag begins with @ #and each subsequent part is a comment. Empty comments are stripped from the comments list #A tagset can be empty - but if it's not empty it must contain at least one @ and must start with @ #e.g name.txt#path#@tag1@tag2#test###.fauxlink #has a name, a target, 2 tags and one comment #check namespec already has required chars encoded set segments [split $linkspec #] lassign $segments namespec targetspec #puts stderr "-->namespec $namespec" set nametest [tcl::string::map $encode_map $namespec] #puts stderr "-->nametest $nametest" #nothing should be changed - if there are unencoded chars that must be encoded it is an error if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} { set err "fauxlink::resolve '$link' invalid chars in name part (section prior to first #)" append err [Segment_mustencode_check $namespec] append err $err_extra error $err } #see comments below regarding 2 rounds and ordering. set name [decode_unicode_escapes $namespec] set name [tcl::string::map $decode_map $name] #puts stderr "-->name: $name" set targetsegment [split $targetspec +] #check each + delimited part of targetspec already has required chars encoded set pp 0 ;#pathpart index set targetpath_parts [list] foreach pathpart $targetsegment { set targettest [tcl::string::map $encode_map $pathpart] if {[tcl::string::length $targettest] ne [tcl::string::length $pathpart]} { set err "fauxlink::resolve '$link' invalid chars in targetpath (section following first #)" append err [Segment_mustencode_check $pathpart] append err $err_extra error $err } #2 rounds of substitution is possibly asking for trouble.. #We allow anything in the resultant segments anyway (as %UXXXX... allows all) #so it's not so much about what can be encoded, # - but it makes it harder to reason about for users # In particular - if we map %XX first it makes %25 -> % substitution tricky # if the user requires a literal %UXXX - they can't do %25UXXX # the double sub would make it %UXXX -> somechar anyway. #we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere. #There is still the opportunity to use things like %U00000025 followed by hex-chars # and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW set pathpart [decode_unicode_escapes $pathpart] set pathpart [tcl::string::map $decode_map $pathpart] lappend targetpath_parts $pathpart incr pp } set targetpath [join $targetpath_parts /] if {$name eq ""} { set name [lindex $targetpath_parts end] } #we do the same encoding checks on tags and comments to increase chances of portability set tags [list] set comments [list] switch -- [llength $segments] { 2 { #no tags or comments } 3 { #only 3 sections - last is comment - even if looks like tags #to make the 3rd part a tagset, an extra # would be needed set comments [list [lindex $segments 2]] } default { set tagset [lindex $segments 2] if {$tagset eq ""} { #ok - no tags } else { if {[string first @ $tagset] != 0} { set err "fauxlink::resolve '$link' invalid tagset in 3rd #-delimited segment" append err \n " - must begin with @" append err $err_extra error $err } else { set tagset [string range $tagset 1 end] set rawtags [split $tagset @] set tags [list] foreach t $rawtags { if {$t eq ""} { lappend tags "" } else { set tagtest [tcl::string::map $encode_map $t] if {[tcl::string::length $tagtest] ne [tcl::string::length $t]} { set err "fauxlink::resolve '$link' invalid chars in tag [llength $tags]" append err [Segment_mustencode_check $t] append err $err_extra error $err } lappend tags [tcl::string::map $decode_map [decode_unicode_escapes $t]] } } } } set rawcomments [lrange $segments 3 end] #set comments [lsearch -all -inline -not $comments ""] set comments [list] foreach c $rawcomments { if {$c eq ""} {continue} set commenttest [tcl::string::map $encode_map $c] if {[tcl::string::length $commenttest] ne [tcl::string::length $c]} { set err "fauxlink::resolve '$link' invalid chars in comment [llength $comments]" append err [Segment_mustencode_check $c] append err $err_extra error $err } lappend comments [tcl::string::map $decode_map [decode_unicode_escapes $c]] } } } set data [dict create name $name targetpath $targetpath tags $tags comments $comments fauxlinkextension $extension_name] if {$is_fauxlink} { #standard .fxlnk or .fauxlink return $data } else { #custom extension - or called in error on wrong type of file but happened to parse. #see comments at top regarding is_fauxlink #make sure no keys in common at top level. return [dict create\ linktype $extension_name\ note "nonstandard extension returning nonstandard dict with result in data key"\ data $data\ ] } } variable map #default exclusion of / (%U2f and equivs) #this would allow obfuscation of intention - when we have + for that anyway proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} { variable map set ucstart [string first %U $str 0] if {$ucstart < 0} { return $str } set max 8 set map [list] set strend [expr {[string length $str]-1}] while {$ucstart >= 0} { set s $ucstart set i [expr {$s +2}] ;#skip the %U set hex "" while {[tcl::string::length $hex] < 8 && $i <= $strend} { set in [string index $str $i] if {[tcl::string::is xdigit -strict $in]} { append hex $in } else { break } incr i } if {$hex ne ""} { incr i -1 lappend map $s $i $hex } set ucstart [tcl::string::first %U $str $i] } set out "" set lastidx -1 set e 0 foreach {s e hex} $map { append out [string range $str $lastidx+1 $s-1] set sub [format %c 0x$hex] if {$sub in $exclusions} { append out %U$hex ;#put it back } else { append out $sub } set lastidx $e } if {$e < [tcl::string::length $str]-1} { append out [string range $str $e+1 end] } return $out } proc link_as {name target} { } #proc sample1 {p1 args} { # #*** !doctools # #[call [fun sample1] [arg p1] [opt {?option value...?}]] # #[para]Description of sample1 # return "ok" #} #*** !doctools #[list_end] [comment {--- end definitions namespace fauxlink ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Secondary API namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval fauxlink::lib { namespace export {[a-z]*}; # Convention: export all lowercase namespace path [namespace parent] #*** !doctools #[subsection {Namespace fauxlink::lib}] #[para] Secondary functions that are part of the API #[list_begin definitions] #proc utility1 {p1 args} { # #*** !doctools # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] # #[para]Description of utility1 # return 1 #} #*** !doctools #[list_end] [comment {--- end definitions namespace fauxlink::lib ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Internal] namespace eval fauxlink::system { #*** !doctools #[subsection {Namespace fauxlink::system}] #[para] Internal functions that are not part of the API } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide fauxlink [namespace eval fauxlink { variable pkg fauxlink variable version set version 0.1.1 }] return #*** !doctools #[manpage_end]