You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
568 lines
21 KiB
568 lines
21 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2024 |
|
# |
|
# @@ Meta Begin |
|
# Application fauxlink 0.1.1 |
|
# Meta platform tcl |
|
# Meta license MIT |
|
# @@ Meta End |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# doctools header |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[manpage_begin fauxlink_module_fauxlink 0 0.1.1] |
|
#[copyright "2024"] |
|
#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}] |
|
#[moddesc {.fauxlink .fxlnk}] [comment {-- Description at end of page heading --}] |
|
#[require fauxlink] |
|
#[keywords symlink faux fake shortcut toml] |
|
#[description] |
|
#[para] A cross platform shortcut/symlink alternative. |
|
#[para] Unapologetically ugly - but practical in certain circumstances. |
|
#[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as |
|
#[para] archiving and packaging systems. |
|
#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable. |
|
#[para] format of name <nominalname>#<encodedtarget>.fauxlink |
|
#[para] where <nominalname> can be empty - then the effective nominal name is the tail of the <encodedtarget> |
|
#[para] The file extension must be .fauxlink or .fxlnk |
|
#[para] The + symbol substitutes for forward-slashes. |
|
#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !) |
|
#[para] We deliberately treat higher % sequences literally. |
|
#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [lb]heart[rb]) can remain literal for linking to urls. |
|
#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23 |
|
#[para] e.g a link to a file file#A.txt in parent dir could be: |
|
#[para] file%23A.txt#..+file%23A.txt.fauxlink |
|
#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fauxlink |
|
#[para] The <nominalname> can be unrelated to the actual target |
|
#[para] e.g datafile.dat#..+file%23A.txt.fauxlink |
|
#[para] This system has no filesystem support - and must be completely application driven. |
|
#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform. |
|
#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined |
|
#[para] Extensions to behaviour should be added in the file as text data in Toml format, |
|
#[para] with custom data being under a single application-chosen table name |
|
#[para] The toplevel Toml table [lb]fauxlink[rb] is reserved for core extensions to this system. |
|
#[para] Aside from the 2 used for delimiting (+ #) |
|
#[para] certain characters which might normally be allowed in filesystems are required to be encoded |
|
#[para] e.g space and tab are required to be %20 %09 |
|
#[para] Others that require encoding are: * ? \ / | : ; " < > |
|
#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it. |
|
#[para] Control characters and other punctuation is optional to encode. |
|
#[para] Generally utf-8 should be used where possible and unicode characters can often be left unencoded on modern systems. |
|
#[para] Where encoding of unicode is desired in the nominalname,encodedtarget,tag or comment portions it can be specified as %UXXXXXXXX |
|
#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character. |
|
#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest |
|
# |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded |
|
# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it. |
|
#Using fauxlink - a link would be: |
|
# "my-program-files#++server+c+Program%20Files.fauxlink" |
|
#If we needed the old-style literal %20 it would become |
|
# "my-program-files#++server+c+Program%2520Files.fauxlink" |
|
# |
|
# The file:// scheme on windows supposedly *does* decode %xx (for use in a browser) |
|
# e.g |
|
# pfiles#file%3a++++localhost+c+Program%2520files |
|
# The browser will work with literal spaces too though - so it could just as well be: |
|
# pfiles#file%3a++++localhost+c+Program%20files |
|
#windows may default to using explorer.exe instead of a browser for file:// urls though |
|
#and explorer doesn't want the literal %20. It probably depends what API the file:// url is to be passed to? |
|
#in a .url shortcut either literal space or %20 will work ie %xx values are decoded |
|
|
|
|
|
|
|
#*** !doctools |
|
#[section Overview] |
|
#[para] overview of fauxlink |
|
#[subsection Concepts] |
|
#[para] - |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[subsection dependencies] |
|
#[para] packages used by fauxlink |
|
#[list_begin itemized] |
|
|
|
package require Tcl 8.6- |
|
#*** !doctools |
|
#[item] [package {Tcl 8.6-}] |
|
|
|
# #package require frobz |
|
# #*** !doctools |
|
# #[item] [package {frobz}] |
|
|
|
#*** !doctools |
|
#[list_end] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section API] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# oo::class namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval fauxlink::class { |
|
#*** !doctools |
|
#[subsection {Namespace fauxlink::class}] |
|
#[para] class definitions |
|
if {[info commands [namespace current]::interface_sample1] eq ""} { |
|
#*** !doctools |
|
#[list_begin enumerated] |
|
|
|
# oo::class create interface_sample1 { |
|
# #*** !doctools |
|
# #[enum] CLASS [class interface_sample1] |
|
# #[list_begin definitions] |
|
|
|
# method test {arg1} { |
|
# #*** !doctools |
|
# #[call class::interface_sample1 [method test] [arg arg1]] |
|
# #[para] test method |
|
# puts "test: $arg1" |
|
# } |
|
|
|
# #*** !doctools |
|
# #[list_end] [comment {-- end definitions interface_sample1}] |
|
# } |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end class enumeration ---}] |
|
} |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Base namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval fauxlink { |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
|
|
#todo - enforce utf-8 |
|
|
|
#literal unicode chars supported by modern filesystems - leave as is - REVIEW |
|
|
|
|
|
variable encode_map |
|
variable decode_map |
|
#most filesystems don't allow NULL - map to empty string |
|
|
|
#Make sure % is not in encode_map |
|
set encode_map [dict create\ |
|
\x00 ""\ |
|
{ } %20\ |
|
\t %09\ |
|
+ %2B\ |
|
# %23\ |
|
* %2A\ |
|
? %3F\ |
|
\\ %5C\ |
|
/ %2F\ |
|
| %7C\ |
|
: %3A\ |
|
{;} %3B\ |
|
{"} %22\ |
|
< %3C\ |
|
> %3E\ |
|
] |
|
#above have some overlap with ctrl codes below. |
|
#no big deal as it's a dict |
|
|
|
#must_encode |
|
# + # * ? \ / | : ; " < > <sp> \t |
|
# also NUL to empty string |
|
|
|
# also ctrl chars 01 to 1F (1..31) |
|
for {set i 1} {$i < 32} {incr i} { |
|
set ch [format %c $i] |
|
set enc "%[format %02X $i]" |
|
set enc_lower [string tolower $enc] |
|
dict set encode_map $ch $enc |
|
dict set decode_map $enc $ch |
|
dict set decode_map $enc_lower $ch |
|
} |
|
|
|
variable must_encode |
|
set must_encode [dict keys $encode_map] |
|
|
|
|
|
#if they are in |
|
|
|
#decode map doesn't include |
|
# %00 (nul) |
|
# %2F "/" |
|
# %2f "/" |
|
# %7f (del) |
|
#we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention. |
|
# |
|
set decode_map [dict merge $decode_map [dict create\ |
|
%09 \t\ |
|
%20 { }\ |
|
%21 "!"\ |
|
%22 {"}\ |
|
%23 "#"\ |
|
%24 "$"\ |
|
%25 "%"\ |
|
%26 "&"\ |
|
%27 "'"\ |
|
%28 "("\ |
|
%29 ")"\ |
|
%2A "*"\ |
|
%2a "*"\ |
|
%2B "+"\ |
|
%2b "+"\ |
|
%2C ","\ |
|
%2c ","\ |
|
%2D "-"\ |
|
%2d "-"\ |
|
%2E "."\ |
|
%2e "."\ |
|
%3A ":"\ |
|
%3a ":"\ |
|
%3B {;}\ |
|
%3b {;}\ |
|
%3D "="\ |
|
%3C "<"\ |
|
%3c "<"\ |
|
%3d "="\ |
|
%3E ">"\ |
|
%3e ">"\ |
|
%3F "?"\ |
|
%3f "?"\ |
|
%40 "@"\ |
|
%5B "\["\ |
|
%5b "\["\ |
|
%5C "\\"\ |
|
%5c "\\"\ |
|
%5D "\]"\ |
|
%5d "\]"\ |
|
%5E "^"\ |
|
%5e "^"\ |
|
%60 "`"\ |
|
%7B "{"\ |
|
%7b "{"\ |
|
%7C "|"\ |
|
%7c "|"\ |
|
%7D "}"\ |
|
%7d "}"\ |
|
%7E "~"\ |
|
%7e "~"\ |
|
]] |
|
#Don't go above 7f |
|
#if we want to specify p |
|
|
|
|
|
#*** !doctools |
|
#[subsection {Namespace fauxlink}] |
|
#[para] Core API functions for fauxlink |
|
#[list_begin definitions] |
|
proc Segment_mustencode_check {str} { |
|
variable decode_map |
|
variable encode_map ;#must_encode |
|
set idx 0 |
|
set err "" |
|
foreach ch [split $str ""] { |
|
if {[dict exists $encode_map $ch]} { |
|
set enc [dict get $encode_map $ch] |
|
if {[dict exists $decode_map $enc]} { |
|
append err " char $idx should be encoded as $enc" \n |
|
} else { |
|
append err " no %xx encoding available. Use %UXX if really required" \n |
|
} |
|
} |
|
incr idx |
|
} |
|
return $err ;#empty string if ok |
|
} |
|
|
|
proc resolve {link} { |
|
variable decode_map |
|
variable encode_map |
|
variable must_encode |
|
set ftail [file tail $link] |
|
set extension_name [string range [file extension $ftail] 1 end] |
|
if {$extension_name ni [list fxlnk fauxlink]} { |
|
set is_fauxlink 0 |
|
#we'll process anyway - but return the result wrapped |
|
#This should allow deliberate erroring for the calling dict user if the extension difference is inadvertent |
|
#(e.g blindly processing all files in a folder that is normally only .fauxlink files - but then something added that happens |
|
# to have # characters in it) |
|
#It also means if someone really wants to use the fauxlink semantics on a different file type |
|
# - they can - but just have to access the results differently and take that (minor) risk. |
|
#error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink" |
|
set err_extra "\nnonstandard extension '$extension_name' for fauxlink. (expected .fxlnk or .fauxlink) Check that the call to fauxlink::resolve was deliberate" |
|
} else { |
|
set is_fauxlink 1 |
|
set err_extra "" |
|
} |
|
set linkspec [file rootname $ftail] |
|
# - any # or + within the target path or name should have been uri encoded as %23 and %2b |
|
if {[tcl::string::first # $linkspec] < 0} { |
|
set err "fauxlink::resolve '$link'. Link must contain a # (usually at start if name matches target)" |
|
append err $err_extra |
|
error $err |
|
} |
|
#The 1st 2 parts of split on # are name and target file/dir |
|
#If there are only 3 parts the 3rd part is a comment and there are no 'tags' |
|
#if there are 4 parts - the 3rd part is a tagset where each tag begins with @ |
|
#and each subsequent part is a comment. Empty comments are stripped from the comments list |
|
#A tagset can be empty - but if it's not empty it must contain at least one @ and must start with @ |
|
#e.g name.txt#path#@tag1@tag2#test###.fauxlink |
|
#has a name, a target, 2 tags and one comment |
|
|
|
#check namespec already has required chars encoded |
|
set segments [split $linkspec #] |
|
lassign $segments namespec targetspec |
|
#puts stderr "-->namespec $namespec" |
|
set nametest [tcl::string::map $encode_map $namespec] |
|
#puts stderr "-->nametest $nametest" |
|
#nothing should be changed - if there are unencoded chars that must be encoded it is an error |
|
if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} { |
|
set err "fauxlink::resolve '$link' invalid chars in name part (section prior to first #)" |
|
append err [Segment_mustencode_check $namespec] |
|
append err $err_extra |
|
error $err |
|
} |
|
#see comments below regarding 2 rounds and ordering. |
|
set name [decode_unicode_escapes $namespec] |
|
set name [tcl::string::map $decode_map $name] |
|
#puts stderr "-->name: $name" |
|
|
|
set targetsegment [split $targetspec +] |
|
#check each + delimited part of targetspec already has required chars encoded |
|
set pp 0 ;#pathpart index |
|
set targetpath_parts [list] |
|
foreach pathpart $targetsegment { |
|
set targettest [tcl::string::map $encode_map $pathpart] |
|
if {[tcl::string::length $targettest] ne [tcl::string::length $pathpart]} { |
|
set err "fauxlink::resolve '$link' invalid chars in targetpath (section following first #)" |
|
append err [Segment_mustencode_check $pathpart] |
|
append err $err_extra |
|
error $err |
|
} |
|
#2 rounds of substitution is possibly asking for trouble.. |
|
#We allow anything in the resultant segments anyway (as %UXXXX... allows all) |
|
#so it's not so much about what can be encoded, |
|
# - but it makes it harder to reason about for users |
|
# In particular - if we map %XX first it makes %25 -> % substitution tricky |
|
# if the user requires a literal %UXXX - they can't do %25UXXX |
|
# the double sub would make it %UXXX -> somechar anyway. |
|
#we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere. |
|
#There is still the opportunity to use things like %U00000025 followed by hex-chars |
|
# and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW |
|
set pathpart [decode_unicode_escapes $pathpart] |
|
set pathpart [tcl::string::map $decode_map $pathpart] |
|
lappend targetpath_parts $pathpart |
|
|
|
incr pp |
|
} |
|
set targetpath [join $targetpath_parts /] |
|
if {$name eq ""} { |
|
set name [lindex $targetpath_parts end] |
|
} |
|
#we do the same encoding checks on tags and comments to increase chances of portability |
|
set tags [list] |
|
set comments [list] |
|
switch -- [llength $segments] { |
|
2 { |
|
#no tags or comments |
|
} |
|
3 { |
|
#only 3 sections - last is comment - even if looks like tags |
|
#to make the 3rd part a tagset, an extra # would be needed |
|
set comments [list [lindex $segments 2]] |
|
} |
|
default { |
|
set tagset [lindex $segments 2] |
|
if {$tagset eq ""} { |
|
#ok - no tags |
|
} else { |
|
if {[string first @ $tagset] != 0} { |
|
set err "fauxlink::resolve '$link' invalid tagset in 3rd #-delimited segment" |
|
append err \n " - must begin with @" |
|
append err $err_extra |
|
error $err |
|
} else { |
|
set tagset [string range $tagset 1 end] |
|
set rawtags [split $tagset @] |
|
set tags [list] |
|
foreach t $rawtags { |
|
if {$t eq ""} { |
|
lappend tags "" |
|
} else { |
|
set tagtest [tcl::string::map $encode_map $t] |
|
if {[tcl::string::length $tagtest] ne [tcl::string::length $t]} { |
|
set err "fauxlink::resolve '$link' invalid chars in tag [llength $tags]" |
|
append err [Segment_mustencode_check $t] |
|
append err $err_extra |
|
error $err |
|
} |
|
lappend tags [tcl::string::map $decode_map [decode_unicode_escapes $t]] |
|
} |
|
} |
|
} |
|
} |
|
set rawcomments [lrange $segments 3 end] |
|
#set comments [lsearch -all -inline -not $comments ""] |
|
set comments [list] |
|
foreach c $rawcomments { |
|
if {$c eq ""} {continue} |
|
set commenttest [tcl::string::map $encode_map $c] |
|
if {[tcl::string::length $commenttest] ne [tcl::string::length $c]} { |
|
set err "fauxlink::resolve '$link' invalid chars in comment [llength $comments]" |
|
append err [Segment_mustencode_check $c] |
|
append err $err_extra |
|
error $err |
|
} |
|
lappend comments [tcl::string::map $decode_map [decode_unicode_escapes $c]] |
|
} |
|
} |
|
} |
|
|
|
set data [dict create name $name targetpath $targetpath tags $tags comments $comments fauxlinkextension $extension_name] |
|
if {$is_fauxlink} { |
|
#standard .fxlnk or .fauxlink |
|
return $data |
|
} else { |
|
#custom extension - or called in error on wrong type of file but happened to parse. |
|
#see comments at top regarding is_fauxlink |
|
#make sure no keys in common at top level. |
|
return [dict create\ |
|
linktype $extension_name\ |
|
note "nonstandard extension returning nonstandard dict with result in data key"\ |
|
data $data\ |
|
] |
|
} |
|
} |
|
variable map |
|
|
|
#default exclusion of / (%U2f and equivs) |
|
#this would allow obfuscation of intention - when we have + for that anyway |
|
proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} { |
|
variable map |
|
set ucstart [string first %U $str 0] |
|
if {$ucstart < 0} { |
|
return $str |
|
} |
|
set max 8 |
|
set map [list] |
|
set strend [expr {[string length $str]-1}] |
|
while {$ucstart >= 0} { |
|
set s $ucstart |
|
set i [expr {$s +2}] ;#skip the %U |
|
set hex "" |
|
while {[tcl::string::length $hex] < 8 && $i <= $strend} { |
|
set in [string index $str $i] |
|
if {[tcl::string::is xdigit -strict $in]} { |
|
append hex $in |
|
} else { |
|
break |
|
} |
|
incr i |
|
} |
|
if {$hex ne ""} { |
|
incr i -1 |
|
lappend map $s $i $hex |
|
} |
|
set ucstart [tcl::string::first %U $str $i] |
|
} |
|
set out "" |
|
set lastidx -1 |
|
set e 0 |
|
foreach {s e hex} $map { |
|
append out [string range $str $lastidx+1 $s-1] |
|
set sub [format %c 0x$hex] |
|
if {$sub in $exclusions} { |
|
append out %U$hex ;#put it back |
|
} else { |
|
append out $sub |
|
} |
|
set lastidx $e |
|
} |
|
if {$e < [tcl::string::length $str]-1} { |
|
append out [string range $str $e+1 end] |
|
} |
|
return $out |
|
} |
|
proc link_as {name target} { |
|
|
|
} |
|
|
|
#proc sample1 {p1 args} { |
|
# #*** !doctools |
|
# #[call [fun sample1] [arg p1] [opt {?option value...?}]] |
|
# #[para]Description of sample1 |
|
# return "ok" |
|
#} |
|
|
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace fauxlink ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Secondary API namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval fauxlink::lib { |
|
namespace export {[a-z]*}; # Convention: export all lowercase |
|
namespace path [namespace parent] |
|
#*** !doctools |
|
#[subsection {Namespace fauxlink::lib}] |
|
#[para] Secondary functions that are part of the API |
|
#[list_begin definitions] |
|
|
|
#proc utility1 {p1 args} { |
|
# #*** !doctools |
|
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] |
|
# #[para]Description of utility1 |
|
# return 1 |
|
#} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace fauxlink::lib ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[section Internal] |
|
namespace eval fauxlink::system { |
|
#*** !doctools |
|
#[subsection {Namespace fauxlink::system}] |
|
#[para] Internal functions that are not part of the API |
|
|
|
|
|
|
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Ready |
|
package provide fauxlink [namespace eval fauxlink { |
|
variable pkg fauxlink |
|
variable version |
|
set version 0.1.1 |
|
}] |
|
return |
|
|
|
#*** !doctools |
|
#[manpage_end] |
|
|
|
|