You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

568 lines
21 KiB

# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2024
#
# @@ Meta Begin
# Application fauxlink 0.1.1
# Meta platform tcl
# Meta license MIT
# @@ Meta End
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin fauxlink_module_fauxlink 0 0.1.1]
#[copyright "2024"]
#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}]
#[moddesc {.fauxlink .fxlnk}] [comment {-- Description at end of page heading --}]
#[require fauxlink]
#[keywords symlink faux fake shortcut toml]
#[description]
#[para] A cross platform shortcut/symlink alternative.
#[para] Unapologetically ugly - but practical in certain circumstances.
#[para] A solution is required for application-driven filesystem links that survives cross platform moves as well as
#[para] archiving and packaging systems.
#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable.
#[para] format of name <nominalname>#<encodedtarget>.fauxlink
#[para] where <nominalname> can be empty - then the effective nominal name is the tail of the <encodedtarget>
#[para] The file extension must be .fauxlink or .fxlnk
#[para] The + symbol substitutes for forward-slashes.
#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !)
#[para] We deliberately treat higher % sequences literally.
#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [lb]heart[rb]) can remain literal for linking to urls.
#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23
#[para] e.g a link to a file file#A.txt in parent dir could be:
#[para] file%23A.txt#..+file%23A.txt.fauxlink
#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fauxlink
#[para] The <nominalname> can be unrelated to the actual target
#[para] e.g datafile.dat#..+file%23A.txt.fauxlink
#[para] This system has no filesystem support - and must be completely application driven.
#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform.
#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined
#[para] Extensions to behaviour should be added in the file as text data in Toml format,
#[para] with custom data being under a single application-chosen table name
#[para] The toplevel Toml table [lb]fauxlink[rb] is reserved for core extensions to this system.
#[para] Aside from the 2 used for delimiting (+ #)
#[para] certain characters which might normally be allowed in filesystems are required to be encoded
#[para] e.g space and tab are required to be %20 %09
#[para] Others that require encoding are: * ? \ / | : ; " < >
#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it.
#[para] Control characters and other punctuation is optional to encode.
#[para] Generally utf-8 should be used where possible and unicode characters can often be left unencoded on modern systems.
#[para] Where encoding of unicode is desired in the nominalname,encodedtarget,tag or comment portions it can be specified as %UXXXXXXXX
#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character.
#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest
#
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded
# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it.
#Using fauxlink - a link would be:
# "my-program-files#++server+c+Program%20Files.fauxlink"
#If we needed the old-style literal %20 it would become
# "my-program-files#++server+c+Program%2520Files.fauxlink"
#
# The file:// scheme on windows supposedly *does* decode %xx (for use in a browser)
# e.g
# pfiles#file%3a++++localhost+c+Program%2520files
# The browser will work with literal spaces too though - so it could just as well be:
# pfiles#file%3a++++localhost+c+Program%20files
#windows may default to using explorer.exe instead of a browser for file:// urls though
#and explorer doesn't want the literal %20. It probably depends what API the file:// url is to be passed to?
#in a .url shortcut either literal space or %20 will work ie %xx values are decoded
#*** !doctools
#[section Overview]
#[para] overview of fauxlink
#[subsection Concepts]
#[para] -
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Requirements
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[subsection dependencies]
#[para] packages used by fauxlink
#[list_begin itemized]
package require Tcl 8.6-
#*** !doctools
#[item] [package {Tcl 8.6-}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools
#[list_end]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section API]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# oo::class namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval fauxlink::class {
#*** !doctools
#[subsection {Namespace fauxlink::class}]
#[para] class definitions
if {[info commands [namespace current]::interface_sample1] eq ""} {
#*** !doctools
#[list_begin enumerated]
# oo::class create interface_sample1 {
# #*** !doctools
# #[enum] CLASS [class interface_sample1]
# #[list_begin definitions]
# method test {arg1} {
# #*** !doctools
# #[call class::interface_sample1 [method test] [arg arg1]]
# #[para] test method
# puts "test: $arg1"
# }
# #*** !doctools
# #[list_end] [comment {-- end definitions interface_sample1}]
# }
#*** !doctools
#[list_end] [comment {--- end class enumeration ---}]
}
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Base namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval fauxlink {
namespace export {[a-z]*}; # Convention: export all lowercase
#todo - enforce utf-8
#literal unicode chars supported by modern filesystems - leave as is - REVIEW
variable encode_map
variable decode_map
#most filesystems don't allow NULL - map to empty string
#Make sure % is not in encode_map
set encode_map [dict create\
\x00 ""\
{ } %20\
\t %09\
+ %2B\
# %23\
* %2A\
? %3F\
\\ %5C\
/ %2F\
| %7C\
: %3A\
{;} %3B\
{"} %22\
< %3C\
> %3E\
]
#above have some overlap with ctrl codes below.
#no big deal as it's a dict
#must_encode
# + # * ? \ / | : ; " < > <sp> \t
# also NUL to empty string
# also ctrl chars 01 to 1F (1..31)
for {set i 1} {$i < 32} {incr i} {
set ch [format %c $i]
set enc "%[format %02X $i]"
set enc_lower [string tolower $enc]
dict set encode_map $ch $enc
dict set decode_map $enc $ch
dict set decode_map $enc_lower $ch
}
variable must_encode
set must_encode [dict keys $encode_map]
#if they are in
#decode map doesn't include
# %00 (nul)
# %2F "/"
# %2f "/"
# %7f (del)
#we exlude the forward slash because we already have + for that - and multiple ways to specify it obscure intention.
#
set decode_map [dict merge $decode_map [dict create\
%09 \t\
%20 { }\
%21 "!"\
%22 {"}\
%23 "#"\
%24 "$"\
%25 "%"\
%26 "&"\
%27 "'"\
%28 "("\
%29 ")"\
%2A "*"\
%2a "*"\
%2B "+"\
%2b "+"\
%2C ","\
%2c ","\
%2D "-"\
%2d "-"\
%2E "."\
%2e "."\
%3A ":"\
%3a ":"\
%3B {;}\
%3b {;}\
%3D "="\
%3C "<"\
%3c "<"\
%3d "="\
%3E ">"\
%3e ">"\
%3F "?"\
%3f "?"\
%40 "@"\
%5B "\["\
%5b "\["\
%5C "\\"\
%5c "\\"\
%5D "\]"\
%5d "\]"\
%5E "^"\
%5e "^"\
%60 "`"\
%7B "{"\
%7b "{"\
%7C "|"\
%7c "|"\
%7D "}"\
%7d "}"\
%7E "~"\
%7e "~"\
]]
#Don't go above 7f
#if we want to specify p
#*** !doctools
#[subsection {Namespace fauxlink}]
#[para] Core API functions for fauxlink
#[list_begin definitions]
proc Segment_mustencode_check {str} {
variable decode_map
variable encode_map ;#must_encode
set idx 0
set err ""
foreach ch [split $str ""] {
if {[dict exists $encode_map $ch]} {
set enc [dict get $encode_map $ch]
if {[dict exists $decode_map $enc]} {
append err " char $idx should be encoded as $enc" \n
} else {
append err " no %xx encoding available. Use %UXX if really required" \n
}
}
incr idx
}
return $err ;#empty string if ok
}
proc resolve {link} {
variable decode_map
variable encode_map
variable must_encode
set ftail [file tail $link]
set extension_name [string range [file extension $ftail] 1 end]
if {$extension_name ni [list fxlnk fauxlink]} {
set is_fauxlink 0
#we'll process anyway - but return the result wrapped
#This should allow deliberate erroring for the calling dict user if the extension difference is inadvertent
#(e.g blindly processing all files in a folder that is normally only .fauxlink files - but then something added that happens
# to have # characters in it)
#It also means if someone really wants to use the fauxlink semantics on a different file type
# - they can - but just have to access the results differently and take that (minor) risk.
#error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink"
set err_extra "\nnonstandard extension '$extension_name' for fauxlink. (expected .fxlnk or .fauxlink) Check that the call to fauxlink::resolve was deliberate"
} else {
set is_fauxlink 1
set err_extra ""
}
set linkspec [file rootname $ftail]
# - any # or + within the target path or name should have been uri encoded as %23 and %2b
if {[tcl::string::first # $linkspec] < 0} {
set err "fauxlink::resolve '$link'. Link must contain a # (usually at start if name matches target)"
append err $err_extra
error $err
}
#The 1st 2 parts of split on # are name and target file/dir
#If there are only 3 parts the 3rd part is a comment and there are no 'tags'
#if there are 4 parts - the 3rd part is a tagset where each tag begins with @
#and each subsequent part is a comment. Empty comments are stripped from the comments list
#A tagset can be empty - but if it's not empty it must contain at least one @ and must start with @
#e.g name.txt#path#@tag1@tag2#test###.fauxlink
#has a name, a target, 2 tags and one comment
#check namespec already has required chars encoded
set segments [split $linkspec #]
lassign $segments namespec targetspec
#puts stderr "-->namespec $namespec"
set nametest [tcl::string::map $encode_map $namespec]
#puts stderr "-->nametest $nametest"
#nothing should be changed - if there are unencoded chars that must be encoded it is an error
if {[tcl::string::length $nametest] ne [tcl::string::length $namespec]} {
set err "fauxlink::resolve '$link' invalid chars in name part (section prior to first #)"
append err [Segment_mustencode_check $namespec]
append err $err_extra
error $err
}
#see comments below regarding 2 rounds and ordering.
set name [decode_unicode_escapes $namespec]
set name [tcl::string::map $decode_map $name]
#puts stderr "-->name: $name"
set targetsegment [split $targetspec +]
#check each + delimited part of targetspec already has required chars encoded
set pp 0 ;#pathpart index
set targetpath_parts [list]
foreach pathpart $targetsegment {
set targettest [tcl::string::map $encode_map $pathpart]
if {[tcl::string::length $targettest] ne [tcl::string::length $pathpart]} {
set err "fauxlink::resolve '$link' invalid chars in targetpath (section following first #)"
append err [Segment_mustencode_check $pathpart]
append err $err_extra
error $err
}
#2 rounds of substitution is possibly asking for trouble..
#We allow anything in the resultant segments anyway (as %UXXXX... allows all)
#so it's not so much about what can be encoded,
# - but it makes it harder to reason about for users
# In particular - if we map %XX first it makes %25 -> % substitution tricky
# if the user requires a literal %UXXX - they can't do %25UXXX
# the double sub would make it %UXXX -> somechar anyway.
#we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere.
#There is still the opportunity to use things like %U00000025 followed by hex-chars
# and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW
set pathpart [decode_unicode_escapes $pathpart]
set pathpart [tcl::string::map $decode_map $pathpart]
lappend targetpath_parts $pathpart
incr pp
}
set targetpath [join $targetpath_parts /]
if {$name eq ""} {
set name [lindex $targetpath_parts end]
}
#we do the same encoding checks on tags and comments to increase chances of portability
set tags [list]
set comments [list]
switch -- [llength $segments] {
2 {
#no tags or comments
}
3 {
#only 3 sections - last is comment - even if looks like tags
#to make the 3rd part a tagset, an extra # would be needed
set comments [list [lindex $segments 2]]
}
default {
set tagset [lindex $segments 2]
if {$tagset eq ""} {
#ok - no tags
} else {
if {[string first @ $tagset] != 0} {
set err "fauxlink::resolve '$link' invalid tagset in 3rd #-delimited segment"
append err \n " - must begin with @"
append err $err_extra
error $err
} else {
set tagset [string range $tagset 1 end]
set rawtags [split $tagset @]
set tags [list]
foreach t $rawtags {
if {$t eq ""} {
lappend tags ""
} else {
set tagtest [tcl::string::map $encode_map $t]
if {[tcl::string::length $tagtest] ne [tcl::string::length $t]} {
set err "fauxlink::resolve '$link' invalid chars in tag [llength $tags]"
append err [Segment_mustencode_check $t]
append err $err_extra
error $err
}
lappend tags [tcl::string::map $decode_map [decode_unicode_escapes $t]]
}
}
}
}
set rawcomments [lrange $segments 3 end]
#set comments [lsearch -all -inline -not $comments ""]
set comments [list]
foreach c $rawcomments {
if {$c eq ""} {continue}
set commenttest [tcl::string::map $encode_map $c]
if {[tcl::string::length $commenttest] ne [tcl::string::length $c]} {
set err "fauxlink::resolve '$link' invalid chars in comment [llength $comments]"
append err [Segment_mustencode_check $c]
append err $err_extra
error $err
}
lappend comments [tcl::string::map $decode_map [decode_unicode_escapes $c]]
}
}
}
set data [dict create name $name targetpath $targetpath tags $tags comments $comments fauxlinkextension $extension_name]
if {$is_fauxlink} {
#standard .fxlnk or .fauxlink
return $data
} else {
#custom extension - or called in error on wrong type of file but happened to parse.
#see comments at top regarding is_fauxlink
#make sure no keys in common at top level.
return [dict create\
linktype $extension_name\
note "nonstandard extension returning nonstandard dict with result in data key"\
data $data\
]
}
}
variable map
#default exclusion of / (%U2f and equivs)
#this would allow obfuscation of intention - when we have + for that anyway
proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} {
variable map
set ucstart [string first %U $str 0]
if {$ucstart < 0} {
return $str
}
set max 8
set map [list]
set strend [expr {[string length $str]-1}]
while {$ucstart >= 0} {
set s $ucstart
set i [expr {$s +2}] ;#skip the %U
set hex ""
while {[tcl::string::length $hex] < 8 && $i <= $strend} {
set in [string index $str $i]
if {[tcl::string::is xdigit -strict $in]} {
append hex $in
} else {
break
}
incr i
}
if {$hex ne ""} {
incr i -1
lappend map $s $i $hex
}
set ucstart [tcl::string::first %U $str $i]
}
set out ""
set lastidx -1
set e 0
foreach {s e hex} $map {
append out [string range $str $lastidx+1 $s-1]
set sub [format %c 0x$hex]
if {$sub in $exclusions} {
append out %U$hex ;#put it back
} else {
append out $sub
}
set lastidx $e
}
if {$e < [tcl::string::length $str]-1} {
append out [string range $str $e+1 end]
}
return $out
}
proc link_as {name target} {
}
#proc sample1 {p1 args} {
# #*** !doctools
# #[call [fun sample1] [arg p1] [opt {?option value...?}]]
# #[para]Description of sample1
# return "ok"
#}
#*** !doctools
#[list_end] [comment {--- end definitions namespace fauxlink ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Secondary API namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval fauxlink::lib {
namespace export {[a-z]*}; # Convention: export all lowercase
namespace path [namespace parent]
#*** !doctools
#[subsection {Namespace fauxlink::lib}]
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
#*** !doctools
#[list_end] [comment {--- end definitions namespace fauxlink::lib ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Internal]
namespace eval fauxlink::system {
#*** !doctools
#[subsection {Namespace fauxlink::system}]
#[para] Internal functions that are not part of the API
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
package provide fauxlink [namespace eval fauxlink {
variable pkg fauxlink
variable version
set version 0.1.1
}]
return
#*** !doctools
#[manpage_end]