You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
926 lines
38 KiB
926 lines
38 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2023 |
|
# |
|
# @@ Meta Begin |
|
# Application punk::path 999999.0a1.0 |
|
# Meta platform tcl |
|
# Meta license <unspecified> |
|
# @@ Meta End |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# doctools header |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[manpage_begin punkshell_module_punk::path 0 999999.0a1.0] |
|
#[copyright "2023"] |
|
#[titledesc {Filesystem path utilities}] [comment {-- Name section and table of contents description --}] |
|
#[moddesc {punk path filesystem utils}] [comment {-- Description at end of page heading --}] |
|
#[require punk::path] |
|
#[description] |
|
#[keywords module path filesystem] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section Overview] |
|
#[para] overview of punk::path |
|
#[para] Filesystem path utility functions |
|
#[subsection Concepts] |
|
#[para] - |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[subsection dependencies] |
|
#[para] packages used by punk::path |
|
#[list_begin itemized] |
|
|
|
package require Tcl 8.6- |
|
package require punk::args |
|
#*** !doctools |
|
#[item] [package {Tcl 8.6-}] |
|
#[item] [package {punk::args}] |
|
|
|
# #package require frobz |
|
# #*** !doctools |
|
# #[item] [package {frobz}] |
|
|
|
#*** !doctools |
|
#[list_end] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section API] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# oo::class namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#namespace eval punk::path::class { |
|
#*** !doctools |
|
#[subsection {Namespace punk::path::class}] |
|
#[para] class definitions |
|
#if {[info commands [namespace current]::interface_sample1] eq ""} { |
|
#*** !doctools |
|
#[list_begin enumerated] |
|
|
|
# oo::class create interface_sample1 { |
|
# #*** !doctools |
|
# #[enum] CLASS [class interface_sample1] |
|
# #[list_begin definitions] |
|
|
|
# method test {arg1} { |
|
# #*** !doctools |
|
# #[call class::interface_sample1 [method test] [arg arg1]] |
|
# #[para] test method |
|
# puts "test: $arg1" |
|
# } |
|
|
|
# #*** !doctools |
|
# #[list_end] [comment {-- end definitions interface_sample1}] |
|
# } |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end class enumeration ---}] |
|
#} |
|
#} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Base namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval punk::path { |
|
namespace export * |
|
#variable xyz |
|
|
|
#*** !doctools |
|
#[subsection {Namespace punk::path}] |
|
#[para] Core API functions for punk::path |
|
#[list_begin definitions] |
|
|
|
# -- --- |
|
#punk::path::normjoin |
|
# - simplify . and .. segments as far as possible whilst respecting specific types of root. |
|
# -- --- |
|
#a form of file normalize that supports //xxx to be treated as server path names |
|
#(ie regardless of unices ignoring (generally) leading double slashes, and regardless of windows volumerelative path syntax) |
|
#(sometimes //server.com used as a short form for urls - which doesn't seem too incompatible with this anyway) |
|
# -- --- |
|
#This is intended to be purely a string analysis - without reference to filesystem volumes or vfs or zipfs mountpoints etc |
|
# |
|
#TODO - option for caller to provide a -base below which we can't backtrack. |
|
#This is preferable to setting policy here for example regarding forcing no trackback below //servername/share |
|
#Our default is to allow trackback to: |
|
# <scheme>://<something> |
|
# <driveletter>:/ |
|
# //./<volume> (dos device volume) |
|
# //server (while normalizing //./UNC/server to same) |
|
# / (ordinary unix root) |
|
# ./../<repeated> - (track back indefinitely on relpath as we are not resolving to anything physical and can't fully simplify the leading backtracks) |
|
# |
|
#The caller should do the file/vfs operations to determine this - not us. |
|
# -- --- |
|
#simplify path with respect to /./ & /../ elements - independent of platform |
|
#NOTE: "anomalies" in standard tcl processing on windows: |
|
#e.g file normalize {//host} -> c:/host (or e.g d:/host if we happen to be on another volume) |
|
#file normalize {//host/share} -> //host/share |
|
#This is because //host is treated as volume-relative in cmd/powershell and Tcl quite reasonably follows suit. |
|
#This prevents cwd and windows commandlines from pointing to the server (above the share) |
|
#Explorer however does allow pointing to the //server level and seeing shares as if they are directory entries. |
|
#we are more interested in supporting the explorer-like behaviour - as while volumerelative paths are also useful on windows - they are lesser known. |
|
#REVIEW. |
|
#To get back to some consistent cross platform behaviour - we will treat //something as a root/volume i.e we can't backtrack above it with ".." |
|
#note too that file split on UNC paths doesn't give a clear indication of the root |
|
# file split //./UNC/server/share/subpath -> //./UNC server share subpath |
|
# file split //server/share/subpath -> //server/share subpath |
|
#TODO - disallow all change of root or change from relative path to absolute result. |
|
#e.g normjoin relpath/../d:/secret should not return d:/secret - but ./d:/secret |
|
# ================ |
|
#known issues: |
|
#1) |
|
# normjoin d://a//b//c -> d://a/b/c |
|
# This is because we don't detect specific schemes. ie it's treated the same as https://a/b/c -> https://a/b/c |
|
# Not considered a problem - just potentially surprising. |
|
# To avoid it we would have to enumerate possible schemes. |
|
# As it stands a unix system could define a 'scheme' that happens to match windows style driveletters. Consider a 'feature' ? review. |
|
# won't fix? |
|
#2) |
|
# normjoin https:///real.com/../fake.com -> https:///fake.com |
|
# The extra slash means effectively our servername is empty - this is potentially confusing but probably the right thing to do here. |
|
# It's a concern only if upstream treats the tripple slash in this case as valid and maps it to https:// - which would probably be bad anyway. |
|
# won't fix (review) |
|
#3) |
|
#similarly |
|
# normjoin //./UNC//server/share/subpath -> ///server/share/subpath (when 2 or more slashes directly after UNC) |
|
# normjoin ///server/share -> ///server/share |
|
#This is effectively an empty servername in the input with 'server' being pushed one level down - and the output is consistent |
|
# possibly won't fix - review |
|
#4) inconsistency |
|
# we return normalized //server/share for //./UNC/server share |
|
# but other dos device paths are maintained |
|
# e.g //./c:/etc |
|
# This is because such paths could contain alternate segment names (windows shortnames) which we aren't in a position to resolve. |
|
# caller should |
|
# #as with 'case' below - caller will need to run a post 'file normalize' |
|
#5) we don't normalize case like file normalize does on windows platform. |
|
# This is intentional. It could only be done with reference to underlying filesystem which we don't want here. |
|
# |
|
# ================ |
|
# |
|
#relpaths all end up with leading . - while not always the simplest form, this is ok. (helps stop inadvertent conversions to absolutes) |
|
# Tests - TODO |
|
# normjoin /d:/..//vfs:/test -> /vfs:/test (good - not converted to //vfs:/test) |
|
proc normjoin {args} { |
|
set args [lmap a $args {string map "\\\\ /" $a}] |
|
set path [plainjoin {*}$args] |
|
switch -exact $path { |
|
"" { |
|
return "" |
|
} |
|
/ - // { |
|
#treated in unixlike manner - (but leading doubleslashes with subsequent data are server indication) |
|
#// not considered a servername indicator - but /// (for consistency) is. (empty servername?) |
|
return / |
|
} |
|
/// { |
|
#if this is effectively //$emptyservername/ |
|
#then for consistency we should trail //<servername with a slash too? |
|
#we can't transform to // or / |
|
return /// |
|
#assert - code below should return /// (empty server prefix) for any number of leading slashes >=3 |
|
#todo - shortcircuit that here? |
|
} |
|
} |
|
# /// |
|
set doubleslash1_posn [string first // $path] |
|
|
|
# -- --- --- temp warning on windows only - no x-platform difference in result |
|
#on windows //host is of type volumerelative |
|
# whereas //host/share is of type absolute |
|
if {"windows" eq $::tcl_platform(platform) && [file pathtype $path] eq "volumerelative"} { |
|
#volumerelative probably only occurs on windows anyway |
|
if {$doubleslash1_posn == 0} { |
|
#e.g //something where no further slashes |
|
#review - eventually get rid of this warning and require upstream to know the appropriate usecase |
|
puts stderr "Warning - ambiguous path $path - treating as server path - not 'volumerelative'" |
|
} else { |
|
# /something/etc |
|
# /mnt/c/stuff |
|
#output will retain leading / as if on unix. |
|
#on windows - the result would still be interpreted as volumerelative if the caller normalizes it |
|
} |
|
} |
|
# -- --- --- |
|
|
|
set is_relpath 0 |
|
|
|
#set path [string map [list \\ /] $path] |
|
set finalparts [list] |
|
set is_nonunc_dosdevice 0 |
|
if {[punk::winpath::is_dos_device_path $path]} { |
|
#review |
|
if {[string range $path 4 6] eq "UNC"} { |
|
#convert to 'standard' //server/... path for processing |
|
set path "/[string range $path 7 end]" ;# //server/... |
|
} else { |
|
#error "normjoin non-UNC dos device path '$path' not supported" |
|
#first segment after //./ or //?/ represents the volume or drive. |
|
#not applicable to unix - but unlikely to conflict with a genuine usecase there (review) |
|
#we should pass through and stop navigation below //./vol |
|
#!!! |
|
#not anomaly in tcl (continues in tcl9) |
|
#file exists //./c:/test -> 0 |
|
#file exists //?/c:/test -> 1 |
|
#file exists //./BootPartition/Windows -> 1 |
|
#file exists //?/BootPartition/Windows -> 0 |
|
set is_nonunc_dosdevice 1 |
|
} |
|
} |
|
|
|
if {$is_nonunc_dosdevice} { |
|
#dosdevice prefix //./ or //?/ - preserve it (without trailing slash which will be put back in with join) |
|
set prefix [string range $path 0 2] |
|
set tail [string range $path 4 end] |
|
set tailparts [split $tail /] |
|
set parts [concat [list $prefix] $tailparts] |
|
set rootindex 1 ;#disallow backtrack below //./<volume> |
|
} else { |
|
#note use of ordinary ::split vs file split is deliberate. |
|
if {$doubleslash1_posn == 0} { |
|
#this is handled differently on different platforms as far as 'file split' is concerned. |
|
#e.g for file split //sharehost/share/path/etc |
|
#e.g on windows: -> //sharehost/share path |
|
#e.g on freebsd: -> / sharehost share path etc |
|
#however..also on windows: file split //sharehost -> / sharehost |
|
#normalize by dropping leading slash before split - and then treating first 2 segments as a root |
|
#set parts [file split [string range $path 1 end]] |
|
set parts [split $path /] |
|
#assert parts here has {} {} as first 2 entries |
|
set rootindex 2 |
|
#currently prefer can backtrack to the //zipfs:/ scheme (below the mountpoint - to browse other mounts) |
|
#alternative handling for //zipfs:/path - don't go below mountpoint |
|
#but we can't determine just from string if mountpoint is direct subpath or a lower one e.g //zipfs:/arbitraryname/actualmountpoint |
|
#review - more generally //<mountmechanism>:/path ? |
|
#todo - make an option for zipfs and others to determine the 'base' |
|
#if {"zipfs:" eq [lindex $parts 2]} { |
|
# set rootindex 3 |
|
#} |
|
} else { |
|
#path may or may not begin with a single slash here. |
|
#treat same on unix and windows |
|
set rootindex 0 |
|
#set parts [file split $path] |
|
set parts [::split $path /] |
|
#e.g /a/b/c -> {} a b c |
|
#or relative path a/b/c -> a b c |
|
#or c:/a/b/c -> c: a b c |
|
if {[string match *: [lindex $parts 0]]} { |
|
if {[lindex $parts 1] eq ""} { |
|
#scheme://x splits to scheme: {} x |
|
set parts [concat [list [lindex $parts 0]/] [lrange $parts 2 end]] |
|
#e.g {scheme:/ x} |
|
set rootindex 1 ;#disallow below first element of scheme |
|
} else { |
|
set rootindex 0 |
|
} |
|
} elseif {[lindex $parts 0] ne ""} { |
|
#relpath a/b/c |
|
set parts [linsert $parts 0 .] |
|
set rootindex 0 |
|
#allow backtracking arbitrarily for leading .. entries - simplify where possible |
|
#also need to stop possible conversion to absolute path |
|
set is_relpath 1 |
|
} |
|
} |
|
} |
|
set baseparts [lrange $parts 0 $rootindex] ;#base below which we can't retreat via ".." |
|
#puts stderr "-->baseparts:$baseparts" |
|
#ensure that if our rootindex already spans a dotted segment (after the first one) we remove it |
|
#must maintain initial . for relpaths to stop them converting to absolute via backtrack |
|
# |
|
set finalparts [list [lindex $baseparts 0]] |
|
foreach b [lrange $baseparts 1 end] { |
|
if {$b ni {. ..}} { |
|
lappend finalparts $b |
|
} |
|
} |
|
set baselen [expr {$rootindex + 1}] |
|
if {$is_relpath} { |
|
set i [expr {$rootindex+1}] |
|
foreach p [lrange $parts $i end] { |
|
switch -exact -- $p { |
|
. - "" {} |
|
.. { |
|
switch -exact -- [lindex $finalparts end] { |
|
. - .. { |
|
lappend finalparts .. |
|
} |
|
default { |
|
lpop finalparts |
|
} |
|
} |
|
} |
|
default { |
|
lappend finalparts $p |
|
} |
|
} |
|
incr i |
|
} |
|
} else { |
|
foreach p [lrange $parts $rootindex+1 end] { |
|
if {[llength $finalparts] <= $baselen} { |
|
if {$p ni {. .. ""}} { |
|
lappend finalparts $p |
|
} |
|
} else { |
|
switch -exact -- $p { |
|
. - "" {} |
|
.. { |
|
lpop finalparts ;#uses punk::lib::compat::lpop if on < 8.7 |
|
} |
|
default { |
|
lappend finalparts $p |
|
} |
|
} |
|
} |
|
} |
|
} |
|
puts "==>finalparts: '$finalparts'" |
|
# using join - {"" "" server share} -> //server/share and {a b} -> a/b |
|
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { |
|
#backtracking on unix-style path can end up with empty string as only member of finalparts |
|
#e.g /x/.. |
|
return / |
|
} |
|
set result [::join $finalparts /] |
|
#normalize volumes and mountschemes to have trailing slash if no subpath |
|
#e.g c: -> c:/ |
|
#//zipfs: -> //zipfs:/ |
|
if {[set lastchar [string index $result end]] eq ":"} { |
|
if {$result eq "//zipfs:"} { |
|
set result "//zipfs:/" |
|
} else { |
|
if {[string first / $result] < 0} { |
|
set result $result/ |
|
} |
|
} |
|
} elseif {[string match //* $result]} { |
|
if {![punk::winpath::is_dos_device_path $result]} { |
|
#server |
|
set tail [string range $result 2 end] |
|
set tailparts [split $tail /] |
|
if {[llength $tailparts] <=1} { |
|
#empty // or //servername |
|
append result / |
|
} |
|
} |
|
} elseif {[llength $finalparts] == 2} { |
|
if {[string range [lindex $finalparts 0] end-1 end] eq ":/"} { |
|
#e.g https://server/ -> finalparts {https:/ server} |
|
#e.g https:/// -> finalparts {https:/ ""} |
|
#scheme based path should always return trailing slash after server component - even if server component empty. |
|
lappend finalparts "" ;#force trailing / |
|
return [join $finalparts /] |
|
} |
|
} |
|
return $result |
|
} |
|
|
|
proc trim_final_slash {str} { |
|
if {[string index $str end] eq "/"} { |
|
return [string range $str 0 end-1] |
|
} |
|
return $str |
|
} |
|
|
|
|
|
#x-platform - punk::path::pathtype - can be used in safe interps - different concept of pathtypes to 'file pathtype' |
|
# - no volumerelative |
|
# - no lookup of file volumes (volume is a windows concept - but with //zipfs:/ somewhat applicable to other platforms) |
|
# - /* as absolute (covers also //zipfs:/ (volume), //server , //./etc , //./UNC) |
|
# - xxx:// as absolute (scheme) |
|
# - xxx:/ or x:/ as absolute |
|
# - x: xxx: -> as absolute (volume-basic or volume-extended) |
|
|
|
#note also on windows - legacy name for COM devices |
|
# COM1 = COM1: |
|
# //./COM1 ?? review |
|
|
|
proc pathtype {str} { |
|
set str [string map "\\\\ /" $str] |
|
if {[string index $str 0] eq "/"} { |
|
#todo - look for //xxx:/ prefix (generalisation of //zipfs:/) as a 'volume' specifically {volume mount} ?? - review |
|
# look for //server prefix as {absolute server} |
|
# look for //./UNC/server or //?/UNC/server as {absolute server UNC} ? |
|
# look for //./<dosdevice> as {absolute dosdevice} |
|
return absolute |
|
} |
|
|
|
#only firstsegment with single colon at last position (after some non empty string) counts as volume or scheme - review |
|
#e.g a:b:/.. or a::/.. or :/.. is not treated as volume/scheme whereas ab:/ is. |
|
set firstslash [string first / $str] |
|
if {$firstslash == -1} { |
|
set firstsegment $str |
|
} else { |
|
set firstsegment [string range $str 0 $firstslash-1] |
|
} |
|
if {[set firstc [string first : $firstsegment]] > 0} { |
|
set lhs_firstsegment [string range $firstsegment 0 $firstc-1] |
|
set rhs_firstsegment [string range $firstsegment $firstc+1 end] ;#exclude a:b/ etc |
|
if {$rhs_firstsegment eq ""} { |
|
set rhs_entire_path [string range $str $firstc+1 end] |
|
#assert lhs_firstsegment not empty since firstc > 0 |
|
#count following / sequence |
|
set i 0 |
|
set slashes_after_firstsegment "" ;#run of slashes *directly* following first segment |
|
while {$i < [string length $rhs_entire_path]} { |
|
if {[string index $rhs_entire_path $i] eq "/"} { |
|
append slashes_after_firstsegment / |
|
} else { |
|
break |
|
} |
|
incr i |
|
} |
|
switch -exact -- $slashes_after_firstsegment { |
|
"" - / { |
|
if {[string length $lhs_firstsegment] == 1} { |
|
return {absolute volume basic} |
|
} else { |
|
return {absolute volume extended} |
|
} |
|
} |
|
default { |
|
#2 or more / |
|
#this will return 'scheme' even for c:// - even though that may look like a windows volume - review |
|
return {absolute scheme} |
|
} |
|
} |
|
} |
|
} |
|
#assert first element of any return has been absolute or relative |
|
return relative |
|
} |
|
|
|
|
|
proc plain {str} { |
|
set str [string map "\\\\ /" $str] |
|
set pathinfo [punk::path::pathtype $str] |
|
if {[lindex $pathinfo 0] eq "relative" && ![string match ./* $str]} { |
|
set str ./$str |
|
} |
|
if {[string index $str end] eq "/"} { |
|
if {[string map {/ ""} $str] eq ""} { |
|
#all slash segment |
|
return $str |
|
} else { |
|
if {[lindex $pathinfo 1] ni {volume scheme}} { |
|
return [string range $str 0 end-1] |
|
} |
|
} |
|
} |
|
return $str |
|
} |
|
#purely string based - no reference to filesystem knowledge |
|
#unix-style forward slash only |
|
proc plainjoin {args} { |
|
set args [lmap a $args {string map "\\\\ /" $a}] |
|
#if {[llength $args] == 1} { |
|
# return [lindex $args 0] |
|
#} |
|
set out "" |
|
foreach a $args { |
|
if {![string length $out]} { |
|
append out [plain $a] |
|
} else { |
|
set a [plain $a] |
|
if {[string map {/ ""} $out] eq ""} { |
|
set out [string range $out 0 end-1] |
|
} |
|
|
|
if {[string map {/ ""} $a] eq ""} { |
|
#all / segment |
|
append out [string range $a 0 end-1] |
|
} else { |
|
if {[string length $a] > 2 && [string match "./*" $a]} { |
|
set a [string range $a 2 end] |
|
} |
|
if {[string index $out end] eq "/"} { |
|
append out $a |
|
} else { |
|
append out / $a |
|
} |
|
} |
|
} |
|
} |
|
return $out |
|
} |
|
proc plainjoin1 {args} { |
|
if {[llength $args] == 1} { |
|
return [lindex $args 0] |
|
} |
|
set out [trim_final_slash [lindex $args 0]] |
|
foreach a [lrange $args 1 end] { |
|
set a [trim_final_slash $a] |
|
append out / $a |
|
} |
|
return $out |
|
} |
|
|
|
#intention? |
|
#proc filepath_dotted_dirname {path} { |
|
#} |
|
|
|
proc strip_prefixdepth {path prefix} { |
|
if {$prefix eq ""} { |
|
return [norm $path] |
|
} |
|
return [file join \ |
|
{*}[lrange \ |
|
[file split [norm $path]] \ |
|
[llength [file split [norm $prefix]]] \ |
|
end]] |
|
} |
|
|
|
proc pathglob_as_re {pathglob} { |
|
#*** !doctools |
|
#[call [fun pathglob_as_re] [arg pathglob]] |
|
#[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure |
|
#[para] ** matches any number of subdirectories. |
|
#[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc (except directly within /etc itself) |
|
#[para] e.g /etc/**.txt will match any .txt files at any depth below /etc |
|
#[para] any segment that does not contain ** must match exactly one segment in the path |
|
#[para] e.g the glob /etc/*/*.doc - will match any .doc files that are exactly one tree level below /etc |
|
#[para] The pathglob doesn't have to contain glob characters, in which case the returned regex will match the pathglob exactly as specified. |
|
#[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals |
|
|
|
|
|
#todo - consider whether a way to escape the glob chars ? * is practical - to allow literals ? * |
|
# - would require counting immediately-preceding backslashes |
|
set pats [list] |
|
foreach seg [file split $pathglob] { |
|
if {[string range $seg end end] eq "/"} { |
|
set seg [string range $seg 0 end-1] ;# e.g c:/ -> c: / -> "" so that join at end doesn't double up |
|
} |
|
switch -- $seg { |
|
* {lappend pats {[^/]*}} |
|
** {lappend pats {.*}} |
|
default { |
|
set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals |
|
#set seg [string map [list . {[.]}] $seg] |
|
set seg [string map {. [.]} $seg] |
|
if {[regexp {[*?]} $seg]} { |
|
set pat [string map [list ** {.*} * {[^/]*} ? {[^/]}] $seg] |
|
lappend pats "$pat" |
|
} else { |
|
lappend pats "$seg" |
|
} |
|
} |
|
} |
|
} |
|
return "^[join $pats /]\$" |
|
} |
|
proc globmatchpath {pathglob path args} { |
|
#*** !doctools |
|
#[call [fun globmatchpath] [arg pathglob] [arg path] [opt {option value...}]] |
|
#[para] Return true if the pathglob matches the path |
|
#[para] see [fun pathglob_as_re] for pathglob description |
|
#[para] Caller must ensure that file separator is forward slash. (e.g use file normalize on windows) |
|
#[para] |
|
#[para] Known options: |
|
#[para] -nocase 0|1 (default 0 - case sensitive) |
|
#[para] If -nocase is not supplied - default to case sensitive *except for driveletter* |
|
#[para] ie - the driveletter alone in paths such as c:/etc will still be case insensitive. (ie c:/ETC/* will match C:/ETC/blah but not C:/etc/blah) |
|
#[para] Explicitly specifying -nocase 0 will require the entire case to match including the driveletter. |
|
|
|
set opts [dict create\ |
|
-nocase \uFFFF\ |
|
] |
|
foreach {k v} $args { |
|
switch -- $k { |
|
-nocase { |
|
dict set opts $k $v |
|
} |
|
default { |
|
error "Unrecognised option '$k'. Known-options: [dict keys $opts]" |
|
} |
|
} |
|
} |
|
# -- --- --- --- --- --- |
|
set opt_nocase [dict get $opts -nocase] |
|
set explicit_nocase 1 ;#default to disprove |
|
if {$opt_nocase eq "\uFFFF"} { |
|
set opt_nocase 0 |
|
set explicit_nocase 0 |
|
} |
|
# -- --- --- --- --- --- |
|
if {$opt_nocase} { |
|
return [regexp -nocase [pathglob_as_re $pathglob] $path] |
|
} else { |
|
set re [pathglob_as_re $pathglob] |
|
if {$explicit_nocase} { |
|
set ismatch [regexp $re $path] ;#explicit -nocase 0 - require exact match of path literals including driveletter |
|
} else { |
|
#caller is using default for -nocase - which indicates case sensitivity - but we have an exception for the driveletter. |
|
set re_segments [file split $re] ;#Note that file split c:/etc gives {c:/ etc} but file split ^c:/etc gives {^c: etc} |
|
set first_seg [lindex $re_segments 0] |
|
if {[regexp {^\^(.{1}):$} $first_seg _match driveletter]} { |
|
#first part of re is like "^c:" i.e a drive letter |
|
set chars [string tolower $driveletter][string toupper $driveletter] |
|
set re [join [concat "^\[$chars\]:" [lrange $re_segments 1 end]] /] ;#rebuild re with case insensitive driveletter only - use join - not file join. file join will misinterpret leading re segment. |
|
} |
|
#puts stderr "-->re: $re" |
|
set ismatch [regexp $re $path] |
|
} |
|
} |
|
return $ismatch |
|
} |
|
|
|
punk::args::define { |
|
@id -id ::punk::path::treefilenames |
|
-directory -type directory -help\ |
|
"folder in which to begin recursive scan for files." |
|
-call-depth-internal -default 0 -type integer |
|
-antiglob_paths -default {} -help\ |
|
"list of path patterns to exclude |
|
may include * and ** path segments e.g |
|
/usr/** (exlude subfolders based at /usr but not |
|
files within /usr itself) |
|
**/_aside (exlude files where _aside is last segment) |
|
**/_aside/* (exclude folders one below an _aside folder) |
|
**/_aside/** (exclude all folders with _aside as a segment)" |
|
-antiglob_files -default {} |
|
@values -min 0 -max -1 -optional 1 -type string |
|
tailglobs -default * -multiple 1 -help\ |
|
"Patterns to match against filename portion (last segment) of each file path |
|
within the directory tree being searched." |
|
} |
|
|
|
#todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ |
|
#then review if treefiles can replace dirfiles or if both should exist (dirfiles can have literal glob chars in path segments - but that is a rare usecase) |
|
proc treefilenames {args} { |
|
#*** !doctools |
|
#[call [fun treefilenames] [opt {option value...}] [opt {globpattern...}]] |
|
#[para]basic (glob based) list of filenames matching each pattern in tailglobs - recursive |
|
#[para] options: |
|
#[para] [opt -dir] <path> |
|
#[para] defaults to [lb]pwd[rb] - base path for tree to search |
|
#[para] [opt -antiglob_paths] <list> |
|
#[para] list of path patterns to exclude - may include * and ** path segments e.g /usr/** |
|
#[para]no natsorting - so order is dependent on filesystem |
|
|
|
set argd [punk::args::parse $args withid ::punk::path::treefilenames] |
|
lassign [dict values $argd] leaders opts values received |
|
set tailglobs [dict get $values tailglobs] |
|
# -- --- --- --- --- --- --- |
|
set opt_antiglob_paths [dict get $opts -antiglob_paths] |
|
set opt_antiglob_files [dict get $opts -antiglob_files] |
|
set CALLDEPTH [dict get $opts -call-depth-internal] |
|
# -- --- --- --- --- --- --- |
|
# -- --- --- --- --- --- --- |
|
|
|
set files [list] |
|
if {$CALLDEPTH == 0} { |
|
#set opts [dict merge $opts [list -directory $opt_dir]] |
|
if {![dict exists $received -directory]} { |
|
set opt_dir [pwd] |
|
} else { |
|
set opt_dir [dict get $opts -directory] |
|
} |
|
if {![file isdirectory $opt_dir]} { |
|
return [list] |
|
} |
|
} else { |
|
#assume/require to exist in any recursive call |
|
set opt_dir [dict get $opts -directory] |
|
} |
|
|
|
set skip 0 |
|
foreach anti $opt_antiglob_paths { |
|
if {[globmatchpath $anti $opt_dir]} { |
|
set skip 1 |
|
break |
|
} |
|
} |
|
if {$skip} { |
|
return [list] |
|
} |
|
|
|
#todo - account for vfs where matched path could appear to be a directory but is mounted so could be a desired match? |
|
if {[catch {glob -nocomplain -dir $opt_dir -type f {*}$tailglobs} matches]} { |
|
#we can get for example a permissions error |
|
puts stderr "treefilenames error while listing files in dir $opt_dir\n $matches" |
|
set dirfiles [list] |
|
} else { |
|
set retained [list] |
|
if {[llength $opt_antiglob_files]} { |
|
foreach m $matches { |
|
set skip 0 |
|
set ftail [file tail $m] |
|
foreach anti $opt_antiglob_files { |
|
if {[string match $anti $ftail]} { |
|
set skip 1; break |
|
} |
|
} |
|
if {!$skip} { |
|
lappend retained $m |
|
} |
|
} |
|
} else { |
|
set retained $matches |
|
} |
|
set dirfiles [lsort $retained] |
|
} |
|
|
|
lappend files {*}$dirfiles |
|
if {[catch {glob -nocomplain -dir $opt_dir -type d *} dirdirs]} { |
|
puts stderr "treefilenames error while listing subdirs in dir $opt_dir\n $dirdirs" |
|
set dirdirs [list] |
|
} |
|
|
|
foreach dir $dirdirs { |
|
set skip 0 |
|
foreach anti $opt_antiglob_paths { |
|
if {[globmatchpath $anti $dir]} { |
|
set skip 1 |
|
break |
|
} |
|
} |
|
if {$skip} { |
|
continue |
|
} |
|
set nextopts [dict merge $opts [list -directory $dir -call-depth-internal [incr CALLDEPTH]]] |
|
lappend files {*}[treefilenames {*}$nextopts {*}$tailglobs] |
|
} |
|
return $files |
|
} |
|
|
|
#maint warning - also in punkcheck |
|
proc relative {reference location} { |
|
#*** !doctools |
|
#[call [fun relative] [arg reference] [arg location]] |
|
#[para] Taking two directory paths, a reference and a location, computes the path |
|
# of the location relative to the reference. |
|
#[list_begin itemized] |
|
#[item] |
|
#[para] Arguments: |
|
# [list_begin arguments] |
|
# [arg_def string reference] The path from which the relative path to location is determined. |
|
# [arg_def string location] The location path which may be above or below the reference path |
|
# [list_end] |
|
#[item] |
|
#[para] Results: |
|
#[para] The relative path of the location to the reference path. |
|
#[para] Will return a single dot "." if the paths are the same |
|
#[item] |
|
#[para] Notes: |
|
#[para] Both paths must be the same type - ie both absolute or both relative |
|
#[para] Case sensitive. ie punk::path::relative /etc /etC |
|
# will return ../etC |
|
#[para] On windows, the drive-letter component (only) is not case sensitive |
|
#[example_begin] |
|
# P% punk::path::relative c:/etc C:/etc |
|
# - . |
|
#[example_end] |
|
#[para] The part following the driveletter is case sensitive so in the following cases it recognises the driveletter matches but not the tail |
|
#[example_begin] |
|
# P% punk::path::relative c:/etc C:/Etc |
|
# - ../Etc |
|
#[example_end] |
|
#[para] On windows, if the paths are absolute and specifiy different volumes, only the location will be returned. |
|
#[example_begin] |
|
# P% punk::path::relative c:/etc d:/etc/blah |
|
# - d:/etc/blah |
|
#[example_end] |
|
#[para] Unix-like examples: |
|
#[example_begin] |
|
# P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below |
|
# - somewhere/below |
|
# P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here |
|
# - ../../lib/here |
|
#[example_end] |
|
#[list_end] |
|
|
|
#see also kettle |
|
# Modified copy of ::fileutil::relative (tcllib) |
|
# Adapted to 8.5 ({*}). |
|
|
|
#review - check volume info on windows.. UNC paths? |
|
if {[file pathtype $reference] ne [file pathtype $location]} { |
|
return -code error "Unable to compute relation for paths of different pathtypes: [file pathtype $reference] vs. [file pathtype $location], ($reference vs. $location)" |
|
} |
|
|
|
#avoid normalizing if possible (file normalize *very* expensive on windows) |
|
set do_normalize 0 |
|
if {[file pathtype $reference] eq "relative"} { |
|
#if reference is relative so is location |
|
if {[regexp {[.]{2}} [list $reference $location]]} { |
|
set do_normalize 1 |
|
} |
|
if {[regexp {[.]/} [list $reference $location]]} { |
|
set do_normalize 1 |
|
} |
|
} else { |
|
set do_normalize 1 |
|
} |
|
if {$do_normalize} { |
|
set reference [file normalize $reference] |
|
set location [file normalize $location] |
|
} |
|
|
|
set save $location |
|
set reference [file split $reference] |
|
set location [file split $location] |
|
|
|
while {[lindex $location 0] eq [lindex $reference 0]} { |
|
set location [lrange $location 1 end] |
|
set reference [lrange $reference 1 end] |
|
if {![llength $location]} {break} |
|
} |
|
|
|
set location_len [llength $location] |
|
set reference_len [llength $reference] |
|
|
|
if {($location_len == 0) && ($reference_len == 0)} { |
|
# Cases: |
|
# (a) reference == location |
|
|
|
set location . |
|
} else { |
|
# Cases: |
|
# (b) ref is: ref/sub = sub |
|
# loc is: ref = {} |
|
|
|
# (c) ref is: ref = {} |
|
# loc is: ref/sub = sub |
|
|
|
while {$reference_len > 0} { |
|
set location [linsert $location 0 ..] |
|
incr reference_len -1 |
|
} |
|
set location [file join {*}$location] |
|
} |
|
return $location |
|
} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::path ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Secondary API namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval punk::path::lib { |
|
namespace export * |
|
namespace path [namespace parent] |
|
#*** !doctools |
|
#[subsection {Namespace punk::path::lib}] |
|
#[para] Secondary functions that are part of the API |
|
#[list_begin definitions] |
|
|
|
|
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::path::lib ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[section Internal] |
|
namespace eval punk::path::system { |
|
#*** !doctools |
|
#[subsection {Namespace punk::path::system}] |
|
#[para] Internal functions that are not part of the API |
|
|
|
|
|
|
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Ready |
|
package provide punk::path [namespace eval punk::path { |
|
variable pkg punk::path |
|
variable version |
|
set version 999999.0a1.0 |
|
}] |
|
return |
|
|
|
#*** !doctools |
|
#[manpage_end] |
|
|
|
|