You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
819 lines
38 KiB
819 lines
38 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2023 |
|
# |
|
# @@ Meta Begin |
|
# Application punk::du 0.1.0 |
|
# Meta platform tcl |
|
# Meta license <unspecified> |
|
# @@ Meta End |
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
##e.g package require frobz |
|
|
|
namespace eval punk::du { |
|
variable has_twapi 0 |
|
} |
|
if {"windows" eq $::tcl_platform(platform)} { |
|
if {[catch {package require twapi}]} { |
|
puts stderr "Warning: punk::du - unable to load twapi. Disk operations may be much slower on windows without the twapi package" |
|
} else { |
|
set punk::du::has_twapi 1 |
|
} |
|
package require punk::winpath |
|
} |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval punk::du { |
|
|
|
|
|
proc dirlisting {{folderpath {}}} { |
|
if {[lib::pathcharacterlen $folderpath] == 0} { |
|
set folderpath [pwd] |
|
} elseif {[file pathtype $folderpath] ne "absolute"} { |
|
#file normalize relativelly slow - avoid in inner loops |
|
#set folderpath [file normalize $folderpath] |
|
|
|
} |
|
#run whichever of du_dirlisting_twapi, du_dirlisting_generic, du_dirlisting_unix has been activated |
|
set dirinfo [active::du_dirlisting $folderpath] |
|
} |
|
|
|
|
|
|
|
#Note that unix du seems to do depth-first - which makese sense when piping.. as output can be emitted as we go rather than requiring sort at end. |
|
#breadth-first with sort can be quite fast .. but memory usage can easily get out of control |
|
proc du { args } { |
|
variable has_twapi |
|
package require struct::set |
|
|
|
|
|
if 0 { |
|
switch -exact [llength $args] { |
|
0 { |
|
set dir . |
|
set switch -k |
|
} |
|
1 { |
|
set dir $args |
|
set switch -k |
|
} |
|
2 { |
|
set switch [lindex $args 0] |
|
set dir [lindex $args 1] |
|
} |
|
default { |
|
set msg "only one switch and one dir " |
|
append msg "currently supported" |
|
return -code error $msg |
|
} |
|
} |
|
|
|
set switch [string tolower $switch] |
|
|
|
set -b 1 |
|
set -k 1024 |
|
set -m [expr 1024*1024] |
|
} |
|
|
|
|
|
set opts $args |
|
# flags in args are solos (or longopts --something=somethingelse) or sometimes pairopts |
|
# we don't currently support mashopts (ie -xy vs separate -x -y) |
|
|
|
|
|
#------------------------------------------------------- |
|
# process any pairopts first and remove the pair |
|
# (may also process some solo-opts) |
|
|
|
set opt_depth -1 |
|
if {[set posn [lsearch $opts -d]] >= 0} { |
|
set opt_depth [lindex $opts $posn+1] |
|
set opts [lreplace $opts $posn $posn+1] |
|
} |
|
foreach o $opts { |
|
if {[string match --max-depth=* $o]} { |
|
set opt_depth [lindex [split $o =] 1] |
|
if {![string is integer -strict $opt_depth]} { |
|
error "--max-depth=n n must be an integer" |
|
} |
|
} |
|
} |
|
#------------------------------------------------------- |
|
#only solos and longopts remain in the opts now |
|
|
|
|
|
set lastarg [lindex $opts end] |
|
if {[string length $lastarg] && (![string match -* $lastarg])} { |
|
set dir $lastarg |
|
set opts [lrange $opts 0 end-1] |
|
} else { |
|
set dir . |
|
set opts $opts |
|
} |
|
foreach a $opts { |
|
if {![string match -* $a]} { |
|
error "unrecognized option '$a'" |
|
} |
|
} |
|
|
|
set -b 1 |
|
set -k 1024 |
|
set -m [expr 1024*1024] |
|
set switch -k ;#default (same as unix) |
|
set lc_opts [string tolower $opts] |
|
|
|
|
|
|
|
if {"-b" in $lc_opts} { |
|
set switch -b |
|
} elseif {"-k" in $lc_opts} { |
|
set switch -k |
|
} elseif {"-m" in $lc_opts} { |
|
set switch -m |
|
} |
|
set opt_progress 0 |
|
if {"--prog" in $lc_opts || "--progress" in $lc_opts} { |
|
set opt_progress 1 |
|
} |
|
set opt_extra 0 |
|
if {"--extra" in $lc_opts} { |
|
set opt_extra 1 |
|
} |
|
set opt_vfs 0 |
|
if {"--vfs" in $lc_opts} { |
|
set opt_vfs 1 |
|
} |
|
|
|
|
|
|
|
set result [list] |
|
|
|
set dir_depths_remaining [list] |
|
|
|
set is_windows [expr {$::tcl_platform(platform) eq "windows"}] |
|
set zero [expr {0}] |
|
|
|
# ## ### ### ### ### |
|
# containerid and itemid |
|
set folders [list] ;#we lookup string by index |
|
lappend folders [file dirname $dir] |
|
lappend folders $dir ;#itemindex 1 |
|
# ## ### ### ### ### |
|
if {![file isdirectory $dir]} { |
|
lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] [file size $dir]] |
|
#set ary($dir,bytes) [file size $dir] |
|
set leveldircount 0 |
|
} else { |
|
lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] $zero] |
|
set leveldircount 1 |
|
} |
|
set level [expr {0}] |
|
set nextlevel [expr {1}] |
|
#dir_depths list structure |
|
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- |
|
#0 1 2 3 4 5 |
|
#i_depth i_containerid i_itemid i_item i_size i_index |
|
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- |
|
set i_depth [expr {0}] |
|
set i_containerid [expr {1}] |
|
set i_itemid [expr {2}] |
|
set i_item [expr {3}] |
|
set i_size [expr {4}] |
|
set i_index [expr {5}] |
|
|
|
|
|
set listlength [llength $dir_depths_remaining] |
|
set diridx 0 |
|
#this is a breadth-first algorithm |
|
while {$leveldircount > 0} { |
|
set leveldirs 0 |
|
set levelfiles 0 |
|
for {set i $diridx} {$i < $listlength} {incr i} { |
|
#lassign [lindex $dir_depths_remaining $i] _d containeridx folderidx itm bytecount |
|
set folderidx [lindex $dir_depths_remaining $i $i_itemid] |
|
set folderpath [lindex $folders $folderidx] |
|
#puts stderr ->$folderpath |
|
#if {$i >= 20} { |
|
#return |
|
#} |
|
|
|
#twapi supports gathering file sizes during directory contents traversal |
|
#for dirlisting methods that return an empty list in filesizes whilst files has entries - we will need to populate it below |
|
#e.g tcl glob based dirlisting doesn't support gathering file sizes at the same time |
|
|
|
set in_vfs 0 |
|
if {$opt_vfs} { |
|
foreach vfsmount [vfs::filesystem info] { |
|
if {[punk::repo::path_a_atorbelow_b $folderpath $vfsmount]} { |
|
set in_vfs 1 |
|
break |
|
} |
|
} |
|
} |
|
|
|
if {$in_vfs} { |
|
set du_info [lib::du_dirlisting_tclvfs $folderpath] |
|
} else { |
|
#run the activated function (proc imported to active namespace and renamed) |
|
set du_info [active::du_dirlisting $folderpath] |
|
} |
|
|
|
|
|
set dirs [dict get $du_info dirs] |
|
set files [dict get $du_info files] |
|
set filesizes [dict get $du_info filesizes] |
|
|
|
|
|
incr leveldirs [llength $dirs] |
|
incr levelfiles [llength $files] |
|
|
|
#lappend dir_depths_remaining {*}[lmap d $dirs {::list $nextdepth [lib::du_lit $cont/$itm] $d $zero}] |
|
#folderidx is parent index for new dirs |
|
lappend dir_depths_remaining {*}[lib::du_new_eachdir $dirs $nextlevel $folderidx] |
|
|
|
#we don't need to sort files (unless we add an option such as -a to du (?)) |
|
set bytecount [expr {0}] |
|
|
|
if {[llength $files] && ![llength $filesizes]} { |
|
#listing mechanism didn't supply corresponding sizes |
|
foreach filename $files { |
|
#incr bytecount [file size [file join $folderpath $filename] |
|
incr bytecount [file size $filename] |
|
} |
|
} else { |
|
set filesizes [lsearch -all -inline -not $filesizes[unset filesizes] na] ;#only legal non-number is na |
|
set bytecount [tcl::mathop::+ {*}$filesizes] |
|
} |
|
|
|
|
|
#we can safely assume initial count was zero |
|
lset dir_depths_remaining $i $i_size $bytecount |
|
#incr diridx |
|
} |
|
#puts stdout "level: $level dirs: $leveldirs" |
|
if {$opt_extra} { |
|
puts stdout "level: $level dircount: $leveldirs filecount: $levelfiles" |
|
} |
|
incr level ;#zero based |
|
set nextlevel [expr {$level + 1}] |
|
set leveldircount [expr {[llength $dir_depths_remaining] - $listlength }]; #current - previous - while loop terminates when zero |
|
#puts "diridx: $diridx i: $i rem: [llength $dir_depths_remaining] listlenth:$listlength levldircount: $leveldircount" |
|
set diridx $i |
|
set listlength [llength $dir_depths_remaining] |
|
} |
|
#puts stdout ">>> loop done" |
|
#flush stdout |
|
#puts stdout $dir_depths_remaining |
|
set dirs_as_encountered $dir_depths_remaining ;#index is in sync with 'folders' list |
|
set dir_depths_longfirst $dirs_as_encountered |
|
|
|
#store the index before sorting |
|
for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} { |
|
lset dir_depths_longfirst $i $i_index $i |
|
} |
|
set dir_depths_longfirst [lsort -integer -index 0 -decreasing $dir_depths_longfirst[set dir_depths_longfirst {}]] |
|
|
|
#store main index in the reducing list |
|
set dir_depths_remaining $dir_depths_longfirst |
|
for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} { |
|
#stored index at position 3 |
|
lset dir_depths_remaining $i $i_index $i |
|
} |
|
|
|
#index 3 |
|
#dir_depths_remaining -> dir_depths_longfirst -> dirs_as_encountered |
|
|
|
#puts stdout "initial dir_depths_remaining: $dir_depths_remaining" |
|
|
|
|
|
#summing performance is not terrible but significant on large tree - the real time is for large trees in the main loop above |
|
#update - on really large trees the reverse is true especiallyl now that twapi fixed the original speed issues.. todo - rework/simplify below - review natsort |
|
# |
|
#TODO - reconsider sorting by depth.. lreverse dirs_as_encountered should work.. |
|
if {[llength $dir_depths_longfirst] > 1} { |
|
set i 0 |
|
foreach dd $dir_depths_longfirst { |
|
lassign $dd d parentidx folderidx item bytecount |
|
#set nm $cont/$item |
|
set nm [lindex $folders $folderidx] |
|
set dnext [expr {$d +1}] |
|
set nextdepthposns [lsearch -all -integer -index 0 $dir_depths_remaining $dnext] |
|
set nextdepthposns [lsort -integer -decreasing $nextdepthposns[set nextdepthposns {}]];#remove later elements first |
|
foreach posn $nextdepthposns { |
|
set id [lindex $dir_depths_remaining $posn $i_itemid] |
|
set ndirname [lindex $folders $id] |
|
#set ndirname $cont/$item |
|
#set item [lindex $dir_depths_remaining $posn $i_item] |
|
#set ndirname [lindex $ndir 1] |
|
if {[string match $nm/* $ndirname]} { |
|
#puts stdout "dir $nm adding subdir size $ndirname" |
|
#puts stdout "incr $nm from $ary($nm,bytes) plus $ary($ndirname,bytes)" |
|
incr bytecount [lindex $dir_depths_remaining $posn $i_size] |
|
set dir_depths_remaining [lreplace $dir_depths_remaining[set dir_depths_remaining {}] $posn $posn] |
|
} |
|
} |
|
lset dir_depths_longfirst $i $i_size $bytecount |
|
set p [lsearch -index $i_index -integer $dir_depths_remaining $i] |
|
lset dir_depths_remaining $p $i_size $bytecount |
|
#set ary($nm,bytes) $bytecount |
|
incr i |
|
} |
|
} |
|
#set dir_depths_longfirst [lsort -index 1 -decreasing $dir_depths_longfirst] |
|
# |
|
|
|
set retval [list] |
|
#copy across the bytecounts |
|
for {set i 0} {$i < [llength $dir_depths_longfirst]} {incr i} { |
|
set posn [lindex $dir_depths_longfirst $i $i_index] |
|
set bytes [lindex $dir_depths_longfirst $i $i_size] |
|
lset dirs_as_encountered $posn $i_size $bytes |
|
} |
|
foreach dirinfo [lreverse $dirs_as_encountered] { |
|
set id [lindex $dirinfo $i_itemid] |
|
set depth [lindex $dirinfo $i_depth] |
|
if {($opt_depth >= 0) && $depth > $opt_depth} { |
|
continue |
|
} |
|
set path [lindex $folders $id] |
|
#set path $cont/$item |
|
set item [lindex $dirinfo $i_item] |
|
set bytes [lindex $dirinfo $i_size] |
|
set size [expr {$bytes / [set $switch]}] |
|
lappend retval [list $size $path] |
|
} |
|
# copyright 2002 by The LIGO Laboratory |
|
return $retval |
|
} |
|
namespace eval active { |
|
variable functions [list du_dirlisting ""] |
|
variable functions_known [dict create] |
|
|
|
#known functions from lib namespace |
|
dict set functions_known du_dirlisting [list du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix] |
|
|
|
proc show_functions {} { |
|
variable functions |
|
variable functions_known |
|
set msg "" |
|
dict for {callname implementations} $functions_known { |
|
append msg "callname: $callname" \n |
|
foreach imp $implementations { |
|
if {[dict get $functions $callname] eq $imp} { |
|
append msg " $imp (active)" \n |
|
} else { |
|
append msg " $imp" \n |
|
} |
|
} |
|
} |
|
return $msg |
|
} |
|
proc set_active_function {callname implementation} { |
|
variable functions |
|
variable functions_known |
|
if {$callname ni [dict keys $functions_known]} { |
|
error "unknown function callname $callname" |
|
} |
|
if {$implementation ni [dict get $functions_known $callname]} { |
|
error "unknown implementation $implementation for callname $callname" |
|
} |
|
dict set functions $callname $implementation |
|
|
|
catch {rename ::punk::du::active::$callname ""} |
|
namespace eval ::punk::du::active [string map [list %imp% $implementation %call% $callname] { |
|
namespace import ::punk::du::lib::%imp% |
|
rename %imp% %call% |
|
}] |
|
|
|
return $implementation |
|
} |
|
proc get_active_function {callname} { |
|
variable functions |
|
variable functions_known |
|
if {$callname ni [dict keys $functions_known]} { |
|
error "unknown function callname $callname known functions: [dict keys $functions_known]" |
|
} |
|
return [dict get $functions $callname] |
|
} |
|
|
|
|
|
#where we import & the appropriate du_listing.. function for the platform |
|
} |
|
namespace eval lib { |
|
variable du_literal |
|
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] |
|
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api |
|
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this |
|
|
|
proc decode_win_attributes {bitmask} { |
|
variable winfile_attributes |
|
if {[dict exists $winfile_attributes $bitmask]} { |
|
return [dict get $winfile_attributes $bitmask] |
|
} else { |
|
#list/dict shimmering? |
|
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] |
|
} |
|
} |
|
proc attributes_twapi {path {detail basic}} { |
|
try { |
|
set iterator [twapi::find_file_open $path -detail $detail] ;# -detail full only adds data to the altname field |
|
if {[twapi::find_file_next $iterator iteminfo]} { |
|
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] |
|
set result [dict create -archive 0 -hidden 0 -longname $path -readonly 0 -shortname {} -system 0] |
|
if {"hidden" in $attrinfo} { |
|
dict set result -hidden 1 |
|
} |
|
if {"system" in $attrinfo} { |
|
dict set result -system 1 |
|
} |
|
if {"readonly" in $attrinfo} { |
|
dict set result -readonly 1 |
|
} |
|
dict set result -shortname [dict get $iteminfo altname] |
|
dict set result -rawflags $attrinfo |
|
set extras [list] |
|
foreach prop {ctime atime mtime size} { |
|
lappend extras $prop [dict get $iteminfo $prop] |
|
} |
|
dict set result -extras $extras |
|
return $result |
|
} else { |
|
error "could not read attributes for $path" |
|
} |
|
} finally { |
|
catch {twapi::find_file_close $iterator} |
|
} |
|
} |
|
namespace export du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix |
|
# get listing without using unix-tools (may not be installed on the windows system) |
|
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) |
|
proc du_dirlisting_twapi {folderpath} { |
|
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ |
|
# return it so it can be stored and tried as an alternative for problem paths |
|
try { |
|
set iterator [twapi::find_file_open [file join $folderpath *] -detail basic] ;# -detail full only adds data to the altname field |
|
} on error args { |
|
try { |
|
if {[string match "*denied*" $args]} { |
|
#output similar format as unixy du |
|
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" |
|
return [list dirs {} vfsmounts {} links {} files {} filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}] |
|
} |
|
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { |
|
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" |
|
puts stderr " (errorcode: $::errorCode)\n" |
|
return [list dirs {} vfsmounts {} links {} files {} filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}] |
|
} |
|
|
|
|
|
|
|
|
|
|
|
if {[set plen [pathcharacterlen $folderpath]] >= 250} { |
|
set errmsg "error reading folder: $folderpath (len:$plen)\n" |
|
append errmsg "error: $args" \n |
|
append errmsg "errorcode: $::errorCode" \n |
|
# re-fetch this folder with altnames |
|
#file normalize - aside from being slow - will have problems with long paths - so this won't work. |
|
#this function should only accept absolute paths |
|
# |
|
# |
|
#Note: using -detail full only helps if the last segment of path has an altname.. |
|
#To properly shorten we need to have kept track of altname all the way from the root! |
|
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd |
|
#### SLOW |
|
set fixedpath [dict get [file attributes $folderpath] -shortname] |
|
#### SLOW |
|
|
|
|
|
append errmsg "retrying with with windows altname '$fixedpath'" |
|
puts stderr $errmsg |
|
} else { |
|
set errmsg "error reading folder: $folderpath (len:$plen)\n" |
|
append errmsg "error: $args" \n |
|
append errmsg "errorcode: $::errorCode" \n |
|
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share |
|
#we can use //?/path dos device path - but not with tcl functions |
|
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. |
|
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here |
|
|
|
set parent [file dirname $folderpath] |
|
set badtail [file tail $folderpath] |
|
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames |
|
set fixedtail "" |
|
while {[twapi::find_file_next $iterator iteminfo]} { |
|
set nm [dict get $iteminfo name] |
|
if {$nm eq $badtail} { |
|
set fixedtail [dict get $iteminfo altname] |
|
break |
|
} |
|
} |
|
|
|
if {![string length $fixedtail]} { |
|
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" |
|
return [list dirs {} vfsmounts {} links {} files {} filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}] |
|
} |
|
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. |
|
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it |
|
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) |
|
#so the illegalname_fix doesn't really work here |
|
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] |
|
|
|
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. |
|
set fixedpath [file join $parent $fixedtail] |
|
append errmsg "retrying with with windows dos device path $fixedpath\n" |
|
puts stderr $errmsg |
|
|
|
} |
|
|
|
|
|
set iterator [twapi::find_file_open $fixedpath/* -detail basic] |
|
|
|
|
|
} on error args { |
|
set errmsg "error reading folder: $folderpath\n" |
|
append errmsg "error: $args" |
|
append errmsg "aborting.." |
|
error $errmsg |
|
|
|
} |
|
} |
|
set dirs [list] |
|
set files [list] |
|
set filesizes [list] |
|
set links [list] |
|
set flaggedhidden [list] |
|
set flaggedsystem [list] |
|
set flaggedreadonly [list] |
|
|
|
while {[twapi::find_file_next $iterator iteminfo]} { |
|
set nm [dict get $iteminfo name] |
|
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path |
|
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] |
|
#puts stderr "$iteminfo" |
|
#puts stderr "$nm -> [dict get $iteminfo attrs] -> $attrinfo" |
|
|
|
#attributes applicable to any classification |
|
set fullname [file_join_one $folderpath $nm] |
|
if {"hidden" in $attrinfo} { |
|
lappend flaggedhidden $fullname |
|
} |
|
if {"system" in $attrinfo} { |
|
lappend flaggedsystem $fullname |
|
} |
|
if {"readonly" in $attrinfo} { |
|
lappend flaggedreadonly $fullname |
|
} |
|
|
|
#main classification |
|
if {"reparse_point" in $attrinfo} { |
|
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? |
|
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' |
|
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls |
|
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} |
|
#e.g (stripped of headers/footers and other lines) |
|
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] |
|
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. |
|
#du includes the size of the symlink |
|
#but we can't get it with tcl's file size |
|
#twapi doesn't seem to have anything to help read it either (?) |
|
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link |
|
# |
|
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. |
|
# |
|
#links are techically files too, whether they point to a file/dir or nothing. |
|
lappend links $fullname |
|
} elseif {"directory" in $attrinfo} { |
|
if {$nm in {. ..}} { |
|
continue |
|
} |
|
lappend dirs $fullname |
|
} else { |
|
|
|
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? |
|
lappend files $fullname |
|
lappend filesizes [dict get $iteminfo size] |
|
} |
|
} |
|
twapi::find_file_close $iterator |
|
set vfsmounts [get_vfsmounts_in_folder $folderpath] |
|
#also determine whether vfs. file system x is *much* faster than file attributes |
|
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder |
|
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes $filesizes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname] |
|
} |
|
proc get_vfsmounts_in_folder {folderpath} { |
|
set vfsmounts [list] |
|
set known_vfs_mounts [vfs::filesystem info] |
|
foreach mount $known_vfs_mounts { |
|
if {[punk::repo::path_a_above_b $folderpath $mount]} { |
|
if {([llength [file split $mount]] - [llength [file split $folderpath]]) == 1} { |
|
#the mount is in this folder |
|
lappend vfsmounts $mount |
|
} |
|
} |
|
} |
|
return $vfsmounts |
|
} |
|
#work around the horrible tilde-expansion thing (not needed for tcl 9+) |
|
proc file_join_one {base newtail} { |
|
if {[string index $newtail 0] ne {~}} { |
|
return [file join $base $newtail] |
|
} |
|
return [file join $base ./$newtail] |
|
} |
|
|
|
|
|
#this is the cross-platform pure-tcl version - which calls glob multiple times to make sure it gets everythign it needs and can ignore everything it needs to. |
|
#These repeated calls to glob will be a killer for performance - especially on a network share or when walking a large directory structure |
|
proc du_dirlisting_generic {folderpath} { |
|
#note platform differences between what is considered hidden make this tricky. |
|
# on windows 'glob .*' will not return some hidden dot items but will return . .. and glob -types hidden .* will not return some dotted items |
|
# glob -types hidden * on windows will not necessarily return all dot files/folders |
|
# unix-like platforms seem to consider all dot files as hidden so processing is more straightforward |
|
# we need to process * and .* in the same glob calls and remove duplicates |
|
# if we do * and .* in separate iterations of this loop we lose the ability to filter duplicates easily |
|
|
|
#Note - glob operations seem to be faster than looped tests like 'file isdirectory' & 'file readlink' |
|
#set parent [lindex $folders $folderidx] |
|
set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} * .*] |
|
#set hdirs {} |
|
set dirs [glob -nocomplain -dir $folderpath -types d * .*] |
|
|
|
set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} * .*] |
|
#set hlinks {} |
|
set links [glob -nocomplain -dir $folderpath -types l * .*] ;#links may have dupes - we don't care. struct::set difference will remove |
|
#set links [lsort -unique [concat $hlinks $links[unset links]]] |
|
|
|
set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} * .*] |
|
#set hfiles {} |
|
set files [glob -nocomplain -dir $folderpath -types f * .*] |
|
#set files {} |
|
|
|
#note struct::set difference produces unordered result |
|
#struct::set difference removes duplicates |
|
#remove links and . .. from directories, remove links from files |
|
set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] |
|
set files [struct::set difference [concat $hfiles $files[unset files]] $links] |
|
set links [lsort -unique [concat $links $hlinks]] |
|
if {"windows" eq $::tcl_platform(platform)} { |
|
set flaggedhidden [concat $hdirs $hfiles $hlinks] |
|
} else { |
|
#unix dotted files/folders are not 'flagged' as such - it's a convention - so the client can use the same convention to decide if something is hidden |
|
#this allows us to return less data - but more importantly - reserve flaggedhidden for systems where such a flag exists and avoid conflating the different concepts of what is hidden |
|
set flaggedhidden {} |
|
} |
|
|
|
set vfsmounts [get_vfsmounts_in_folder $folderpath] |
|
|
|
set filesizes [list]; #not available in listing-call - as opposed to twapi which can do it as it goes |
|
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}] |
|
} |
|
|
|
#we can halve the number of round trips on unix-like systems, where 'hidden' always corresponds to dotted files |
|
proc du_dirlisting_unix {folderpath} { |
|
set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs |
|
set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove |
|
set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files |
|
#remove any links from our dirs and files collections |
|
set dirs [struct::set difference $dirs[unset dirs] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] |
|
set files [struct::set difference $files[unset files] $links] |
|
set vfsmounts [get_vfsmounts_in_folder $folderpath] |
|
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}] |
|
} |
|
proc du_dirlisting_tclvfs {folderpath} { |
|
set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs |
|
#review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs? |
|
set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove |
|
set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files |
|
#remove any links from our dirs and files collections |
|
set dirs [struct::set difference $dirs[unset dirs] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] |
|
set files [struct::set difference $files[unset files] $links] |
|
#nested vfs mount.. REVIEW - does anything need special handling? |
|
set vfsmounts [get_vfsmounts_in_folder $folderpath] |
|
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}] |
|
} |
|
|
|
|
|
|
|
proc du_lit value { |
|
variable du_literal |
|
if {![info exists du_literal($value)]} { |
|
set du_literal($value) $value |
|
} |
|
return $du_literal($value) |
|
} |
|
|
|
#v1 |
|
proc du_new_eachdirtail {dirtails depth parentfolderidx} { |
|
set newlist {} |
|
upvar folders folders |
|
set parentpath [lindex $folders $parentfolderidx] |
|
set newindex [llength $folders] |
|
foreach dt $dirtails { |
|
lappend folders [file join $parentpath [du_lit $dt]]; #store as a 'path' rather than a string (see tcl::unsupported::representation) |
|
lappend newlist [::list $depth $parentfolderidx $newindex [du_lit $dt] [expr {0}]] |
|
incr newindex |
|
} |
|
return $newlist |
|
} |
|
proc du_new_eachdir {dirpaths depth parentfolderidx} { |
|
set newlist {} |
|
upvar folders folders |
|
set newindex [llength $folders] |
|
foreach dp $dirpaths { |
|
lappend folders $dp |
|
#puts stdout "--->$dp" |
|
lappend newlist [::list $depth $parentfolderidx $newindex [du_lit [file tail $dp]] [expr {0}]] |
|
incr newindex |
|
} |
|
return $newlist |
|
} |
|
|
|
#just an experiment |
|
#get length of path which has internal rep of path - maintaining path/list rep without shimmering to string representation. |
|
proc pathcharacterlen {pathrep} { |
|
set l 0 |
|
set parts [file split $pathrep] |
|
if {[llength $parts] < 2} { |
|
return [string length [lindex $parts 0]] |
|
} |
|
foreach seg $parts { |
|
incr l [string length $seg] |
|
} |
|
return [expr {$l + [llength $parts] -2}] |
|
} |
|
#slower - doesn't work for short paths like c:/ |
|
proc pathcharacterlen2 {pathrep} { |
|
return [tcl::mathop::+ {*}[lmap v [set plist [file split $pathrep]] {[string length $v]}] [llength $plist] -2] |
|
} |
|
|
|
#Strip using lengths without examining path components |
|
#without normalization is much faster |
|
proc path_strip_alreadynormalized_prefixdepth {path prefix} { |
|
set tail [lrange [file split $path] [llength [file split $prefix]] end] |
|
if {[llength $tail]} { |
|
return [file join {*}$tail] |
|
} else { |
|
return "" |
|
} |
|
} |
|
|
|
|
|
|
|
} |
|
package require natsort |
|
#interp alias {} du {} .=args>* punk::du |> .=>1 natsort::sort -cols 1 |> list_as_lines <args| |
|
#use natsort -debug 2 to see index output |
|
#this works better for display of directory/file names with spaces (doesn't show curly braces) |
|
interp alias {} du2 {} .=args>* punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat words |> list_as_lines <args| |
|
|
|
#experiment with csv as easy way to get column like format.. |
|
#The /r is somewhat cheating however.. as it messes up redirected output .. e.g if redirected to text file |
|
interp alias {} du {} .=args>* punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat csv -outputformatoptions {\r\t\t\t} |> list_as_lines <args| |
|
|
|
|
|
} |
|
|
|
|
|
namespace eval ::punk::du::active { |
|
variable functions |
|
variable functions_kown |
|
upvar ::punk::du::has_twapi has_twapi |
|
|
|
if {"windows" eq $::tcl_platform(platform)} { |
|
if {$has_twapi} { |
|
set_active_function du_dirlisting du_dirlisting_twapi |
|
} else { |
|
set_active_function du_dirlisting du_dirlisting_generic |
|
} |
|
} else { |
|
set_active_function du_dirlisting du_dirlisting_unix |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Ready |
|
package provide punk::du [namespace eval punk::du { |
|
variable version |
|
set version 0.1.0 |
|
}] |
|
return
|
|
|