Julian Noble
1 year ago
7 changed files with 694 additions and 262 deletions
@ -0,0 +1,485 @@
|
||||
# -*- tcl -*- |
||||
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
||||
# |
||||
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
||||
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# (C) 2023 |
||||
# |
||||
# @@ Meta Begin |
||||
# Application punk::du 999999.0a1.0 |
||||
# Meta platform tcl |
||||
# Meta license <unspecified> |
||||
# @@ Meta End |
||||
|
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
## Requirements |
||||
##e.g package require frobz |
||||
|
||||
if {"windows" eq $::tcl_platform(platform)} { |
||||
if {[catch {package require twapi}]} { |
||||
puts stderr "Warning: punk::du - unable to load twapi. Disk operations may be much slower on windows without the twapi package" |
||||
} |
||||
} |
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
namespace eval punk::du { |
||||
|
||||
proc du_lit value { |
||||
if {![info exists ::punk::du_literal($value)]} { |
||||
set ::punk::du_literal($value) $value |
||||
} |
||||
return $::punk::du_literal($value) |
||||
} |
||||
proc _du_new_eachdir {dirtails depth parentfolderidx} { |
||||
set newlist {} |
||||
upvar folders folders |
||||
set parentpath [lindex $folders $parentfolderidx] |
||||
set newindex [llength $folders] |
||||
foreach dt $dirtails { |
||||
lappend folders $parentpath/$dt |
||||
lappend newlist [::list $depth $parentfolderidx $newindex $dt [expr {0}]] |
||||
incr newindex |
||||
} |
||||
return $newlist |
||||
} |
||||
proc du_listing {folderpath} { |
||||
#note platform differences between what is considered hidden make this tricky. |
||||
# on windows 'glob .*' will not return some hidden dot items but will return . .. and glob -types hidden .* will not return some dotted items |
||||
# glob -types hidden * on windows will not necessarily return all dot files/folders |
||||
# unix-like platforms seem to consider all dot files as hidden so processing is more straightforward |
||||
# we need to process * and .* in the same glob calls and remove duplicates |
||||
# if we do * and .* in separate iterations of this loop we lose the ability to filter duplicates easily |
||||
|
||||
#Note - glob operations seem to be faster than looped tests like 'file isdirectory' & 'file readlink' |
||||
#set parent [lindex $folders $folderidx] |
||||
set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} -tail * .*] |
||||
#set hdirs {} |
||||
set dirs [glob -nocomplain -dir $folderpath -types d -tail * .*] |
||||
|
||||
#set hlinks [glob -nocomplain -dir [lindex $folders $folderidx] -types {hidden l} -tail * .*] |
||||
set hlinks {} |
||||
set links [glob -nocomplain -dir $folderpath -types l -tail * .*] ;#links may have dupes - we don't care. struct::set difference will remove |
||||
#set links [lsort -unique [concat $hlinks $links[unset links]]] |
||||
|
||||
set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} -tail * .*] |
||||
#set hfiles {} |
||||
set files [glob -nocomplain -dir $folderpath -types f -tail * .*] |
||||
#set files {} |
||||
|
||||
#note struct::set difference produces unordered result |
||||
#struct::set difference removes duplicates |
||||
#remove links and . .. from directories, remove links from files |
||||
set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links {. ..}]] |
||||
set files [struct::set difference [concat $hfiles $files[unset files]] $links] |
||||
|
||||
set filesizes [list]; #not available in listing-call - as opposed to twapi which can do it as it goes |
||||
return [list $dirs $files $filesizes] |
||||
} |
||||
|
||||
# get listing without using unix-tools (may not be installed on the windows system) |
||||
proc du_listing_twapi {folderpath} { |
||||
package require punk::winpath |
||||
set dirs [list] |
||||
set files [list] |
||||
set filesizes [list] |
||||
try { |
||||
if {[string length [file normalize $folderpath]] >= 250} { |
||||
set folderpath_shortname [punk::winpath::pwdshortname $folderpath] |
||||
set iterator [twapi::find_file_open $folderpath_shortname/* -detail basic] |
||||
} else { |
||||
set iterator [twapi::find_file_open $folderpath/* -detail basic] ;# -detail full only adds data to the altname field |
||||
} |
||||
} on error args { |
||||
if {[string match "*denied*" $args]} { |
||||
#output similar format as unixy du |
||||
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" |
||||
return [list {} {} {}] |
||||
} |
||||
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share |
||||
set errmsg "error reading folder: $folderpath (len:[string length [file normalize $folderpath]])\n" |
||||
append errmsg "error: $args" \n |
||||
append errmsg "errorcode: $::errorCode" \n |
||||
append errmsg {retrying with with windows altname} |
||||
puts stderr $errmsg |
||||
try { |
||||
#go to parent and re-fetch this folder with altnames |
||||
#use normalize to get full path - in case we are currently at "." |
||||
set parent [file dirname [file normalize $folderpath]] |
||||
set iterator [twapi::find_file_open $parent/* -detail full] ;# -detail full because we need altname! |
||||
while {[twapi::find_file_next $iterator iteminfo]} { |
||||
set nm [dict get $iteminfo name] |
||||
if {$nm eq [file tail $folderpath]} { |
||||
break |
||||
} |
||||
} |
||||
if {$nm ne [file tail $folderpath]} { |
||||
error "failed to re-fetch current directory info with altpath info" |
||||
} |
||||
set altname [dict get $iteminfo altname] |
||||
puts stderr "using altname $parent/$altname" |
||||
set iterator [twapi::find_file_open $parent/$altname/* -detail basic] |
||||
|
||||
|
||||
} on error args { |
||||
set errmsg "error reading folder: $parent or $folderpath\n" |
||||
append errmsg "error: $args" |
||||
append errmsg "aborting.." |
||||
error $errmsg |
||||
} |
||||
} |
||||
while {[twapi::find_file_next $iterator iteminfo]} { |
||||
set nm [dict get $iteminfo name] |
||||
if {$nm in {. ..}} { |
||||
continue |
||||
} |
||||
set attrinfo [twapi::decode_file_attributes [dict get $iteminfo attrs]] |
||||
if {"reparse_point" in $attrinfo} { |
||||
#we will treat as a zero sized file.. review - option -L for symlinks like BSD du? |
||||
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' |
||||
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls |
||||
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} |
||||
#e.g (stripped of headers/footers and other lines) |
||||
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] |
||||
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. |
||||
#du includes the size of the symlink |
||||
#but we can't get it with tcl's file size |
||||
#twapi doesn't seem to have anything to help read it either (?) |
||||
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link |
||||
# |
||||
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. |
||||
#This .lnk seems to be an ordinary file and we can treat as such here. |
||||
lappend files $nm |
||||
lappend filesizes 0 |
||||
} elseif {"directory" in $attrinfo} { |
||||
lappend dirs $nm |
||||
} else { |
||||
lappend files $nm |
||||
lappend filesizes [dict get $iteminfo size] |
||||
} |
||||
} |
||||
return [list $dirs $files $filesizes] |
||||
} |
||||
|
||||
|
||||
#Note that unix du seems to do depth-first - which makese sense when piping.. as output can be emitted as we go rather than requiring sort at end. |
||||
#breadth-first with sort can be quite fast though |
||||
proc du { args } { |
||||
package require struct::set |
||||
|
||||
|
||||
if 0 { |
||||
switch -exact [llength $args] { |
||||
0 { |
||||
set dir . |
||||
set switch -k |
||||
} |
||||
1 { |
||||
set dir $args |
||||
set switch -k |
||||
} |
||||
2 { |
||||
set switch [lindex $args 0] |
||||
set dir [lindex $args 1] |
||||
} |
||||
default { |
||||
set msg "only one switch and one dir " |
||||
append msg "currently supported" |
||||
return -code error $msg |
||||
} |
||||
} |
||||
|
||||
set switch [string tolower $switch] |
||||
|
||||
set -b 1 |
||||
set -k 1024 |
||||
set -m [expr 1024*1024] |
||||
} |
||||
|
||||
|
||||
set opts $args |
||||
# flags in args are singletons (or longopts --something=somethingelse) or sometimes pairopts |
||||
# process any pairopts first |
||||
|
||||
set opt_depth -1 |
||||
if {[set posn [lsearch $opts -d]] >= 0} { |
||||
set opt_depth [lindex $opts $posn+1] |
||||
set opts [lreplace $opts $posn $posn+1] |
||||
} |
||||
foreach o $opts { |
||||
if {[string match --max-depth=* $o]} { |
||||
set opt_depth [lindex [split $o =] 1] |
||||
if {![string is integer -strict $opt_depth]} { |
||||
error "--max-depth=n n must be an integer" |
||||
} |
||||
} |
||||
} |
||||
#---- |
||||
|
||||
set lastarg [lindex $opts end] |
||||
if {[string length $lastarg] && (![string match -* $lastarg])} { |
||||
set dir $lastarg |
||||
set opts [lrange $opts 0 end-1] |
||||
} else { |
||||
set dir . |
||||
set opts $opts |
||||
} |
||||
foreach a $opts { |
||||
if {![string match -* $a]} { |
||||
error "unrecognized option '$a'" |
||||
} |
||||
} |
||||
|
||||
set -b 1 |
||||
set -k 1024 |
||||
set -m [expr 1024*1024] |
||||
set switch -k ;#default (same as unix) |
||||
set lc_opts [string tolower $opts] |
||||
|
||||
|
||||
|
||||
if {"-b" in $lc_opts} { |
||||
set switch -b |
||||
} elseif {"-k" in $lc_opts} { |
||||
set switch -k |
||||
} elseif {"-m" in $lc_opts} { |
||||
set switch -m |
||||
} |
||||
set opt_progress 0 |
||||
if {"--prog" in $lc_opts || "--progress" in $lc_opts} { |
||||
set opt_progress 1 |
||||
} |
||||
set opt_extra 0 |
||||
if {"--extra" in $lc_opts} { |
||||
set opt_extra 1 |
||||
} |
||||
|
||||
|
||||
|
||||
set result [list] |
||||
|
||||
set dir_depths_remaining [list] |
||||
|
||||
#if {[file normalize [file dirname $dir]] eq [file normalize $dir]} { |
||||
# error "du at root of filesystem not yet implemented.. sorry" |
||||
# |
||||
#} |
||||
|
||||
set is_windows [expr {$::tcl_platform(platform) eq "windows"}] |
||||
set has_twapi 0 ;#default assumption |
||||
if {$is_windows} { |
||||
if {![catch { |
||||
package require twapi |
||||
} |
||||
]} { |
||||
set has_twapi 1 |
||||
} |
||||
} |
||||
set zero [expr {0}] |
||||
|
||||
# ## ### ### ### ### |
||||
# containerid and itemid |
||||
set folders [list] ;#we lookup string by index |
||||
lappend folders [file dirname $dir] |
||||
lappend folders $dir ;#itemindex 1 |
||||
# ## ### ### ### ### |
||||
if {![file isdirectory $dir]} { |
||||
lappend dir_depths_remaining [list $zero $zero [expr {1}] [du_lit [file tail $dir]] [file size $dir]] |
||||
#set ary($dir,bytes) [file size $dir] |
||||
set leveldircount 0 |
||||
} else { |
||||
lappend dir_depths_remaining [list $zero $zero [expr {1}] [du_lit [file tail $dir]] $zero] |
||||
set leveldircount 1 |
||||
} |
||||
set level [expr {0}] |
||||
set nextlevel [expr {1}] |
||||
#dir_depths list structure |
||||
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- |
||||
#0 1 2 3 4 5 |
||||
#i_depth i_containerid i_itemid i_item i_size i_index |
||||
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- |
||||
set i_depth [expr {0}] |
||||
set i_containerid [expr {1}] |
||||
set i_itemid [expr {2}] |
||||
set i_item [expr {3}] |
||||
set i_size [expr {4}] |
||||
set i_index [expr {5}] |
||||
|
||||
|
||||
set listlength [llength $dir_depths_remaining] |
||||
set diridx 0 |
||||
#this is a breadth-first algorithm |
||||
while {$leveldircount > 0} { |
||||
set leveldirs 0 |
||||
set levelfiles 0 |
||||
for {set i $diridx} {$i < $listlength} {incr i} { |
||||
#lassign [lindex $dir_depths_remaining $i] _d containeridx folderidx itm bytecount |
||||
set folderidx [lindex $dir_depths_remaining $i $i_itemid] |
||||
set folderpath [lindex $folders $folderidx] |
||||
#puts stderr ->$folderpath |
||||
#if {$i >= 20} { |
||||
#return |
||||
#} |
||||
|
||||
if {$is_windows && $has_twapi} { |
||||
lassign [du_listing_twapi $folderpath] dirs files filesizes; #twapi supports gathering file sizes during directory contents traversal |
||||
} else { |
||||
lassign [du_listing $folderpath] dirs files filesizes ;#filesizes is empty list if listing mechanism doesn't support it (as Tcl glob doesn't) |
||||
} |
||||
|
||||
|
||||
incr leveldirs [llength $dirs] |
||||
incr levelfiles [llength $files] |
||||
|
||||
#lappend dir_depths_remaining {*}[lmap d $dirs {::list $nextdepth [du_lit $cont/$itm] $d $zero}] |
||||
#folderidx is parent index for new dirs |
||||
lappend dir_depths_remaining {*}[_du_new_eachdir $dirs $nextlevel $folderidx] |
||||
|
||||
#we don't need to sort files (unless we add an option such as -a to du (?)) |
||||
set bytecount [expr {0}] |
||||
|
||||
if {[llength $files] && ![llength $filesizes]} { |
||||
#listing mechanism didn't supply corresponding sizes |
||||
foreach filename $files { |
||||
incr bytecount [file size "$folderpath/$filename"] |
||||
} |
||||
} else { |
||||
set bytecount [tcl::mathop::+ {*}$filesizes] |
||||
} |
||||
|
||||
|
||||
#we can safely assume initial count was zero |
||||
lset dir_depths_remaining $i $i_size $bytecount |
||||
#incr diridx |
||||
} |
||||
#puts stdout "level: $level dirs: $leveldirs" |
||||
if {$opt_extra} { |
||||
puts stdout "level: $level dircount: $leveldirs filecount: $levelfiles" |
||||
} |
||||
incr level ;#zero based |
||||
set nextlevel [expr {$level + 1}] |
||||
set leveldircount [expr {[llength $dir_depths_remaining] - $listlength }]; #current - previous - while loop terminates when zero |
||||
#puts "diridx: $diridx i: $i rem: [llength $dir_depths_remaining] listlenth:$listlength levldircount: $leveldircount" |
||||
set diridx $i |
||||
set listlength [llength $dir_depths_remaining] |
||||
} |
||||
#puts stdout ">>> loop done" |
||||
#flush stdout |
||||
#puts stdout $dir_depths_remaining |
||||
set dirs_as_encountered $dir_depths_remaining ;#index is in sync with 'folders' list |
||||
set dir_depths_longfirst $dirs_as_encountered |
||||
|
||||
#store the index before sorting |
||||
for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} { |
||||
lset dir_depths_longfirst $i $i_index $i |
||||
} |
||||
set dir_depths_longfirst [lsort -integer -index 0 -decreasing $dir_depths_longfirst[set dir_depths_longfirst {}]] |
||||
|
||||
#store main index in the reducing list |
||||
set dir_depths_remaining $dir_depths_longfirst |
||||
for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} { |
||||
#stored index at position 3 |
||||
lset dir_depths_remaining $i $i_index $i |
||||
} |
||||
|
||||
#index 3 |
||||
#dir_depths_remaining -> dir_depths_longfirst -> dirs_as_encountered |
||||
|
||||
#puts stdout "initial dir_depths_remaining: $dir_depths_remaining" |
||||
|
||||
|
||||
#summing performance is not terrible but significant on large tree - the real time is for large trees in the main loop above |
||||
#update - on really large trees the reverse is true especiallyl now that twapi fixed the original speed issues.. todo - rework/simplify below - review natsort |
||||
# |
||||
#TODO - reconsider sorting by depth.. lreverse dirs_as_encountered should work.. |
||||
if {[llength $dir_depths_longfirst] > 1} { |
||||
set i 0 |
||||
foreach dd $dir_depths_longfirst { |
||||
lassign $dd d parentidx folderidx item bytecount |
||||
#set nm $cont/$item |
||||
set nm [lindex $folders $folderidx] |
||||
set dnext [expr {$d +1}] |
||||
set nextdepthposns [lsearch -all -integer -index 0 $dir_depths_remaining $dnext] |
||||
set nextdepthposns [lsort -integer -decreasing $nextdepthposns[set nextdepthposns {}]];#remove later elements first |
||||
foreach posn $nextdepthposns { |
||||
set id [lindex $dir_depths_remaining $posn $i_itemid] |
||||
set ndirname [lindex $folders $id] |
||||
#set ndirname $cont/$item |
||||
#set item [lindex $dir_depths_remaining $posn $i_item] |
||||
#set ndirname [lindex $ndir 1] |
||||
if {[string match $nm/* $ndirname]} { |
||||
#puts stdout "dir $nm adding subdir size $ndirname" |
||||
#puts stdout "incr $nm from $ary($nm,bytes) plus $ary($ndirname,bytes)" |
||||
incr bytecount [lindex $dir_depths_remaining $posn $i_size] |
||||
set dir_depths_remaining [lreplace $dir_depths_remaining[set dir_depths_remaining {}] $posn $posn] |
||||
} |
||||
} |
||||
lset dir_depths_longfirst $i $i_size $bytecount |
||||
set p [lsearch -index $i_index -integer $dir_depths_remaining $i] |
||||
lset dir_depths_remaining $p $i_size $bytecount |
||||
#set ary($nm,bytes) $bytecount |
||||
incr i |
||||
} |
||||
} |
||||
#set dir_depths_longfirst [lsort -index 1 -decreasing $dir_depths_longfirst] |
||||
# |
||||
|
||||
set retval [list] |
||||
#copy across the bytecounts |
||||
for {set i 0} {$i < [llength $dir_depths_longfirst]} {incr i} { |
||||
set posn [lindex $dir_depths_longfirst $i $i_index] |
||||
set bytes [lindex $dir_depths_longfirst $i $i_size] |
||||
lset dirs_as_encountered $posn $i_size $bytes |
||||
} |
||||
foreach dirinfo [lreverse $dirs_as_encountered] { |
||||
set id [lindex $dirinfo $i_itemid] |
||||
set depth [lindex $dirinfo $i_depth] |
||||
if {($opt_depth >= 0) && $depth > $opt_depth} { |
||||
continue |
||||
} |
||||
set path [lindex $folders $id] |
||||
#set path $cont/$item |
||||
set item [lindex $dirinfo $i_item] |
||||
set bytes [lindex $dirinfo $i_size] |
||||
set size [expr {$bytes / [set $switch]}] |
||||
lappend retval [list $size $path] |
||||
} |
||||
# copyright 2002 by The LIGO Laboratory |
||||
return $retval |
||||
} |
||||
package require natsort |
||||
#interp alias {} du {} .=args>* punk::du |> .=>1 natsort::sort -cols 1 |> list_as_lines <args| |
||||
#use natsort -debug 2 to see index output |
||||
#this works better for display of directory/file names with spaces (doesn't show curly braces) |
||||
interp alias {} du2 {} .=args>* punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat words |> list_as_lines <args| |
||||
|
||||
#experiment with csv as easy way to get column like format.. |
||||
#The /r is somewhat cheating however.. as it messes up redirected output .. e.g if redirected to text file |
||||
interp alias {} du {} .=args>* punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat csv -outputformatoptions {\r\t\t\t} |> list_as_lines <args| |
||||
|
||||
|
||||
} |
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
## Ready |
||||
package provide punk::du [namespace eval punk::du { |
||||
variable version |
||||
set version 999999.0a1.0 |
||||
}] |
||||
return |
@ -0,0 +1,3 @@
|
||||
0.1.0 |
||||
#First line must be a semantic version number |
||||
#all other lines are ignored. |
Loading…
Reference in new issue