#copyright 2023 Julian Marcel Noble #license: BSD (revised 3-clause) # #Note shellfilter is currently only directly useful for unidirectional channels e.g stdin,stderr,stdout, or for example fifo2 where only one direction is being used. #To generalize this to bidrectional channels would require shifting around read & write methods on transform objects in a very complicated manner. #e.g each transform would probably be a generic transform container which holds sub-objects to which read & write are indirected. #This is left as a future exercise...possibly it's best left as a concept for uni-directional channels anyway # - as presumably the reads/writes from a bidirectional channel could be diverted off to unidirectional pipelines for processing with less work # (and maybe even better speed/efficiency if the data volume is asymmetrical and there is significant processing on one direction) # tcl::namespace::eval shellfilter::log { variable allow_adhoc_tags 1 variable open_logs [tcl::dict::create] #'tag' is an identifier for the log source. # each tag will use it's own thread to write to the configured log target proc open {tag {settingsdict {}}} { upvar ::shellfilter::sources sourcelist package require shellthread if {![dict exists $settingsdict -tag]} { tcl::dict::set settingsdict -tag $tag } else { #review if {$tag ne [tcl::dict::get $settingsdict -tag]} { error "shellfilter::log::open first argument tag: '$tag' does not match -tag '[tcl::dict::get $settingsdict -tag]' omit -tag, or supply same value" } } if {$tag ni $sourcelist} { lappend sourcelist $tag } #note new_worker set worker_tid [shellthread::manager::new_worker $tag $settingsdict] #puts stderr "shellfilter::log::open this_threadid: [thread::id] tag: $tag worker_tid: $worker_tid" return $worker_tid } proc write {tag msg} { upvar ::shellfilter::sources sourcelist variable allow_adhoc_tags if {!$allow_adhoc_tags} { if {$tag ni $sourcelist} { error "shellfilter::log::write tag '$tag' hasn't been initialised with a call to shellfilter::log::open $tag , and allow_adhoc_tags has been set false. use shellfilter::log::require_open false to allow adhoc tags" } } shellthread::manager::write_log $tag $msg } #write_sync - synchronous processing with logging thread, slower but potentially useful for debugging/testing or forcing delay til log written proc write_sync {tag msg} { shellthread::manager::write_log $tag $msg -async 0 } proc close {tag} { #shellthread::manager::close_worker $tag shellthread::manager::unsubscribe [list $tag]; #workertid will be added back to free list if no tags remain subscribed } #review #configure whether we can call shellfilter::log::write without having called open first proc require_open {{is_open_required {}}} { variable allow_adhoc_tags if {![string length $is_open_required]} { return $allow_adhoc_tags } else { set truevalues [list y yes true 1] set falsevalues [list n no false 0] if {[string tolower $is_open_required] in $truevalues} { set allow_adhoc_tags 1 } elseif {[string tolower $is_open_required] in $falsevalues} { set allow_adhoc_tags 0 } else { error "shellfilter::log::require_open unrecognised value '$is_open_required' try one of $truevalues or $falsevalues" } } } } namespace eval shellfilter::pipe { #write channel for program. workerthread reads other end of fifo2 and writes data somewhere proc open_out {tag_pipename {pipesettingsdict {}}} { set defaultsettings {-buffering full} set settingsdict [dict merge $defaultsettings $pipesettingsdict] package require shellthread #we are only using the fifo in a single direction to pipe to another thread # - so whilst wchan and rchan could theoretically each be both read & write we're only using them for one operation each if {![catch {package require Memchan}]} { lassign [fifo2] wchan rchan } else { package require tcl::chan::fifo2 lassign [tcl::chan::fifo2] wchan rchan } #default -translation for both types of fifo on windows is {auto crlf} # -encoding is as per '[encoding system]' on the platform - e.g utf-8 (e.g windows when beta-utf8 enabled) chan configure $wchan -buffering [dict get $settingsdict -buffering] ;# #application end must not be binary for our filters to operate on it #chan configure $rchan -buffering [dict get $settingsdict -buffering] -translation binary ;#works reasonably.. chan configure $rchan -buffering [dict get $settingsdict -buffering] -translation lf set worker_tid [shellthread::manager::new_pipe_worker $tag_pipename $settingsdict] #puts stderr "worker_tid: $worker_tid" #set_read_pipe does the thread::transfer of the rchan end. -buffering setting is maintained during thread transfer shellthread::manager::set_pipe_read_from_client $tag_pipename $worker_tid $rchan set pipeinfo [list localchan $wchan remotechan $rchan workertid $worker_tid direction out] return $pipeinfo } #read channel for program. workerthread writes to other end of fifo2 from whereever it's reading (stdin, file?) proc open_in {tag_pipename {settingsdict {} }} { package require shellthread package require tcl::chan::fifo2 lassign [tcl::chan::fifo2] wchan rchan set program_chan $rchan set worker_chan $wchan chan configure $worker_chan -buffering [dict get $settingsdict -buffering] chan configure $program_chan -buffering [dict get $settingsdict -buffering] chan configure $program_chan -blocking 0 chan configure $worker_chan -blocking 0 set worker_tid [shellthread::manager::new_worker $tag_pipename $settingsdict] shellthread::manager::set_pipe_write_to_client $tag_pipename $worker_tid $worker_chan set pipeinfo [list localchan $program_chan remotechan $worker_chan workertid $worker_tid direction in] puts stderr "|jn>pipe::open_in returning $pipeinfo" puts stderr "program_chan: [chan conf $program_chan]" return $pipeinfo } } namespace eval shellfilter::ansi { #maint warning - #ansistrip from punk::ansi is better/more comprehensive proc stripcodes {text} { #obsolete? #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic dict set escape_terminals OSC [list \007 \033\\] ;#note mix of 1 and 2-byte terminals #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. #line endings can theoretically occur within an ansi escape sequence (review e.g title?) set inputlist [split $text ""] set outputlist [list] #self-contained 2 byte ansi escape sequences - review more? set 2bytecodes_dict [dict create\ "reset_terminal" "\033c"\ "save_cursor_posn" "\u001b7"\ "restore_cursor_posn" "\u001b8"\ "cursor_up_one" "\u001bM"\ ] set 2bytecodes [dict values $2bytecodes_dict] set in_escapesequence 0 #assumption - undertext already 'rendered' - ie no backspaces or carriagereturns or other cursor movement controls set i 0 foreach u $inputlist { set v [lindex $inputlist $i+1] set uv ${u}${v} if {$in_escapesequence eq "2b"} { #2nd byte - done. set in_escapesequence 0 } elseif {$in_escapesequence != 0} { set escseq [dict get $escape_terminals $in_escapesequence] if {$u in $escseq} { set in_escapesequence 0 } elseif {$uv in $escseq} { set in_escapseequence 2b ;#flag next byte as last in sequence } } else { #handle both 7-bit and 8-bit CSI and OSC if {[regexp {^(?:\033\[|\u009b)} $uv]} { set in_escapesequence CSI } elseif {[regexp {^(?:\033\]|\u009c)} $uv]} { set in_escapesequence OSC } elseif {$uv in $2bytecodes} { #self-contained e.g terminal reset - don't pass through. set in_escapesequence 2b } else { lappend outputlist $u } } incr i } return [join $outputlist ""] } } namespace eval shellfilter::chan { set testobj ::shellfilter::chan::var if {$testobj ni [info commands $testobj]} { oo::class create var { variable o_datavar variable o_trecord variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] set varname [dict get $settingsdict -varname] set o_datavar $varname if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 1 ;# as a var is diversionary - default it to be a jucntion } } method initialize {ch mode} { return [list initialize finalize write] } method finalize {ch} { my destroy } method watch {ch events} { # must be present but we ignore it because we do not # post any events } #method read {ch count} { # return ? #} method write {ch bytes} { set stringdata [encoding convertfrom $o_enc $bytes] append $o_datavar $stringdata return "" } method meta_is_redirection {} { return $o_is_junction } method meta_buffering_supported {} { return [list line full none] } } #todo - something similar for multiple grep specs each with own -pre & -post .. store to dict? oo::class create tee_grep_to_var { variable o_datavar variable o_lastxlines variable o_trecord variable o_grepfor variable o_prelines variable o_postlines variable o_postcountdown variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [tcl::dict::get $tf -encoding] set o_lastxlines [list] set o_postcountdown 0 set defaults [tcl::dict::create -pre 1 -post 1] set settingsdict [tcl::dict::get $tf -settings] set settings [tcl::dict::merge $defaults $settingsdict] set o_datavar [tcl::dict::get $settings -varname] set o_grepfor [tcl::dict::get $settings -grep] set o_prelines [tcl::dict::get $settings -pre] set o_postlines [tcl::dict::get $settings -post] if {[tcl::dict::exists $tf -junction]} { set o_is_junction [tcl::dict::get $tf -junction] } else { set o_is_junction 0 } } method initialize {transform_handle mode} { return [list initialize finalize write] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } #method read {transform_handle count} { # return ? #} method write {transform_handle bytes} { set logdata [tcl::encoding::convertfrom $o_enc $bytes] set lastx $o_lastxlines lappend o_lastxlines $logdata if {$o_postcountdown > 0} { append $o_datavar $logdata if {[regexp $o_grepfor $logdata]} { #another match in postlines set o_postcountdown $o_postlines } else { incr o_postcountdown -1 } } else { if {[regexp $o_grepfor $logdata]} { append $o_datavar [join $lastx] append $o_datavar $logdata set o_postcountdown $o_postlines } } if {[llength $o_lastxlines] > $o_prelines} { set o_lastxlines [lrange $o_lastxlines 1 end] } return $bytes } method meta_is_redirection {} { return $o_is_junction } method meta_buffering_supported {} { return [list line] } } oo::class create tee_to_var { variable o_datavars variable o_trecord variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [tcl::dict::get $tf -encoding] set settingsdict [tcl::dict::get $tf -settings] set varname [tcl::dict::get $settingsdict -varname] set o_datavars $varname if {[tcl::dict::exists $tf -junction]} { set o_is_junction [tcl::dict::get $tf -junction] } else { set o_is_junction 0 } } method initialize {ch mode} { return [list initialize finalize write flush clear] } method finalize {ch} { my destroy } method clear {ch} { return } method watch {ch events} { # must be present but we ignore it because we do not # post any events } #method read {ch count} { # return ? #} method flush {ch} { return "" } method write {ch bytes} { set stringdata [tcl::encoding::convertfrom $o_enc $bytes] foreach v $o_datavars { append $v $stringdata } return $bytes } method meta_is_redirection {} { return $o_is_junction } } oo::class create tee_to_pipe { variable o_logsource variable o_localchan variable o_enc variable o_trecord variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [tcl::dict::get $tf -encoding] set settingsdict [tcl::dict::get $tf -settings] if {![dict exists $settingsdict -tag]} { error "tee_to_pipe constructor settingsdict missing -tag" } set o_localchan [tcl::dict::get $settingsdict -pipechan] set o_logsource [tcl::dict::get $settingsdict -tag] if {[tcl::dict::exists $tf -junction]} { set o_is_junction [tcl::dict::get $tf -junction] } else { set o_is_junction 0 } } method initialize {transform_handle mode} { return [list initialize read drain write flush clear finalize] } method finalize {transform_handle} { ::shellfilter::log::close $o_logsource my destroy } method watch {transform_handle events} { # must be present but we ignore it because we do not # post any events } method clear {transform_handle} { return } method drain {transform_handle} { return "" } method read {transform_handle bytes} { set logdata [tcl::encoding::convertfrom $o_enc $bytes] #::shellfilter::log::write $o_logsource $logdata puts -nonewline $o_localchan $logdata return $bytes } method flush {transform_handle} { return "" } method write {transform_handle bytes} { set logdata [tcl::encoding::convertfrom $o_enc $bytes] #::shellfilter::log::write $o_logsource $logdata puts -nonewline $o_localchan $logdata return $bytes } #a tee is not a redirection - because data still flows along the main path method meta_is_redirection {} { return $o_is_junction } } oo::class create tee_to_log { variable o_tid variable o_logsource variable o_trecord variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [tcl::dict::get $tf -encoding] set settingsdict [tcl::dict::get $tf -settings] if {![tcl::dict::exists $settingsdict -tag]} { error "tee_to_log constructor settingsdict missing -tag" } set o_logsource [tcl::dict::get $settingsdict -tag] set o_tid [::shellfilter::log::open $o_logsource $settingsdict] if {[tcl::dict::exists $tf -junction]} { set o_is_junction [tcl::dict::get $tf -junction] } else { set o_is_junction 0 } } method initialize {ch mode} { return [list initialize read write finalize] } method finalize {ch} { ::shellfilter::log::close $o_logsource my destroy } method watch {ch events} { # must be present but we ignore it because we do not # post any events } method read {ch bytes} { set logdata [tcl::encoding::convertfrom $o_enc $bytes] ::shellfilter::log::write $o_logsource $logdata return $bytes } method write {ch bytes} { set logdata [tcl::encoding::convertfrom $o_enc $bytes] ::shellfilter::log::write $o_logsource $logdata return $bytes } method meta_is_redirection {} { return $o_is_junction } } oo::class create logonly { variable o_tid variable o_logsource variable o_trecord variable o_enc constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {![dict exists $settingsdict -tag]} { error "logonly constructor settingsdict missing -tag" } set o_logsource [dict get $settingsdict -tag] set o_tid [::shellfilter::log::open $o_logsource $settingsdict] } method initialize {transform_handle mode} { return [list initialize finalize write] } method finalize {transform_handle} { ::shellfilter::log::close $o_logsource my destroy } method watch {transform_handle events} { # must be present but we ignore it because we do not # post any events } #method read {transform_handle count} { # return ? #} method write {transform_handle bytes} { set logdata [encoding convertfrom $o_enc $bytes] if 0 { if {"utf-16le" in [encoding names]} { set logdata [encoding convertfrom utf-16le $bytes] } else { set logdata [encoding convertto utf-8 $bytes] #set logdata [encoding convertfrom unicode $bytes] #set logdata $bytes } } #set logdata $bytes #set logdata [string map [list \r -r- \n -n-] $logdata] #if {[string equal [string range $logdata end-1 end] "\r\n"]} { # set logdata [string range $logdata 0 end-2] #} #::shellfilter::log::write_sync $o_logsource $logdata ::shellfilter::log::write $o_logsource $logdata #return $bytes return } method meta_is_redirection {} { return 1 } } #review - we should probably provide a more narrow filter than only strips color - and one that strips most(?) # - but does it ever really make sense to strip things like "esc(0" and "esc(B" which flip to the G0 G1 characters? (once stripped - things like box-lines become ordinary letters - unlikely to be desired?) #punk::ansi::ansistrip converts at least some of the box drawing G0 chars to unicode - todo - more complete conversion #assumes line-buffering. a more advanced filter required if ansicodes can arrive split across separate read or write operations! oo::class create ansistrip { variable o_trecord variable o_enc variable o_is_junction constructor {tf} { package require punk::ansi set o_trecord $tf set o_enc [dict get $tf -encoding] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } } method initialize {transform_handle mode} { return [list initialize read write clear flush drain finalize] } method finalize {transform_handle} { my destroy } method clear {transform_handle} { return } method watch {transform_handle events} { } method drain {transform_handle} { return "" } method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring [punk::ansi::ansistrip $instring] return [encoding convertto $o_enc $outstring] } method flush {transform_handle} { return "" } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring [punk::ansi::ansistrip $instring] return [encoding convertto $o_enc $outstring] } method meta_is_redirection {} { return $o_is_junction } } #a test oo::class create reconvert { variable o_trecord variable o_enc constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] } method initialize {transform_handle mode} { return [list initialize read write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } } oo::define reconvert { method meta_is_redirection {} { return 0 } } #this isn't a particularly nice thing to do to a stream - especially if someone isn't expecting ansi codes sprinkled through it. #It can be useful for test/debugging #Due to chunking at random breaks - we have to check if an ansi code in the underlying stream has been split - otherwise our wrapping will break the existing ansi # oo::class create ansiwrap { variable o_trecord variable o_enc variable o_colour variable o_do_colour variable o_do_normal variable o_is_junction variable o_codestack variable o_gx_state ;#on/off alt graphics variable o_buffered constructor {tf} { package require punk::ansi set o_trecord $tf set o_enc [tcl::dict::get $tf -encoding] set settingsdict [tcl::dict::get $tf -settings] if {[tcl::dict::exists $settingsdict -colour]} { set o_colour [tcl::dict::get $settingsdict -colour] set o_do_colour [punk::ansi::a+ {*}$o_colour] set o_do_normal [punk::ansi::a] } else { set o_colour {} set o_do_colour "" set o_do_normal "" } set o_codestack [list] set o_gx_state [expr {off}] set o_buffered "" ;#hold back data that potentially contains partial ansi codes if {[tcl::dict::exists $tf -junction]} { set o_is_junction [tcl::dict::get $tf -junction] } else { set o_is_junction 0 } } method Trackcodes {chunk} { #puts stdout "===[ansistring VIEW -lf 1 $o_buffered]" set buf $o_buffered$chunk set emit "" if {[string last \x1b $buf] >= 0} { #detect will detect ansi SGR and gron groff and other codes if {[punk::ansi::ta::detect $buf]} { #split_codes_single regex faster than split_codes - but more resulting parts #'single' refers to number of escapes - but can still contain e.g multiple SGR codes (or mode set operations etc) set parts [punk::ansi::ta::split_codes_single $buf] #process all pt/code pairs except for trailing pt foreach {pt code} [lrange $parts 0 end-1] { #puts "<==[ansistring VIEW -lf 1 $pt]==>" if {( ![llength $o_codestack] || ([llength $o_codestack] == 1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]]))} { append emit $o_do_colour$pt$o_do_normal #append emit $pt } else { append emit $pt } set c1c2 [tcl::string::range $code 0 1] set leadernorm [tcl::string::range [tcl::string::map [list\ \x1b\[ 7CSI\ \x9b 8CSI\ \x1b\( 7GFX\ ] $c1c2] 0 3] switch -- $leadernorm { 7CSI - 8CSI { if {[punk::ansi::codetype::is_sgr_reset $code]} { set o_codestack [list "\x1b\[m"] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set o_codestack [list $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #todo - make caching is_sgr method set dup_posns [lsearch -all -exact $o_codestack $code] set o_codestack [lremove $o_codestack {*}$dup_posns] lappend o_codestack $code } else { } } 7GFX { switch -- [tcl::string::index $code 2] { "0" { set o_gx_state on } "B" { set o_gx_state off } } } default { #other ansi codes } } append emit $code } set trailing_pt [lindex $parts end] if {[string first \x1b $trailing_pt] >= 0} { #puts stdout "...[ansistring VIEW -lf 1 $trailing_pt]...buffered:<[ansistring VIEW $o_buffered]> '[ansistring VIEW -lf 1 $emit]'" #may not be plaintext after all set o_buffered $trailing_pt #puts stdout "=-=[ansistring VIEWCODES $o_buffered]" } else { #puts [a+ yellow]???[ansistring VIEW "'$o_buffered'<+>'$trailing_pt'"]???[a] if {![llength $o_codestack] || ([llength $o_codestack] ==1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]])} { append emit $o_do_colour$trailing_pt$o_do_normal } else { append emit $trailing_pt } #the previous o_buffered formed the data we emitted - nothing new to buffer because we emitted all parts including the trailing plaintext set o_buffered "" } } else { #puts "-->esc but no detect" #no complete ansi codes - but at least one esc is present if {[string last \x1b $buf] == [llength $buf]-1} { #only esc is last char in buf #puts ">>trailing-esc<<" set o_buffered \x1b set emit [string range $buf 0 end-1] } else { set emit_anyway 0 #todo - ensure non-ansi escapes in middle of chunks don't lead to ever growing buffer if {[punk::ansi::ta::detect_st_open $buf]} { #no detect - but we have an ST open (privacy msg etc) - allow a larger chunk before we give up - could include newlines (and even nested codes - although not widely interpreted that way in terms) set st_partial_len [expr {[llength $buf] - [string last \x1b $buf]}] ;#length of unclosed ST code #todo - configurable ST max - use 1k for now if {$st_partial_len < 1001} { append o_buffered $chunk set emit "" } else { set emit_anyway 1 } } else { set possible_code_len [expr {[llength $buf] - [string last \x1b $buf]}] ;#length of possible code #most opening sequences are 1,2 or 3 chars - review? set open_sequence_detected [punk::ansi::ta::detect_open $buf] if {$possible_code_len > 10 && !$open_sequence_detected} { set emit_anyway 1 } else { #could be composite sequence with params - allow some reasonable max sequence length #todo - configurable max sequence length #len 40-50 quite possible for SGR sequence using coloured underlines etc, even without redundancies # - allow some headroom for redundant codes when the caller didn't merge. if {$possible_code_len < 101} { append o_buffered $chunk set emit "" } else { #allow a little more grace if we at least have an opening ansi sequence of any type.. if {$open_sequence_detected && $possible_code_len < 151} { append o_buffered $chunk set emit "" } else { set emit_anyway 1 } } } } if {$emit_anyway} { #looked ansi-like - but we've given enough length without detecting close.. #treat as possible plain text with some esc or unrecognised ansi sequence if {( ![llength $o_codestack] || ([llength $o_codestack] == 1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]]))} { set emit $o_do_colour$buf$o_do_normal } else { set emit $buf } } } } } else { #no esc #puts stdout [a+ yellow]...[a] #test! if {( ![llength $o_codestack] || ([llength $o_codestack] == 1 && [punk::ansi::codetype::is_sgr_reset [lindex $o_codestack 0]]))} { set emit $o_do_colour$buf$o_do_normal } else { set emit $buf } #set emit $buf set o_buffered "" } return [dict create emit $emit stacksize [llength $o_codestack]] } method initialize {transform_handle mode} { #clear undesirable in terminal output channels (review) return [list initialize write flush read drain finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } method clear {transform_handle} { #In the context of stderr/stdout - we probably don't want clear to run. #Terminals might call it in the middle of a split ansi code - resulting in broken output. #Leave clear of it the init call puts stdout "" set emit [tcl::encoding::convertto $o_enc $o_buffered] set o_buffered "" return $emit } method flush {transform_handle} { #puts stdout "" set emit [tcl::encoding::convertto $o_enc $o_buffered] set o_buffered "" return $emit return } method write {transform_handle bytes} { set instring [tcl::encoding::convertfrom $o_enc $bytes] set streaminfo [my Trackcodes $instring] set emit [dict get $streaminfo emit] if {[dict get $streaminfo stacksize] == 0} { #no ansi on the stack - we can wrap #review set outstring "$o_do_colour$emit$o_do_normal" } else { set outstring $emit } #puts stdout "decoded >>>[ansistring VIEWCODES $outstring]<<<" #puts stdout "re-encoded>>>[ansistring VIEW [tcl::encoding::convertto $o_enc $outstring]]<<<" return [tcl::encoding::convertto $o_enc $outstring] } method Write_naive {transform_handle bytes} { set instring [tcl::encoding::convertfrom $o_enc $bytes] set outstring "$o_do_colour$instring$o_do_normal" #set outstring ">>>$instring" return [tcl::encoding::convertto $o_enc $outstring] } method drain {transform_handle} { return "" } method read {transform_handle bytes} { set instring [tcl::encoding::convertfrom $o_enc $bytes] set outstring "$o_do_colour$instring$o_do_normal" return [tcl::encoding::convertto $o_enc $outstring] } method meta_is_redirection {} { return $o_is_junction } } #todo - something oo::class create rebuffer { variable o_trecord variable o_enc constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] } method initialize {transform_handle mode} { return [list initialize read write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] #set outstring [string map [list \n ] $instring] set outstring $instring return [encoding convertto $o_enc $outstring] #return [encoding convertto utf-16le $outstring] } } oo::define rebuffer { method meta_is_redirection {} { return 0 } } #has slight buffering/withholding of lone training cr - we can't be sure that a cr at end of chunk is part of \r\n sequence oo::class create tounix { variable o_trecord variable o_enc variable o_last_char_was_cr variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } set o_last_char_was_cr 0 } method initialize {transform_handle mode} { return [list initialize write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } #don't use read method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] #set outstring [string map [list \n ] $instring] if {$o_last_char_was_cr} { set instring "\r$instring" } set outstring [string map {\r\n \n} $instring] set lastchar [string range $outstring end end] if {$lastchar eq "\r"} { set o_last_char_was_cr 1 set outstring [string range $outstring 0 end-1] } else { set o_last_char_was_cr 0 } #review! can we detect eof here on the transform_handle? #if eof, we don't want to strip a trailing \r return [encoding convertto $o_enc $outstring] #return [encoding convertto utf-16le $outstring] } } oo::define tounix { method meta_is_redirection {} { return $o_is_junction } } #write to handle case where line-endings already \r\n too oo::class create towindows { variable o_trecord variable o_enc variable o_last_char_was_cr variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } set o_last_char_was_cr 0 } method initialize {transform_handle mode} { return [list initialize write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } #don't use read method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] #set outstring [string map [list \n ] $instring] if {$o_last_char_was_cr} { set instring "\r$instring" } set outstring [string map {\r\n \uFFFF} $instring] set outstring [string map {\n \r\n} $outstring] set outstring [string map {\uFFFF \r\n} $outstring] set lastchar [string range $outstring end end] if {$lastchar eq "\r"} { set o_last_char_was_cr 1 set outstring [string range $outstring 0 end-1] } else { set o_last_char_was_cr 0 } #review! can we detect eof here on the transform_handle? #if eof, we don't want to strip a trailing \r return [encoding convertto $o_enc $outstring] #return [encoding convertto utf-16le $outstring] } } oo::define towindows { method meta_is_redirection {} { return $o_is_junction } } } } # ---------------------------------------------------------------------------- #review float/sink metaphor. #perhaps something with the concept of upstream and downstream? #need concepts for push towards data, sit in middle where placed, and lag at tail of data stream. ## upstream for stdin is at the bottom of the stack and for stdout is the top of the stack. #upstream,neutral-upstream,downstream,downstream-aside,downstream-replace (default neutral-upstream - require action 'stack' to use standard channel stacking concept and ignore other actions) #This is is a bit different from the float/sink metaphor which refers to the channel stacking order as opposed to the data-flow direction. #The idea would be that whether input or output # upstream additions go to the side closest to the datasource # downstream additions go furthest from the datasource # - all new additions go ahead of any diversions as the most upstream diversion is the current end of the stream in a way. # - this needs review regarding subsequent removal of the diversion and whether filters re-order in response.. # or if downstream & neutral additions are reclassified upon insertion if they land among existing upstreams(?) # neutral-upstream goes to the datasource side of the neutral-upstream list. # No 'neutral' option provided so that we avoid the need to think forwards or backwards when adding stdin vs stdout shellfilter does the necessary pop/push reordering. # No 'neutral-downstream' to reduce complexity. # downstream-replace & downstream-aside head downstream to the first diversion they encounter. ie these actions are no longer referring to the stack direction but only the dataflow direction. # # ---------------------------------------------------------------------------- # # 'filters' are transforms that don't redirect # - limited range of actions to reduce complexity. # - any requirement not fulfilled by float,sink,sink-replace,sink-sideline should be done by multiple pops and pushes # #actions can float to top of filters or sink to bottom of filters #when action is of type sink, it can optionally replace or sideline the first non-filter it encounters (highest redirection on the stack.. any lower are starved of the stream anyway) # - sideline means to temporarily replace the item and keep a record, restoring if/when we are removed from the transform stack # ##when action is of type float it can't replace or sideline anything. A float is added above any existing floats and they stay in the same order relative to each other, #but non-floats added later will sit below all floats. #(review - float/sink initially designed around output channels. For stdin the dataflow is reversed. implement float-aside etc?) # # #action: float sink sink-replace,sink-sideline # # ## note - whether stack is for input or output we maintain it in the same direction - which is in sync with the tcl chan pop chan push concept. ## namespace eval shellfilter::stack { #todo - implement as oo variable pipelines [list] proc items {} { #review - stdin,stdout,stderr act as pre-existing pipelines, and we can't create a new one with these names - so they should probably be autoconfigured and listed.. # - but in what contexts? only when we find them in [chan names]? variable pipelines return [dict keys $pipelines] } proc item {pipename} { variable pipelines return [dict get $pipelines $pipename] } proc item_tophandle {pipename} { variable pipelines set handle "" if {[dict exists $pipelines $pipename stack]} { set stack [dict get $pipelines $pipename stack] set topstack [lindex $stack end] ;#last item in stack is top (for output channels anyway) review comment. input chans? if {$topstack ne ""} { if {[dict exists $topstack -handle]} { set handle [dict get $topstack -handle] } } } return $handle } proc status {{pipename *} args} { variable pipelines set pipecount [dict size $pipelines] set tableprefix "$pipecount pipelines active\n" set t [textblock::class::table new $tableprefix] $t add_column -headers [list channel-ident] $t add_column -headers [list device-info localchan] $t configure_column 1 -header_colspans {3} $t add_column -headers [list "" remotechan] $t add_column -headers [list "" tid] $t add_column -headers [list stack-info] foreach k [dict keys $pipelines $pipename] { set lc [dict get $pipelines $k device localchan] set rc [dict get $pipelines $k device remotechan] if {[dict exists $k device workertid]} { set tid [dict get $pipelines $k device workertid] } else { set tid "-" } set stack [dict get $pipelines $k stack] if {![llength $stack]} { set stackinfo "" } else { set tbl_inner [textblock::class::table new] $tbl_inner configure -show_edge 0 foreach rec $stack { set handle [punk::lib::dict_getdef $rec -handle ""] set id [punk::lib::dict_getdef $rec -id ""] set transform [namespace tail [punk::lib::dict_getdef $rec -transform ""]] set settings [punk::lib::dict_getdef $rec -settings ""] $tbl_inner add_row [list $id $transform $handle $settings] } set stackinfo [$tbl_inner print] $tbl_inner destroy } $t add_row [list $k $lc $rc $tid $stackinfo] } set result [$t print] $t destroy return $result } proc status1 {{pipename *} args} { variable pipelines set pipecount [dict size $pipelines] set tableprefix "$pipecount pipelines active\n" foreach p [dict keys $pipelines] { append tableprefix " " $p \n } package require overtype #todo -verbose set table "" set ac1 [string repeat " " 15] set ac2 [string repeat " " 42] set ac3 [string repeat " " 70] append table "[overtype::left $ac1 channel-ident] " append table "[overtype::left $ac2 device-info] " append table "[overtype::left $ac3 stack-info]" append table \n set bc1 [string repeat " " 5] ;#stack id set bc2 [string repeat " " 25] ;#transform set bc3 [string repeat " " 50] ;#settings foreach k [dict keys $pipelines $pipename] { set lc [dict get $pipelines $k device localchan] if {[dict exists $k device workertid]} { set tid [dict get $pipelines $k device workertid] } else { set tid "" } set col1 [overtype::left $ac1 $k] set col2 [overtype::left $ac2 "localchan: $lc tid:$tid"] set stack [dict get $pipelines $k stack] if {![llength $stack]} { set col3 $ac3 } else { set rec [lindex $stack 0] set bcol1 [overtype::left $bc1 [dict get $rec -id]] set bcol2 [overtype::left $bc2 [namespace tail [dict get $rec -transform]]] set bcol3 [overtype::left $bc3 [dict get $rec -settings]] set stackrow "$bcol1 $bcol2 $bcol3" set col3 [overtype::left $ac3 $stackrow] } append table "$col1 $col2 $col3\n" foreach rec [lrange $stack 1 end] { set col1 $ac1 set col2 $ac2 if {[llength $rec]} { set bc1 [overtype::left $bc1 [dict get $rec -id]] set bc2 [overtype::left $bc2 [namespace tail [dict get $rec -transform]]] set bc3 [overtype::left $bc3 [dict get $rec -settings]] set stackrow "$bc1 $bc2 $bc3" set col3 [overtype::left $ac3 $stackrow] } else { set col3 $ac3 } append table "$col1 $col2 $col3\n" } } return $tableprefix$table } #used for output channels - we usually want to sink redirections below the floaters and down to topmost existing redir proc _get_stack_floaters {stack} { set floaters [list] foreach t [lreverse $stack] { switch -- [dict get $t -action] { float { lappend floaters $t } default { break } } } return [lreverse $floaters] } #for output-channel sinking proc _get_stack_top_redirection {stack} { set r 0 ;#reverse index foreach t [lreverse $stack] { set obj [dict get $t -obj] if {[$obj meta_is_redirection]} { set idx [expr {[llength $stack] - ($r + 1) }] ;#forward index return [list index $idx record $t] } incr r } #not found return [list index -1 record {}] } #exclude float-locked, locked, sink-locked proc _get_stack_top_redirection_replaceable {stack} { set r 0 ;#reverse index foreach t [lreverse $stack] { set action [dict get $t -action] if {![string match "*locked*" $action]} { set obj [dict get $t -obj] if {[$obj meta_is_redirection]} { set idx [expr {[llength $stack] - ($r + 1) }] ;#forward index return [list index $idx record $t] } } incr r } #not found return [list index -1 record {}] } #for input-channels ? proc _get_stack_bottom_redirection {stack} { set i 0 foreach t $stack { set obj [dict get $t -obj] if {[$obj meta_is_redirection]} { return [linst index $i record $t] } incr i } #not found return [list index -1 record {}] } proc get_next_counter {pipename} { variable pipelines #use dictn incr ? set counter [dict get $pipelines $pipename counter] incr counter dict set pipelines $pipename counter $counter return $counter } proc unwind {pipename} { variable pipelines set stack [dict get $pipelines $pipename stack] set localchan [dict get $pipelines $pipename device localchan] foreach tf [lreverse $stack] { chan pop $localchan } dict set pipelines $pipename [list] } #todo proc delete {pipename {wait 0}} { variable pipelines set pipeinfo [dict get $pipelines $pipename] set deviceinfo [dict get $pipeinfo device] set localchan [dict get $deviceinfo localchan] unwind $pipename #release associated thread set tid [dict get $deviceinfo workertid] if {$wait} { thread::release -wait $tid } else { thread::release $tid } #Memchan closes without error - tcl::chan::fifo2 raises something like 'can not find channel named "rc977"' - REVIEW. why? catch {chan close $localchan} } #review - proc name clarity is questionable. remove_stackitem? proc remove {pipename remove_id} { variable pipelines if {![dict exists $pipelines $pipename]} { puts stderr "WARNING: shellfilter::stack::remove pipename '$pipename' not found in pipelines dict: '$pipelines' [info level -1]" return } set stack [dict get $pipelines $pipename stack] set localchan [dict get $pipelines $pipename device localchan] set posn 0 set idposn -1 set asideposn -1 foreach t $stack { set id [dict get $t -id] if {$id eq $remove_id} { set idposn $posn break } #look into asides (only can be one for now) if {[llength [dict get $t -aside]]} { set a [dict get $t -aside] if {[dict get $a -id] eq $remove_id} { set asideposn $posn break } } incr posn } if {$asideposn > 0} { #id wasn't found directly in stack, but in an -aside. we don't need to pop anything - just clear this aside record set container [lindex $stack $asideposn] dict set container -aside {} lset stack $asideposn $container dict set pipelines $pipename stack $stack } else { if {$idposn < 0} { ::shellfilter::log::write shellfilter "ERROR shellfilter::stack::remove $pipename id '$remove_id' not found" puts stderr "|WARNING>shellfilter::stack::remove $pipename id '$remove_id' not found" return 0 } set removed_item [lindex $stack $idposn] #include idposn in poplist set poplist [lrange $stack $idposn end] set stack [lreplace $stack $idposn end] #pop all chans before adding anything back in! foreach p $poplist { chan pop $localchan } if {[llength [dict get $removed_item -aside]]} { set restore [dict get $removed_item -aside] set t [dict get $restore -transform] set tsettings [dict get $restore -settings] set obj [$t new $restore] set h [chan push $localchan $obj] dict set restore -handle $h dict set restore -obj $obj lappend stack $restore } #put popped back except for the first one, which we want to remove foreach p [lrange $poplist 1 end] { set t [dict get $p -transform] set tsettings [dict get $p -settings] set obj [$t new $p] set h [chan push $localchan $obj] dict set p -handle $h dict set p -obj $obj lappend stack $p } dict set pipelines $pipename stack $stack } show_pipeline $pipename -note "after_remove $remove_id" return 1 } #pop a number of items of the top of the stack, add our transform record, and add back all (or the tail of poplist if pushstartindex > 0) proc insert_transform {pipename stack transformrecord poplist {pushstartindex 0}} { variable pipelines set bottom_pop_posn [expr {[llength $stack] - [llength $poplist]}] set poplist [lrange $stack $bottom_pop_posn end] set stack [lreplace $stack $bottom_pop_posn end] set localchan [dict get $pipelines $pipename device localchan] foreach p [lreverse $poplist] { chan pop $localchan } set transformname [dict get $transformrecord -transform] set transformsettings [dict get $transformrecord -settings] set obj [$transformname new $transformrecord] set h [chan push $localchan $obj] dict set transformrecord -handle $h dict set transformrecord -obj $obj dict set transformrecord -note "insert_transform" lappend stack $transformrecord foreach p [lrange $poplist $pushstartindex end] { set t [dict get $p -transform] set tsettings [dict get $p -settings] set obj [$t new $p] set h [chan push $localchan $obj] #retain previous -id - code that added it may have kept reference and not expecting it to change dict set p -handle $h dict set p -obj $obj dict set p -note "re-added" lappend stack $p } return $stack } #fifo2 proc new {pipename args} { variable pipelines if {($pipename in [dict keys $pipelines]) || ($pipename in [chan names])} { error "shellfilter::stack::new error: pipename '$pipename' already exists" } set opts [dict merge {-settings {}} $args] set defaultsettings [dict create -raw 1 -buffering line -direction out] set targetsettings [dict merge $defaultsettings [dict get $opts -settings]] set direction [dict get $targetsettings -direction] #pipename is the source/facility-name ? if {$direction eq "out"} { set pipeinfo [shellfilter::pipe::open_out $pipename $targetsettings] } else { puts stderr "|jn> pipe::open_in $pipename $targetsettings" set pipeinfo [shellfilter::pipe::open_in $pipename $targetsettings] } #open_out/open_in will configure buffering based on targetsettings set program_chan [dict get $pipeinfo localchan] set worker_chan [dict get $pipeinfo remotechan] set workertid [dict get $pipeinfo workertid] set deviceinfo [dict create pipename $pipename localchan $program_chan remotechan $worker_chan workertid $workertid direction $direction] dict set pipelines $pipename [list counter 0 device $deviceinfo stack [list]] return $deviceinfo } #we 'add' rather than 'push' because transforms can float,sink and replace/sideline so they don't necessarily go to the top of the transform stack proc add {pipename transformname args} { variable pipelines #chan names doesn't reflect available channels when transforms are in place #e.g stdout may exist but show as something like file191f5b0dd80 if {($pipename ni [dict keys $pipelines])} { if {[catch {eof $pipename} is_eof]} { error "shellfilter::stack::add no existing chan or pipename matching '$pipename' in channels:[chan names] or pipelines:$pipelines use stdin/stderr/stdout or shellfilter::stack::new " } } set args [dict merge {-action "" -settings {}} $args] set action [dict get $args -action] set transformsettings [dict get $args -settings] if {[string first "::" $transformname] < 0} { set transformname ::shellfilter::chan::$transformname } if {![llength [info commands $transformname]]} { error "shellfilter::stack::push unknown transform '$transformname'" } if {![dict exists $pipelines $pipename]} { #pipename must be in chan names - existing device/chan #record a -read and -write end even if the device is only being used as one or the other set deviceinfo [dict create pipename $pipename localchan $pipename remotechan {}] dict set pipelines $pipename [list counter 0 device $deviceinfo stack [list]] } else { set deviceinfo [dict get $pipelines $pipename device] } set id [get_next_counter $pipename] set stack [dict get $pipelines $pipename stack] set localchan [dict get $deviceinfo localchan] #we redundantly store chan in each transform - makes debugging clearer # -encoding similarly could be stored only at the pipeline level (or even queried directly each filter-read/write), # but here it may help detect unexpected changes during lifetime of the stack and avoids the chance of callers incorrectly using the transform handle?) # jn set transform_record [list -id $id -chan $pipename -encoding [chan configure $localchan -encoding] -transform $transformname -aside {} {*}$args] switch -glob -- $action { float - float-locked { set obj [$transformname new $transform_record] set h [chan push $localchan $obj] dict set transform_record -handle $h dict set transform_record -obj $obj lappend stack $transform_record } "" - locked { set floaters [_get_stack_floaters $stack] if {![llength $floaters]} { set obj [$transformname new $transform_record] set h [chan push $localchan $obj] dict set transform_record -handle $h dict set transform_record -obj $obj lappend stack $transform_record } else { set poplist $floaters set stack [insert_transform $pipename $stack $transform_record $poplist] } } "sink*" { set redirinfo [_get_stack_top_redirection $stack] set idx_existing_redir [dict get $redirinfo index] if {$idx_existing_redir == -1} { #no existing redirection transform on the stack #pop everything.. add this record as the first redirection on the stack set poplist $stack set stack [insert_transform $pipename $stack $transform_record $poplist] } else { switch -glob -- $action { "sink-replace" { #include that index in the poplist set poplist [lrange $stack $idx_existing_redir end] #pop all from idx_existing_redir to end, but put back 'lrange $poplist 1 end' set stack [insert_transform $pipename $stack $transform_record $poplist 1] } "sink-aside*" { set existing_redir_record [lindex $stack $idx_existing_redir] if {[string match "*locked*" [dict get $existing_redir_record -action]]} { set put_aside 0 #we can't aside this one - sit above it instead. set poplist [lrange $stack $idx_existing_redir+1 end] set stack [lrange $stack 0 $idx_existing_redir] } else { set put_aside 1 dict set transform_record -aside [lindex $stack $idx_existing_redir] set poplist [lrange $stack $idx_existing_redir end] set stack [lrange $stack 0 $idx_existing_redir-1] } foreach p $poplist { chan pop $localchan } set transformname [dict get $transform_record -transform] set transform_settings [dict get $transform_record -settings] set obj [$transformname new $transform_record] set h [chan push $localchan $obj] dict set transform_record -handle $h dict set transform_record -obj $obj dict set transform_record -note "insert_transform-with-aside" lappend stack $transform_record #add back poplist *except* the one we transferred into -aside (if we were able) foreach p [lrange $poplist $put_aside end] { set t [dict get $p -transform] set tsettings [dict get $p -settings] set obj [$t new $p] set h [chan push $localchan $obj] #retain previous -id - code that added it may have kept reference and not expecting it to change dict set p -handle $h dict set p -obj $obj dict set p -note "re-added-after-sink-aside" lappend stack $p } } default { #plain "sink" #we only sink to the topmost redirecting filter - which makes sense for an output channel #For stdin.. this is more problematic as we're more likely to want to intercept the bottom most redirection. #todo - review. Consider making default insert position for input channels to be at the source... and float/sink from there. # - we don't currently know from the stack api if adding input vs output channel - so this needs work to make intuitive. # consider splitting stack::add to stack::addinput stack::addoutput to split the different behaviour set poplist [lrange $stack $idx_existing_redir+1 end] set stack [insert_transform $pipename $stack $transform_record $poplist] } } } } default { error "shellfilter::stack::add unimplemented action '$action'" } } dict set pipelines $pipename stack $stack #puts stdout "==" #puts stdout "==>stack: $stack" #puts stdout "==" show_pipeline $pipename -note "after_add $transformname $args" return $id } proc show_pipeline {pipename args} { variable pipelines set stack [dict get $pipelines $pipename stack] set tag "SHELLFILTER::STACK" #JMN - load from config #::shellfilter::log::open $tag {-syslog 127.0.0.1:514} ::shellfilter::log::open $tag {-syslog ""} ::shellfilter::log::write $tag "transform stack for $pipename $args" foreach tf $stack { ::shellfilter::log::write $tag " $tf" } } } namespace eval shellfilter { variable sources [list] variable stacks [dict create] proc ::shellfilter::redir_channel_to_log {chan args} { variable sources set default_logsettings [dict create \ -tag redirected_$chan -syslog "" -file ""\ ] if {[dict exists $args -action]} { set action [dict get $args -action] } else { # action "sink" is a somewhat reasonable default for an output redirection transform # but it can make it harder to configure a plain ordered stack if the user is not expecting it, so we'll default to stack # also.. for stdin transform sink makes less sense.. #todo - default "stack" instead of empty string set action "" } if {[dict exists $args -settings]} { set logsettings [dict get $args -settings] } else { set logsettings {} } set logsettings [dict merge $default_logsettings $logsettings] set tag [dict get $logsettings -tag] if {$tag ni $sources} { lappend sources $tag } set id [shellfilter::stack::add $chan logonly -action $action -settings $logsettings] return $id } proc ::shellfilter::redir_output_to_log {tagprefix args} { variable sources set default_settings [list -tag ${tagprefix} -syslog "" -file ""] set opts [dict create -action "" -settings {}] set opts [dict merge $opts $args] set optsettings [dict get $opts -settings] set settings [dict merge $default_settings $optsettings] set tag [dict get $settings -tag] if {$tag ne $tagprefix} { error "shellfilter::redir_output_to_log -tag value must match supplied tagprefix:'$tagprefix'. Omit -tag, or make it the same. It will automatically be suffixed with stderr and stdout. Use redir_channel_to_log if you want to separately configure each channel" } lappend sources ${tagprefix}stdout ${tagprefix}stderr set stdoutsettings $settings dict set stdoutsettings -tag ${tagprefix}stdout set stderrsettings $settings dict set stderrsettings -tag ${tagprefix}stderr set idout [redir_channel_to_log stdout -action [dict get $opts -action] -settings $stdoutsettings] set iderr [redir_channel_to_log stderr -action [dict get $opts -action] -settings $stderrsettings] return [list $idout $iderr] } #return a dict keyed on numerical list index showing info about each element # - particularly # 'wouldbrace' to indicate that the item would get braced by Tcl when added to another list # 'head_tail_chars' to show current first and last character (in case it's wrapped e.g in double or single quotes or an existing set of braces) proc list_element_info {inputlist} { set i 0 set info [dict create] set testlist [list] foreach original_item $inputlist { #--- # avoid sharing internal rep with original items in the list (avoids shimmering of rep in original list for certain items such as paths) unset -nocomplain item append item $original_item {} #--- set iteminfo [dict create] set itemlen [string length $item] lappend testlist $item set tcl_len [string length $testlist] set diff [expr {$tcl_len - $itemlen}] if {$diff == 0} { dict set iteminfo wouldbrace 0 dict set iteminfo wouldescape 0 } else { #test for escaping vs bracing! set testlistchars [split $testlist ""] if {([lindex $testlistchars 0] eq "\{") && ([lindex $testlistchars end] eq "\}")} { dict set iteminfo wouldbrace 1 dict set iteminfo wouldescape 0 } else { dict set iteminfo wouldbrace 0 dict set iteminfo wouldescape 1 } } set testlist [list] set charlist [split $item ""] set char_a [lindex $charlist 0] set char_b [lindex $charlist 1] set char_ab ${char_a}${char_b} set char_y [lindex $charlist end-1] set char_z [lindex $charlist end] set char_yz ${char_y}${char_z} if { ("{" in $charlist) || ("}" in $charlist) } { dict set iteminfo has_braces 1 set innerchars [lrange $charlist 1 end-1] if {("{" in $innerchars) || ("}" in $innerchars)} { dict set iteminfo has_inner_braces 1 } else { dict set iteminfo has_inner_braces 0 } } else { dict set iteminfo has_braces 0 dict set iteminfo has_inner_braces 0 } #todo - brace/char counting to determine if actually 'wrapped' #e.g we could have list element {((abc)} - which appears wrapped if only looking at first and last chars. #also {(x) (y)} as a list member.. how to treat? if {$itemlen <= 1} { dict set iteminfo apparentwrap "not" } else { #todo - switch on $char_a$char_z if {($char_a eq {"}) && ($char_z eq {"})} { dict set iteminfo apparentwrap "doublequotes" } elseif {($char_a eq "'") && ($char_z eq "'")} { dict set iteminfo apparentwrap "singlequotes" } elseif {($char_a eq "(") && ($char_z eq ")")} { dict set iteminfo apparentwrap "brackets" } elseif {($char_a eq "\{") && ($char_z eq "\}")} { dict set iteminfo apparentwrap "braces" } elseif {($char_a eq "^") && ($char_z eq "^")} { dict set iteminfo apparentwrap "carets" } elseif {($char_a eq "\[") && ($char_z eq "\]")} { dict set iteminfo apparentwrap "squarebrackets" } elseif {($char_a eq "`") && ($char_z eq "`")} { dict set iteminfo apparentwrap "backquotes" } elseif {($char_a eq "\n") && ($char_z eq "\n")} { dict set iteminfo apparentwrap "lf-newline" } elseif {($char_ab eq "\r\n") && ($char_yz eq "\r\n")} { dict set iteminfo apparentwrap "crlf-newline" } else { dict set iteminfo apparentwrap "not-determined" } } dict set iteminfo wrapbalance "unknown" ;#a hint to caller that apparentwrap is only a guide. todo - possibly make wrapbalance indicate 0 for unbalanced.. and positive numbers for outer-count of wrappings. #e.g {((x)} == 0 {((x))} == 1 {(x) (y (z))} == 2 dict set iteminfo head_tail_chars [list $char_a $char_z] set namemap [list \ \r cr\ \n lf\ {"} doublequote\ {'} singlequote\ "`" backquote\ "^" caret\ \t tab\ " " sp\ "\[" lsquare\ "\]" rsquare\ "(" lbracket\ ")" rbracket\ "\{" lbrace\ "\}" rbrace\ \\ backslash\ / forwardslash\ ] if {[string length $char_a]} { set char_a_name [string map $namemap $char_a] } else { set char_a_name "emptystring" } if {[string length $char_z]} { set char_z_name [string map $namemap $char_z] } else { set char_z_name "emptystring" } dict set iteminfo head_tail_names [list $char_a_name $char_z_name] dict set iteminfo len $itemlen dict set iteminfo difflen $diff ;#2 for braces, 1 for quoting?, or 0. dict set info $i $iteminfo incr i } return $info } #parse bracketed expression (e.g produced by vim "shellxquote=(" ) into a tcl (nested) list #e.g {(^c:/my spacey/path^ >^somewhere^)} #e.g {(blah (etc))}" #Result is always a list - even if only one toplevel set of brackets - so it may need [lindex $result 0] if input is the usual case of {( ...)} # - because it also supports the perhaps less likely case of: {( ...) unbraced (...)} etc # Note that #maintenance warning - duplication in branches for bracketed vs unbracketed! proc parse_cmd_brackets {str} { #wordwrappers currently best suited to non-bracket entities - no bracket matching within - anything goes until end-token reached. # - but.. they only take effect where a word can begin. so a[x y] may be split at the space unless it's within some other wraper e.g " a[x y]" will not break at the space # todo - consider extending the in-word handling of word_bdepth which is currently only applied to () i.e aaa(x y) is supported but aaa[x y] is not as the space breaks the word up. set wordwrappers [list \ "\"" [list "\"" "\"" "\""]\ {^} [list "\"" "\"" "^"]\ "'" [list "'" "'" "'"]\ "\{" [list "\{" "\}" "\}"]\ {[} [list {[} {]} {]}]\ ] ;#dict mapping start_character to {replacehead replacetail expectedtail} set shell_specials [list "|" "|&" "<" "<@" "<<" ">" "2>" ">&" ">>" "2>>" ">>&" ">@" "2>@" "2>@1" ">&@" "&" "&&" ] ;#words/chars that may precede an opening bracket but don't merge with the bracket to form a word. #puts "pb:$str" set in_bracket 0 set in_word 0 set word "" set result {} set word_bdepth 0 set word_bstack [list] set wordwrap "" ;#only one active at a time set bracketed_elements [dict create] foreach char [split $str ""] { #puts "c:$char bracketed:$bracketed_elements" if {$in_bracket > 0} { if {$in_word} { if {[string length $wordwrap]} { #anything goes until end-char #todo - lookahead and only treat as closing if before a space or ")" ? lassign [dict get $wordwrappers $wordwrap] _open closing endmark if {$char eq $endmark} { set wordwrap "" append word $closing dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 } else { append word $char } } else { if {$word_bdepth == 0} { #can potentially close off a word - or start a new one if word-so-far is a shell-special if {$word in $shell_specials} { if {$char eq ")"} { dict lappend bracketed_elements $in_bracket $word set subresult [dict get $bracketed_elements $in_bracket] dict set bracketed_elements $in_bracket [list] incr in_bracket -1 if {$in_bracket == 0} { lappend result $subresult } else { dict lappend bracketed_elements $in_bracket $subresult } set word "" set in_word 0 } elseif {[regexp {[\s]} $char]} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 } elseif {$char eq "("} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 incr in_bracket } else { #at end of shell-specials is another point to look for word started by a wordwrapper char #- expect common case of things like >^/my/path^ if {$char in [dict keys $wordwrappers]} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 1 ;#just for explicitness.. we're straight into the next word. set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { #something unusual.. keep going with word! append word $char } } } else { if {$char eq ")"} { dict lappend bracketed_elements $in_bracket $word set subresult [dict get $bracketed_elements $in_bracket] dict set bracketed_elements $in_bracket [list] incr in_bracket -1 if {$in_bracket == 0} { lappend result $subresult } else { dict lappend bracketed_elements $in_bracket $subresult } set word "" set in_word 0 } elseif {[regexp {[\s]} $char]} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 } elseif {$char eq "("} { #ordinary word up-against and opening bracket - brackets are part of word. incr word_bdepth append word "(" } else { append word $char } } } else { #currently only () are used for word_bdepth - todo add all or some wordwrappers chars so that the word_bstack can have multiple active. switch -- $char { "(" { incr word_bdepth lappend word_bstack $char append word $char } ")" { incr word_bdepth -1 set word_bstack [lrange $word_bstack 0 end-1] append word $char } default { #spaces and chars added to word as it's still in a bracketed section append word $char } } } } } else { if {$char eq "("} { incr in_bracket } elseif {$char eq ")"} { set subresult [dict get $bracketed_elements $in_bracket] dict set bracketed_elements $in_bracket [list] incr in_bracket -1 if {$in_bracket == 0} { lappend result $subresult } else { dict lappend bracketed_elements $in_bracket $subresult } } elseif {[regexp {[\s]} $char]} { # } else { #first char of word - look for word-wrappers if {$char in [dict keys $wordwrappers]} { set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { set word $char } set in_word 1 } } } else { if {$in_word} { if {[string length $wordwrap]} { lassign [dict get $wordwrappers $wordwrap] _open closing endmark if {$char eq $endmark} { set wordwrap "" append word $closing lappend result $word set word "" set in_word 0 } else { append word $char } } else { if {$word_bdepth == 0} { if {$word in $shell_specials} { if {[regexp {[\s]} $char]} { lappend result $word set word "" set in_word 0 } elseif {$char eq "("} { lappend result $word set word "" set in_word 0 incr in_bracket } else { #at end of shell-specials is another point to look for word started by a wordwrapper char #- expect common case of things like >^/my/path^ if {$char in [dict keys $wordwrappers]} { lappend result $word set word "" set in_word 1 ;#just for explicitness.. we're straight into the next word. set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { #something unusual.. keep going with word! append word $char } } } else { if {[regexp {[\s)]} $char]} { lappend result $word set word "" set in_word 0 } elseif {$char eq "("} { incr word_bdepth append word $char } else { append word $char } } } else { switch -- $char { "(" { incr word_bdepth append word $char } ")" { incr word_bdepth -1 append word $char } default { append word $char } } } } } else { if {[regexp {[\s]} $char]} { #insig whitespace(?) } elseif {$char eq "("} { incr in_bracket dict set bracketed_elements $in_bracket [list] } elseif {$char eq ")"} { error "unbalanced bracket - unable to proceed result so far: $result bracketed_elements:$bracketed_elements" } else { #first char of word - look for word-wrappers if {$char in [dict keys $wordwrappers]} { set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { set word $char } set in_word 1 } } } #puts "----$bracketed_elements" } if {$in_bracket > 0} { error "shellfilter::parse_cmd_brackets missing close bracket. input was '$str'" } if {[dict exists $bracketed_elements 0]} { #lappend result [lindex [dict get $bracketed_elements 0] 0] lappend result [dict get $bracketed_elements 0] } if {$in_word} { lappend result $word } return $result } #only double quote if argument not quoted with single or double quotes proc dquote_if_not_quoted {a} { set wrapchars [string cat [string range $a 0 0] [string range $a end end]] switch -- $wrapchars { {""} - {''} { return $a } default { set newinner [string map [list {"} "\\\""] $a] return "\"$newinner\"" } } } #proc dquote_if_not_bracketed/braced? #wrap in double quotes if not double-quoted proc dquote_if_not_dquoted {a} { set wrapchars [string cat [string range $a 0 0] [string range $a end end]] switch -- $wrapchars { {""} { return $a } default { #escape any inner quotes.. set newinner [string map [list {"} "\\\""] $a] return "\"$newinner\"" } } } proc dquote {a} { #escape any inner quotes.. set newinner [string map [list {"} "\\\""] $a] return "\"$newinner\"" } proc get_scriptrun_from_cmdlist_dquote_if_not {cmdlist {shellcmdflag ""}} { set scr [auto_execok "script"] if {[string length $scr]} { #set scriptrun "( $c1 [lrange $cmdlist 1 end] )" set arg1 [lindex $cmdlist 0] if {[string first " " $arg1]>0} { set c1 [dquote_if_not_quoted $arg1] #set c1 "\"$arg1\"" } else { set c1 $arg1 } if {[string length $shellcmdflag]} { set scriptrun "$shellcmdflag \$($c1 " } else { set scriptrun "\$($c1 " } #set scriptrun "$c1 " foreach a [lrange $cmdlist 1 end] { #set a [string map [list "/" "//"] $a] #set a [string map [list "\"" "\\\""] $a] if {[string first " " $a] > 0} { append scriptrun [dquote_if_not_quoted $a] } else { append scriptrun $a } append scriptrun " " } set scriptrun [string trim $scriptrun] append scriptrun ")" #return [list $scr -q -e -c $scriptrun /dev/null] return [list $scr -e -c $scriptrun /dev/null] } else { return $cmdlist } } proc ::shellfilter::trun {commandlist args} { #jmn } # run a command (or tcl script) with tees applied to stdout/stderr/stdin (or whatever channels are being used) # By the point run is called - any transforms should already be in place on the channels if they're needed. # The tees will be inline with none,some or all of those transforms depending on how the stack was configured # (upstream,downstream configured via -float,-sink etc) proc ::shellfilter::run {commandlist args} { #must be a list. If it was a shell commandline string. convert it elsewhere first. variable sources set runtag "shellfilter-run" #set tid [::shellfilter::log::open $runtag [list -syslog 127.0.0.1:514]] set tid [::shellfilter::log::open $runtag [list -syslog ""]] if {[catch {llength $commandlist} listlen]} { set listlen "" } ::shellfilter::log::write $runtag " commandlist:'$commandlist' listlen:$listlen strlen:[string length $commandlist]" #flush stdout #flush stderr #adding filters with sink-aside will temporarily disable the existing redirection #All stderr/stdout from the shellcommand will now tee to the underlying stderr/stdout as well as the configured syslog set defaults [dict create \ -teehandle command \ -outchan stdout \ -errchan stderr \ -inchan stdin \ -tclscript 0 \ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set outchan [dict get $opts -outchan] set errchan [dict get $opts -errchan] set inchan [dict get $opts -inchan] set teehandle [dict get $opts -teehandle] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set is_script [dict get $opts -tclscript] dict unset opts -tclscript ;#don't pass it any further # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set teehandle_out ${teehandle}out ;#default commandout set teehandle_err ${teehandle}err set teehandle_in ${teehandle}in #puts stdout "shellfilter initialising tee_to_pipe transforms for in/out/err" # sources should be added when stack::new called instead(?) foreach source [list $teehandle_out $teehandle_err] { if {$source ni $sources} { lappend sources $source } } set outdeviceinfo [dict get $::shellfilter::stack::pipelines $teehandle_out device] set outpipechan [dict get $outdeviceinfo localchan] set errdeviceinfo [dict get $::shellfilter::stack::pipelines $teehandle_err device] set errpipechan [dict get $errdeviceinfo localchan] #set indeviceinfo [dict get $::shellfilter::stack::pipelines $teehandle_in device] #set inpipechan [dict get $indeviceinfo localchan] #NOTE:These transforms are not necessarily at the top of each stack! #The float/sink mechanism, along with whether existing transforms are diversionary decides where they sit. set id_out [shellfilter::stack::add $outchan tee_to_pipe -action sink-aside -settings [list -tag $teehandle_out -pipechan $outpipechan]] set id_err [shellfilter::stack::add $errchan tee_to_pipe -action sink-aside -settings [list -tag $teehandle_err -pipechan $errpipechan]] # need to use os level channel handle for stdin - try named pipes (or even sockets) instead of fifo2 for this # If non os-level channel - the command can't be run with the redirection # stderr/stdout can be run with non-os handles in the call - # but then it does introduce issues with terminal-detection and behaviour for stdout at least # # input is also a tee - we never want to change the source at this point - just log/process a side-channel of it. # #set id_in [shellfilter::stack::add $inchan tee_to_pipe -action sink-aside -settings [list -tag commandin -pipechan $inpipechan]] #set id_out [shellfilter::stack::add stdout tee_to_log -action sink-aside -settings [list -tag shellstdout -syslog 127.0.0.1:514 -file ""]] #set id_err [shellfilter::stack::add stderr tee_to_log -action sink-aside -settings [list -tag shellstderr -syslog 127.0.0.1:514 -file "stderr.txt"]] #we need to catch errors - and ensure stack::remove calls occur. #An error can be raised if the command couldn't even launch, as opposed to a non-zero exitcode and stderr output from the command itself. # if {!$is_script} { set experiment 0 if $experiment { try { set results [exec {*}$commandlist] set exitinfo [list exitcode 0] } trap CHILDSTATUS {results options} { set exitcode [lindex [dict get $options -errorcode] 2] set exitinfo [list exitcode $exitcode] } } else { if {[catch { #run process with stdout/stderr/stdin or with configured channels #set exitinfo [shellcommand_stdout_stderr $commandlist $outchan $errchan $inpipechan {*}$opts] set exitinfo [shellcommand_stdout_stderr $commandlist $outchan $errchan stdin {*}$opts] #puts stderr "---->exitinfo $exitinfo" #subprocess result should usually have an "exitcode" key #but for background execution we will get a "pids" key of process ids. } errMsg]} { set exitinfo [list error "$errMsg" source shellcommand_stdout_stderr] } } } else { if {[catch { #script result set exitinfo [list result [uplevel #0 [list eval $commandlist]]] } errMsg]} { set exitinfo [list error "$errMsg" errorCode $::errorCode errorInfo "$::errorInfo"] } } #the previous redirections on the underlying inchan/outchan/errchan items will be restored from the -aside setting during removal #Remove execution-time Tees from stack shellfilter::stack::remove stdout $id_out shellfilter::stack::remove stderr $id_err #shellfilter::stack::remove stderr $id_in #chan configure stderr -buffering line #flush stdout ::shellfilter::log::write $runtag " return '$exitinfo'" ::shellfilter::log::close $runtag return $exitinfo } proc ::shellfilter::logtidyup { {tags {}} } { variable sources set worker_errorlist [list] set tidied_sources [list] set tidytag "logtidy" # opening a thread or writing to a log/syslog close to possible process exit is probably not a great idea. # we should ensure the thread already exists early on if we really need logging here. # #set tid [::shellfilter::log::open $tidytag {-syslog 127.0.0.1:514}] #::shellfilter::log::write $tidytag " logtidyuptags '$tags'" foreach s $sources { if {$s eq $tidytag} { continue } #puts "logtidyup source $s" set close 1 if {[llength $tags]} { if {$s ni $tags} { set close 0 } } if {$close} { lappend tidied_sources $s shellfilter::log::close $s lappend worker_errorlist {*}[shellthread::manager::get_and_clear_errors $s] } } set remaining_sources [list] foreach s $sources { if {$s ni $tidied_sources} { lappend remaining_sources $s } } #set sources [concat $remaining_sources $tidytag] set sources $remaining_sources #shellfilter::stack::unwind stdout #shellfilter::stack::unwind stderr return [list tidied $tidied_sources errors $worker_errorlist] } #package require tcl::chan::null # e.g set errchan [tcl::chan::null] # e.g chan push stdout [shellfilter::chan::var new ::some_var] proc ::shellfilter::shellcommand_stdout_stderr {commandlist outchan errchan inchan args} { set valid_flags [list \ -timeout \ -outprefix \ -errprefix \ -debug \ -copytempfile \ -outbuffering \ -errbuffering \ -inbuffering \ -readprocesstranslation \ -outtranslation \ -stdinhandler \ -outchan \ -errchan \ -inchan \ -teehandle\ ] set runtag shellfilter-run2 #JMN - load from config #set tid [::shellfilter::log::open $runtag [list -syslog "127.0.0.1:514"]] set tid [::shellfilter::log::open $runtag [list -syslog ""]] if {([llength $args] % 2) != 0} { error "Trailing arguments after any positional arguments must be in pairs of the form -argname argvalue. Valid flags are:'$valid_flags'" } set invalid_flags [list] foreach {k -} $args { switch -- $k { -timeout - -outprefix - -errprefix - -debug - -copytempfile - -outbuffering - -errbuffering - -inbuffering - -readprocesstranslation - -outtranslation - -stdinhandler - -outchan - -errchan - -inchan - -teehandle { } default { lappend invalid_flags $k } } } if {[llength $invalid_flags]} { error "Unknown option(s)'$invalid_flags': must be one of '$valid_flags'" } #line buffering generally best for output channels.. keeps relative output order of stdout/stdin closer to source order #there may be data where line buffering is inappropriate, so it's configurable per std channel #reading inputs with line buffering can result in extraneous newlines as we can't detect trailing data with no newline before eof. set defaults [dict create \ -outchan stdout \ -errchan stderr \ -inchan stdin \ -outbuffering none \ -errbuffering none \ -readprocesstranslation auto \ -outtranslation lf \ -inbuffering none \ -timeout 900000\ -outprefix ""\ -errprefix ""\ -debug 0\ -copytempfile 0\ -stdinhandler ""\ ] set args [dict merge $defaults $args] set outbuffering [dict get $args -outbuffering] set errbuffering [dict get $args -errbuffering] set inbuffering [dict get $args -inbuffering] set readprocesstranslation [dict get $args -readprocesstranslation] set outtranslation [dict get $args -outtranslation] set timeout [dict get $args -timeout] set outprefix [dict get $args -outprefix] set errprefix [dict get $args -errprefix] set debug [dict get $args -debug] set copytempfile [dict get $args -copytempfile] set stdinhandler [dict get $args -stdinhandler] set debugname "shellfilter-debug" if {$debug} { set tid [::shellfilter::log::open $debugname [list -syslog "127.0.0.1:514"]] ::shellfilter::log::write $debugname " commandlist '$commandlist'" } #'clock micros' good enough id for shellcommand calls unless one day they can somehow be called concurrently or sequentially within a microsecond and within the same interp. # a simple counter would probably work too #consider other options if an alternative to the single vwait in this function is used. set call_id [tcl::clock::microseconds] ; set ::shellfilter::shellcommandvars($call_id,exitcode) "" set waitvar ::shellfilter::shellcommandvars($call_id,waitvar) if {$debug} { ::shellfilter::log::write $debugname " waitvar '$waitvar'" } lassign [chan pipe] rderr wrerr chan configure $wrerr -blocking 0 set custom_stderr "" set lastitem [lindex $commandlist end] #todo - ensure we can handle 2> file (space after >) #review - reconsider the handling of redirections such that tcl-style are handled totally separately to other shell syntaxes! # #note 2>@1 must ocur as last word for tcl - but 2@stdout can occur elsewhere #(2>@stdout echoes to main stdout - not into pipeline) #To properly do pipelines it looks like we will have to split on | and call this proc multiple times and wire it up accordingly (presumably in separate threads) switch -- [string trim $lastitem] { {&} { set name [lindex $commandlist 0] #background execution - stdout and stderr from child still comes here - but process is backgrounded #FIX! - this is broken for paths with backslashes for example #set pidlist [exec {*}[concat $name [lrange $commandlist 1 end]]] set pidlist [exec {*}$commandlist] return [list pids $pidlist] } {2>&1} - {2>@1} { set custom_stderr {2>@1} ;#use the tcl style set commandlist [lrange $commandlist 0 end-1] } default { # 2> filename # 2>> filename # 2>@ openfileid set redir2test [string range $lastitem 0 1] if {$redir2test eq "2>"} { set custom_stderr $lastitem set commandlist [lrange $commandlist 0 end-1] } } } set lastitem [lindex $commandlist end] set teefile "" ;#empty string, write, append #an ugly hack.. because redirections seem to arrive wrapped - review! #There be dragons here.. #Be very careful with list manipulation of the commandlist string.. backslashes cause havoc. commandlist must always be a well-formed list. generally avoid string manipulations on entire list or accidentally breaking a list element into parts if it shouldn't be.. #The problem here - is that we can't always know what was intended on the commandline regarding quoting ::shellfilter::log::write $runtag "checking for redirections in $commandlist" #sometimes we see a redirection without a following space e.g >C:/somewhere #normalize switch -regexp -- $lastitem\ {^>[/[:alpha:]]+} { set lastitem "> [string range $lastitem 1 end]" }\ {^>>[/[:alpha:]]+} { set lastitem ">> [string range $lastitem 2 end]" } #for a redirection, we assume either a 2-element list at tail of form {> {some path maybe with spaces}} #or that the tail redirection is not wrapped.. x y z > {some path maybe with spaces} #we can't use list methods such as llenth on a member of commandlist set wordlike_parts [regexp -inline -all {\S+} $lastitem] if {([llength $wordlike_parts] >= 2) && ([lindex $wordlike_parts 0] in [list ">>" ">"])} { #wrapped redirection - but maybe not 'well' wrapped (unquoted filename) set lastitem [string trim $lastitem] ;#we often see { > something} #don't use lassign or lrange on the element itself without checking first #we can treat the commandlist as a whole as a well formed list but not neccessarily each element within. #lassign $lastitem redir redirtarget #set commandlist [lrange $commandlist 0 end-1] # set itemchars [split $lastitem ""] set firstchar [lindex $itemchars 0] set lastchar [lindex $itemchars end] #NAIVE test for double quoted only! #consider for example {"a" x="b"} #testing first and last is not decisive #We need to decide what level of drilling down is even appropriate here.. #if something was double wrapped - it was perhaps deliberate so we don't interpret it as something(?) set head_tail_chars [list $firstchar $lastchar] set doublequoted [expr {[llength [lsearch -all $head_tail_chars "\""]] == 2}] if {[string equal "\{" $firstchar] && [string equal "\}" $lastchar]} { set curlyquoted 1 } else { set curlyquoted 0 } if {$curlyquoted} { #these are not the tcl protection brackets but ones supplied in the argument #it's still not valid to use list operations on a member of the commandlist set inner [string range $lastitem 1 end-1] #todo - fix! we still must assume there could be list-breaking data! set innerwords [regexp -inline -all {\S+} $inner] ;#better than [split $inner] because we don't get extra empty elements for each whitespace char set redir [lindex $innerwords 0] ;#a *potential* redir - to be tested below set redirtarget [lrange $innerwords 1 end] ;#all the rest } elseif {$doublequoted} { ::shellfilter::log::write $debugname "doublequoting at tail of command '$commandlist'" set inner [string range $lastitem 1 end-1] set innerwords [regexp -inline -all {\S+} $inner] set redir [lindex $innerwords 0] set redirtarget [lrange $innerwords 1 end] } else { set itemwords [regexp -inline -all {\S+} $lastitem] # e.g > c:\test becomes > {c:\test} # but > c/mnt/c/test/temp.txt stays as > /mnt/c/test/temp.txt set redir [lindex $itemwords 0] set redirtarget [lrange $itemwords 1 end] } set commandlist [lrange $commandlist 0 end-1] } elseif {[lindex $commandlist end-1] in [list ">>" ">"]} { #unwrapped redirection #we should be able to use list operations like lindex and lrange here as the command itself is hopefully still a well formed list set redir [lindex $commandlist end-1] set redirtarget [lindex $commandlist end] set commandlist [lrange $commandlist 0 end-2] } else { #no redirection set redir "" set redirtarget "" #no change to command list } switch -- $redir { ">>" - ">" { set redirtarget [string trim $redirtarget "\""] ::shellfilter::log::write $runtag " have redirection '$redir' to '$redirtarget'" set winfile $redirtarget ;#default assumption switch -glob -- $redirtarget { "/c/*" { set winfile "c:/[string range $redirtarget 3 end]" } "/mnt/c/*" { set winfile "c:/[string range $redirtarget 7 end]" } } if {[file exists [file dirname $winfile]]} { #containing folder for target exists if {$redir eq ">"} { set teefile "write" } else { set teefile "append" } ::shellfilter::log::write $runtag "Directory exists '[file dirname $winfile]' operation:$teefile" } else { #we should be writing to a file.. but can't ::shellfilter::log::write $runtag "cannot verify directory exists '[file dirname $winfile]'" } } default { ::shellfilter::log::write $runtag "No redir found!!" } } #often first element of command list is wrapped and cannot be run directly #e.g {{ls -l} {> {temp.tmp}}} #we will assume that if there is a single element which is a pathname containing a space - it is doubly wrapped. # this may not be true - and the command may fail if it's just {c:\program files\etc} but it is the less common case and we currently have no way to detect. #unwrap first element.. will not affect if not wrapped anyway (subject to comment above re spaces) set commandlist [concat [lindex $commandlist 0] [lrange $commandlist 1 end]] #todo? #child process environment. # - to pass a different environment to the child - we would need to save the env array, modify as required, and then restore the env array. #to restore buffering states after run set remember_in_out_err_buffering [list \ [chan configure $inchan -buffering] \ [chan configure $outchan -buffering] \ [chan configure $errchan -buffering] \ ] set remember_in_out_err_translation [list \ [chan configure $inchan -translation] \ [chan configure $outchan -translation] \ [chan configure $errchan -translation] \ ] chan configure $inchan -buffering $inbuffering -blocking 0 ;#we are setting up a readable handler for this - so non-blocking ok chan configure $errchan -buffering $errbuffering #chan configure $outchan -blocking 0 chan configure $outchan -buffering $outbuffering ;#don't configure non-blocking. weird duplicate of *second* line occurs if you do. # #-------------------------------------------- #Tested on windows. Works to stop in output when buffering is none, reading from channel with -translation auto #cmd, pwsh, tcl #chan configure $outchan -translation lf #chan configure $errchan -translation lf #-------------------------------------------- chan configure $outchan -translation $outtranslation chan configure $errchan -translation $outtranslation #puts stderr "chan configure $wrerr [chan configure $wrerr]" if {$debug} { ::shellfilter::log::write $debugname "COMMAND [list $commandlist] strlen:[string length $commandlist] llen:[llength $commandlist]" } #todo - handle custom redirection of stderr to a file? if {[string length $custom_stderr]} { #::shellfilter::log::write $runtag "LAUNCH open |[concat $commandlist $custom_stderr] a+" #set rdout [open |[concat $commandlist $custom_stderr] a+] ::shellfilter::log::write $runtag "LAUNCH open |[concat $commandlist [list $custom_stderr <@$inchan]] [list RDONLY]" set rdout [open |[concat $commandlist [list <@$inchan $custom_stderr]] [list RDONLY]] set rderr "bogus" ;#so we don't wait for it } else { ::shellfilter::log::write $runtag "LAUNCH open |[concat $commandlist [list 2>@$wrerr <@$inchan]] [list RDONLY]" #set rdout [open |[concat $commandlist [list 2>@$wrerr]] a+] #set rdout [open |[concat $commandlist [list 2>@$wrerr]] [list RDWR]] # If we don't redirect stderr to our own tcl-based channel - then the transforms don't get applied. # This is the whole reason we need these file-event loops. # Ideally we need something like exec,open in tcl that interacts with transformed channels directly and emits as it runs, not only at termination # - and that at least appears like a terminal to the called command. #set rdout [open |[concat $commandlist [list 2>@stderr <@$inchan]] [list RDONLY]] set rdout [open |[concat $commandlist [list 2>@$wrerr <@$inchan]] [list RDONLY]] chan configure $rderr -buffering $errbuffering -blocking 0 chan configure $rderr -translation $readprocesstranslation } set command_pids [pid $rdout] #puts stderr "command_pids: $command_pids" #tcl::process ensemble only available in 8.7+ - and it didn't prove useful here anyway # the child process generally won't shut down until channels are closed. # premature EOF on grandchild process launch seems to be due to lack of terminal emulation when redirecting stdin/stdout. # worked around in punk/repl using 'script' command as a fake tty. #set subprocesses [tcl::process::list] #puts stderr "subprocesses: $subprocesses" #if {[lindex $command_pids 0] ni $subprocesses} { # puts stderr "pid [lindex $command_pids 0] not running $errMsg" #} else { # puts stderr "pid [lindex $command_pids 0] is running" #} if {$debug} { ::shellfilter::log::write $debugname "pipeline pids: $command_pids" } #jjj chan configure $rdout -buffering $outbuffering -blocking 0 chan configure $rdout -translation $readprocesstranslation if {![string length $custom_stderr]} { chan event $rderr readable [list apply {{chan other wrerr outchan errchan waitfor errprefix errbuffering debug debugname pids} { if {$errbuffering eq "line"} { set countchunk [chan gets $chan chunk] ;#only get one line so that order between stderr and stdout is more likely to be preserved #errprefix only applicable to line buffered output if {$countchunk >= 0} { if {[chan eof $chan]} { puts -nonewline $errchan ${errprefix}$chunk } else { puts $errchan "${errprefix}$chunk" } } } else { set chunk [chan read $chan] if {[string length $chunk]} { puts -nonewline $errchan $chunk } } if {[chan eof $chan]} { flush $errchan ;#jmn #set subprocesses [tcl::process::list] #puts stderr "subprocesses: $subprocesses" #if {[lindex $pids 0] ni $subprocesses} { # puts stderr "stderr reader: pid [lindex $pids 0] no longer running" #} else { # puts stderr "stderr reader: pid [lindex $pids 0] still running" #} chan close $chan #catch {chan close $wrerr} if {$other ni [chan names]} { set $waitfor stderr } } }} $rderr $rdout $wrerr $outchan $errchan $waitvar $errprefix $errbuffering $debug $debugname $command_pids] } #todo - handle case where large amount of stdin coming in faster than rdout can handle #as is - arbitrary amount of memory could be used because we aren't using a filevent for rdout being writable # - we're just pumping it in to the non-blocking rdout buffers # ie there is no backpressure and stdin will suck in as fast as possible. # for most commandlines this probably isn't too big a deal.. but it could be a problem for multi-GB disk images etc # # ## Note - detecting trailing missing nl before eof is basically the same here as when reading rdout from executable # - but there is a slight difference in that with rdout we get an extra blocked state just prior to the final read. # Not known if that is significant ## with inchan configured -buffering line #c:\repo\jn\punk\test>printf "test\netc\n" | tclsh punk.vfs/main.tcl -r cat #warning reading input with -buffering line. Cannot detect missing trailing-newline at eof #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 #c:\repo\jn\punk\test>printf "test\netc" | tclsh punk.vfs/main.tcl -r cat #warning reading input with -buffering line. Cannot detect missing trailing-newline at eof #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:1 pend:-1 count:3 #etc if 0 { chan event $inchan readable [list apply {{chan wrchan inbuffering waitfor} { #chan copy stdin $chan ;#doesn't work in a chan event if {$inbuffering eq "line"} { set countchunk [chan gets $chan chunk] #puts $wrchan "stdinstate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:$countchunk" if {$countchunk >= 0} { if {[chan eof $chan]} { puts -nonewline $wrchan $chunk } else { puts $wrchan $chunk } } } else { set chunk [chan read $chan] if {[string length $chunk]} { puts -nonewline $wrchan $chunk } } if {[chan eof $chan]} { puts stderr "|stdin_reader>eof [chan configure stdin]" chan event $chan readable {} #chan close $chan chan close $wrchan write ;#half close #set $waitfor "stdin" } }} $inchan $rdout $inbuffering $waitvar] if {[string length $stdinhandler]} { chan configure stdin -buffering line -blocking 0 chan event stdin readable $stdinhandler } } set actual_proc_out_buffering [chan configure $rdout -buffering] set actual_outchan_buffering [chan configure $outchan -buffering] #despite whatever is configured - we match our reading to how we need to output set read_proc_out_buffering $actual_outchan_buffering if {[string length $teefile]} { set logname "redir_[string map {: _} $winfile]_[tcl::clock::microseconds]" set tid [::shellfilter::log::open $logname {-syslog 127.0.0.1:514}] if {$teefile eq "write"} { ::shellfilter::log::write $logname "opening '$winfile' for write" set fd [open $winfile w] } else { ::shellfilter::log::write $logname "opening '$winfile' for appending" set fd [open $winfile a] } #chan configure $fd -translation lf chan configure $fd -translation $outtranslation chan configure $fd -encoding utf-8 set tempvar_bytetotal [namespace current]::totalbytes[tcl::clock::microseconds] set $tempvar_bytetotal 0 chan event $rdout readable [list apply {{chan other wrerr outchan errchan read_proc_out_buffering waitfor outprefix call_id debug debugname writefile writefilefd copytempfile bytevar logtag} { #review - if we write outprefix to normal stdout.. why not to redirected file? #usefulness of outprefix is dubious upvar $bytevar totalbytes if {$read_proc_out_buffering eq "line"} { #set outchunk [chan read $chan] set countchunk [chan gets $chan outchunk] ;#only get one line so that order between stderr and stdout is more likely to be preserved if {$countchunk >= 0} { if {![chan eof $chan]} { set numbytes [expr {[string length $outchunk] + 1}] ;#we are assuming \n not \r\n - but count won't/can't be completely accurate(?) - review puts $writefilefd $outchunk } else { set numbytes [string length $outchunk] puts -nonewline $writefilefd $outchunk } incr totalbytes $numbytes ::shellfilter::log::write $logtag "${outprefix} wrote $numbytes bytes to $writefile" #puts $outchan "${outprefix} wrote $numbytes bytes to $writefile" } } else { set outchunk [chan read $chan] if {[string length $outchunk]} { puts -nonewline $writefilefd $outchunk set numbytes [string length $outchunk] incr totalbytes $numbytes ::shellfilter::log::write $logtag "${outprefix} wrote $numbytes bytes to $writefile" } } if {[chan eof $chan]} { flush $writefilefd ;#jmn #set blocking so we can get exit code chan configure $chan -blocking 1 catch {::shellfilter::log::write $logtag "${outprefix} total bytes $totalbytes written to $writefile"} #puts $outchan "${outprefix} total bytes $totalbytes written to $writefile" catch {close $writefilefd} if {$copytempfile} { catch {file copy $writefile "[file rootname $writefile]_copy[file extension $writefile]"} } try { chan close $chan set ::shellfilter::shellcommandvars($call_id,exitcode) 0 if {$debug} { ::shellfilter::log::write $debugname "(teefile) -- child process returned no error. (exit code 0) --" } } trap CHILDSTATUS {result options} { set code [lindex [dict get $options -errorcode] 2] if {$debug} { ::shellfilter::log::write $debugname "(teefile) CHILD PROCESS EXITED with code: $code" } set ::shellfilter::shellcommandvars($call_id,exitcode) $code } catch {chan close $wrerr} if {$other ni [chan names]} { set $waitfor stdout } } }} $rdout $rderr $wrerr $outchan $errchan $read_proc_out_buffering $waitvar $outprefix $call_id $debug $debugname $winfile $fd $copytempfile $tempvar_bytetotal $logname] } else { # This occurs when we have outbuffering set to 'line' - as the 'input' from rdout which comes from the executable is also configured to 'line' # where b:0|1 is whether chan blocked $chan returns 0 or 1 # pend is the result of chan pending $chan # eof is the resot of chan eof $chan ##------------------------- ##If we still read with gets,to retrieve line by line for output to line-buffered output - but the input channel is configured with -buffering none ## then we can detect the difference # there is an extra blocking read - but we can stil use eof with data to detect the absent newline and avoid passing an extra one on. #c:\repo\jn\punk\test>printf "test\netc\n" | tclsh punk.vfs/main.tcl /c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 #c:\repo\jn\punk\test>printf "test\netc" | tclsh punk.vfs/main.tcl /u/c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:1 eof:0 pend:-1 count:-1 #instate b:0 eof:1 pend:-1 count:3 #etc ##------------------------ #this should only occur if upstream is coming from stdin reader that has line buffering and hasn't handled the difference properly.. ###reading with gets from line buffered input with trailing newline #c:\repo\jn\punk\test>printf "test\netc\n" | tclsh punk.vfs/main.tcl /c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 ###reading with gets from line buffered input with trailing newline ##No detectable difference! #c:\repo\jn\punk\test>printf "test\netc" | tclsh punk.vfs/main.tcl /c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 ##------------------------- #Note that reading from -buffering none and writing straight out gives no problem because we pass the newlines through as is #set ::shellfilter::chan::lastreadblocked_nodata_noeof($rdout) 0 ;#a very specific case of readblocked prior to eof.. possibly not important #this detection is disabled for now - but left for debugging in case it means something.. or changes chan event $rdout readable [list apply {{chan other wrerr outchan errchan read_proc_out_buffering waitfor outprefix call_id debug debugname pids} { #set outchunk [chan read $chan] if {$read_proc_out_buffering eq "line"} { set countchunk [chan gets $chan outchunk] ;#only get one line so that order between stderr and stdout is more likely to be preserved #countchunk can be -1 before eof e.g when blocked #debugging output inline with data - don't leave enabled #puts $outchan "instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:$countchunk" if {$countchunk >= 0} { if {![chan eof $chan]} { puts $outchan ${outprefix}$outchunk } else { puts -nonewline $outchan ${outprefix}$outchunk #if {$::shellfilter::chan::lastreadblocked_nodata_noeof($chan)} { # seems to be the usual case #} else { # #false alarm, or ? we've reached eof with data but didn't get an empty blocking read just prior # #Not known if this occurs # #debugging output inline with data - don't leave enabled # puts $outchan "!!!prev read didn't block: instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:$countchunk" #} } #set ::shellfilter::chan::lastreadblocked_nodata_noeof($chan) 0 } else { #set ::shellfilter::chan::lastreadblocked_nodata_noeof($chan) [expr {[chan blocked $chan] && ![chan eof $chan]}] } } else { #puts $outchan "read CHANNEL $chan [chan configure $chan]" #puts $outchan "write CHANNEL $outchan b:[chan configure $outchan -buffering] t:[chan configure $outchan -translation] e:[chan configure $outchan -encoding]" set outchunk [chan read $chan] #puts $outchan "instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:[string length $outchunk]" if {[string length $outchunk]} { #set stringrep [encoding convertfrom utf-8 $outchunk] #set newbytes [encoding convertto utf-16 $stringrep] #puts -nonewline $outchan $newbytes puts -nonewline $outchan $outchunk } } if {[chan eof $chan]} { flush $outchan ;#jmn #for now just look for first element in the pid list.. #set subprocesses [tcl::process::list] #puts stderr "subprocesses: $subprocesses" #if {[lindex $pids 0] ni $subprocesses} { # puts stderr "stdout reader pid: [lindex $pids 0] no longer running" #} else { # puts stderr "stdout reader pid: [lindex $pids 0] still running" #} #puts $outchan "instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan]" chan configure $chan -blocking 1 ;#so we can get exit code try { chan close $chan set ::shellfilter::shellcommandvars($call_id,exitcode) 0 if {$debug} { ::shellfilter::log::write $debugname " -- child process returned no error. (exit code 0) --" } } trap CHILDSTATUS {result options} { set code [lindex [dict get $options -errorcode] 2] set ::shellfilter::shellcommandvars($call_id,exitcode) $code if {$debug} { ::shellfilter::log::write $debugname " CHILD PROCESS EXITED with code: $code" } } trap CHILDKILLED {result options} { #set code [lindex [dict get $options -errorcode] 2] #set ::shellfilter::shellcommandvars(%id%,exitcode) $code set ::shellfilter::shellcommandvars($call_id,exitcode) "childkilled" if {$debug} { ::shellfilter::log::write $debugname " CHILD PROCESS EXITED with result:'$result' options:'$options'" } } finally { #puts stdout "HERE" #flush stdout } catch {chan close $wrerr} if {$other ni [chan names]} { set $waitfor stdout } } }} $rdout $rderr $wrerr $outchan $errchan $read_proc_out_buffering $waitvar $outprefix $call_id $debug $debugname $command_pids] } #todo - add ability to detect activity/data-flow and change timeout to only apply for period with zero data #e.g x hrs with no data(?) #reset timeout when data detected. after $timeout [string map [list %w% $waitvar %id% $call_id %wrerr% $wrerr %rdout% $rdout %rderr% $rderr %debug% $debug %debugname% $debugname] { if {[info exists ::shellfilter::shellcommandvars(%id%,exitcode)]} { if {[set ::shellfilter::shellcommandvars(%id%,exitcode)] ne ""} { catch { chan close %wrerr% } catch { chan close %rdout%} catch { chan close %rderr%} } else { chan configure %rdout% -blocking 1 try { chan close %rdout% set ::shellfilter::shellcommandvars(%id%,exitcode) 0 if {%debug%} { ::shellfilter::log::write %debugname% "(timeout) -- child process returned no error. (exit code 0) --" } } trap CHILDSTATUS {result options} { set code [lindex [dict get $options -errorcode] 2] if {%debug%} { ::shellfilter::log::write %debugname% "(timeout) CHILD PROCESS EXITED with code: $code" } set ::shellfilter::shellcommandvars(%id%,exitcode) $code } trap CHILDKILLED {result options} { set code [lindex [dict get $options -errorcode] 2] #set code [dict get $options -code] #set ::shellfilter::shellcommandvars(%id%,exitcode) $code #set ::shellfilter::shellcommandvars($call_id,exitcode) "childkilled-timeout" set ::shellfilter::shellcommandvars(%id%,exitcode) "childkilled-timeout" if {%debug%} { ::shellfilter::log::write %debugname% "(timeout) CHILDKILLED with code: $code" ::shellfilter::log::write %debugname% "(timeout) result:$result options:$options" } } catch { chan close %wrerr% } catch { chan close %rderr%} } set %w% "timeout" } }] vwait $waitvar set exitcode [set ::shellfilter::shellcommandvars($call_id,exitcode)] if {![string is digit -strict $exitcode]} { puts stderr "Process exited with non-numeric code: $exitcode" flush stderr } if {[string length $teefile]} { #cannot be called from within an event handler above.. vwait reentrancy etc catch {::shellfilter::log::close $logname} } if {$debug} { ::shellfilter::log::write $debugname " closed by: [set $waitvar] with exitcode: $exitcode" catch {::shellfilter::log::close $debugname} } array unset ::shellfilter::shellcommandvars $call_id,* #restore buffering to pre shellfilter::run state lassign $remember_in_out_err_buffering bin bout berr chan configure $inchan -buffering $bin chan configure $outchan -buffering $bout chan configure $errchan -buffering $berr lassign $remember_in_out_err_translation tin tout terr chan configure $inchan -translation $tin chan configure $outchan -translation $tout chan configure $errchan -translation $terr #in channel probably closed..(? review - should it be?) catch { chan configure $inchan -buffering $bin } return [list exitcode $exitcode] } } package provide shellfilter [namespace eval shellfilter { variable version set version 0.1.9 }]