#copyright 2023 Julian Marcel Noble #license: BSD (revised 3-clause) # #Note shellfilter is currently only directly useful for unidirectional channels e.g stdin,stderr,stdout, or for example fifo2 where only one direction is being used. #To generalize this to bidrectional channels would require shifting around read & write methods on transform objects in a very complicated manner. #e.g each transform would probably be a generic transform container which holds sub-objects to which read & write are indirected. #This is left as a future exercise...possibly it's best left as a concept for uni-directional channels anyway # - as presumably the reads/writes from a bidirectional channel could be diverted off to unidirectional pipelines for processing with less work # (and maybe even better speed/efficiency if the data volume is asymmetrical and there is significant processing on one direction) # namespace eval shellfilter::log { variable allow_adhoc_tags 1 variable open_logs [dict create] #'tag' is an identifier for the log source. # each tag will use it's own thread to write to the configured log target proc open {tag {settingsdict {}}} { upvar ::shellfilter::sources sourcelist package require shellthread if {![dict exists $settingsdict -tag]} { dict set settingsdict -tag $tag } else { #review if {$tag ne [dict get $settingsdict -tag]} { error "shellfilter::log::open first argument tag: '$tag' does not match -tag '[dict get $settingsdict -tag]' omit -tag, or supply same value" } } if {$tag ni $sourcelist} { lappend sourcelist $tag } #note new_worker set worker_tid [shellthread::manager::new_worker $tag $settingsdict] #puts stderr "shellfilter::log::open this_threadid: [thread::id] tag: $tag worker_tid: $worker_tid" return $worker_tid } proc write {tag msg} { upvar ::shellfilter::sources sourcelist variable allow_adhoc_tags if {!$allow_adhoc_tags} { if {$tag ni $sourcelist} { error "shellfilter::log::write tag '$tag' hasn't been initialised with a call to shellfilter::log::open $tag , and allow_adhoc_tags has been set false. use shellfilter::log::require_open false to allow adhoc tags" } } shellthread::manager::write_log $tag $msg } #write_sync - synchronous processing with logging thread, slower but potentially useful for debugging/testing or forcing delay til log written proc write_sync {tag msg} { shellthread::manager::write_log $tag $msg -async 0 } proc close {tag} { #shellthread::manager::close_worker $tag shellthread::manager::unsubscribe [list $tag]; #workertid will be added back to free list if no tags remain subscribed } #review #configure whether we can call shellfilter::log::write without having called open first proc require_open {{is_open_required {}}} { variable allow_adhoc_tags if {![string length $is_open_required]} { return $allow_adhoc_tags } else { set truevalues [list y yes true 1] set falsevalues [list n no false 0] if {[string tolower $is_open_required] in $truevalues} { set allow_adhoc_tags 1 } elseif {[string tolower $is_open_required] in $falsevalues} { set allow_adhoc_tags 0 } else { error "shellfilter::log::require_open unrecognised value '$is_open_required' try one of $truevalues or $falsevalues" } } } } namespace eval shellfilter::pipe { #write channel for program. workerthread reads other end of fifo2 and writes data somewhere proc open_out {tag_pipename {settingsdict {}}} { package require shellthread #we are only using the fifo in a single direction to pipe to another thread # - so whilst wchan and rchan could theoretically each be both read & write we're only using them for one operation each if {![catch {package require Memchan}]} { lassign [fifo2] wchan rchan } else { package require tcl::chan::fifo2 lassign [tcl::chan::fifo2] wchan rchan } #default -translation for both types of fifo on windows is {auto crlf} # -encoding is as per '[encoding system]' on the platform - e.g utf-8 (e.g windows when beta-utf8 enabled) chan configure $wchan -buffering [dict get $settingsdict -buffering] ;# #application end must not be binary for our filters to operate on it #chan configure $rchan -buffering [dict get $settingsdict -buffering] -translation binary ;#works reasonably.. chan configure $rchan -buffering [dict get $settingsdict -buffering] -translation lf set worker_tid [shellthread::manager::new_pipe_worker $tag_pipename $settingsdict] #puts stderr "worker_tid: $worker_tid" #set_read_pipe does the thread::transfer of the rchan end. -buffering setting is maintained during thread transfer shellthread::manager::set_pipe_read_from_client $tag_pipename $worker_tid $rchan set pipeinfo [list localchan $wchan remotechan $rchan workertid $worker_tid direction out] return $pipeinfo } #read channel for program. workerthread writes to other end of fifo2 from whereever it's reading (stdin, file?) proc open_in {tag_pipename {settingsdict {} }} { package require shellthread package require tcl::chan::fifo2 lassign [tcl::chan::fifo2] wchan rchan set program_chan $rchan set worker_chan $wchan chan configure $worker_chan -buffering [dict get $settingsdict -buffering] chan configure $program_chan -buffering [dict get $settingsdict -buffering] chan configure $program_chan -blocking 0 chan configure $worker_chan -blocking 0 set worker_tid [shellthread::manager::new_worker $tag_pipename $settingsdict] shellthread::manager::set_pipe_write_to_client $tag_pipename $worker_tid $worker_chan set pipeinfo [list localchan $program_chan remotechan $worker_chan workertid $worker_tid direction in] puts stderr "|jn>pipe::open_in returning $pipeinfo" puts stderr "program_chan: [chan conf $program_chan]" return $pipeinfo } } namespace eval shellfilter::ansi2 { #shellfilter::ansi procs only: adapted from ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control except where otherwise marked variable test "blah\033\[1;33mETC\033\[0;mOK" namespace export + = ? #CSI m = SGR (Select Graphic Rendition) variable SGR_setting_map { bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 reverse 7 noreverse 27 defaultfg 39 defaultbg 49 overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 } variable SGR_colour_map { black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 } variable SGR_map set SGR_map [dict merge $SGR_colour_map $SGR_setting_map] proc + {args} { #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- switch -nocase -glob $i { "256f*" { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } "256b*" { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } "rgbf*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } "rgbb*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation if {![llength $t]} { return "" ;# a+ nonexistent should return nothing rather than a reset ( \033\[\;m is a reset even without explicit zero(s)) } return "\x1b\[[join $t {;}]m" } proc = {args} { #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- if {[string match -nocase "256f*" $i]} { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } elseif {[string match -nocase 256b* $i]} { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } elseif {[string match -nocase rgbf* $i]} { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } elseif {[string match -nocase rgbb* $i]} { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation # empty list [a=] should do reset - same for [a= nonexistant] # explicit reset at beginning of parameter list for a= (as opposed to a+) set t [linsert $t 0 0] return "\x1b\[[join $t {;}]m" } } namespace eval shellfilter::ansi { #maint warning - from overtype package #stripansi is better/more comprehensive proc stripcodes {text} { #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic dict set escape_terminals OSC [list \007 \033\\] ;#note mix of 1 and 2-byte terminals #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. #line endings can theoretically occur within an ansi escape sequence (review e.g title?) set inputlist [split $text ""] set outputlist [list] #self-contained 2 byte ansi escape sequences - review more? set 2bytecodes_dict [dict create\ "reset_terminal" "\033c"\ "save_cursor_posn" "\u001b7"\ "restore_cursor_posn" "\u001b8"\ "cursor_up_one" "\u001bM"\ ] set 2bytecodes [dict values $2bytecodes_dict] set in_escapesequence 0 #assumption - undertext already 'rendered' - ie no backspaces or carriagereturns or other cursor movement controls set i 0 foreach u $inputlist { set v [lindex $inputlist $i+1] set uv ${u}${v} if {$in_escapesequence eq "2b"} { #2nd byte - done. set in_escapesequence 0 } elseif {$in_escapesequence != 0} { set escseq [dict get $escape_terminals $in_escapesequence] if {$u in $escseq} { set in_escapesequence 0 } elseif {$uv in $escseq} { set in_escapseequence 2b ;#flag next byte as last in sequence } } else { #handle both 7-bit and 8-bit CSI and OSC if {[regexp {^(?:\033\[|\u009b)} $uv]} { set in_escapesequence CSI } elseif {[regexp {^(?:\033\]|\u009c)} $uv]} { set in_escapesequence OSC } elseif {$uv in $2bytecodes} { #self-contained e.g terminal reset - don't pass through. set in_escapesequence 2b } else { lappend outputlist $u } } incr i } return [join $outputlist ""] } } namespace eval shellfilter::chan { set testobj ::shellfilter::chan::var if {$testobj ni [info commands $testobj]} { oo::class create var { variable o_datavar variable o_trecord variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] set varname [dict get $settingsdict -varname] set o_datavar $varname if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 1 ;# as a var is diversionary - default it to be a jucntion } } method initialize {ch mode} { return [list initialize finalize write] } method finalize {ch} { my destroy } method watch {ch events} { # must be present but we ignore it because we do not # post any events } #method read {ch count} { # return ? #} method write {ch bytes} { set stringdata [encoding convertfrom $o_enc $bytes] append $o_datavar $stringdata return "" } method meta_is_redirection {} { return $o_is_junction } method meta_buffering_supported {} { return [list line full none] } } #todo - something similar for multiple grep specs each with own -pre & -post .. store to dict? oo::class create tee_grep_to_var { variable o_datavar variable o_lastxlines variable o_trecord variable o_grepfor variable o_prelines variable o_postlines variable o_postcountdown variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set o_lastxlines [list] set o_postcountdown 0 set defaults [dict create -pre 1 -post 1] set settingsdict [dict get $tf -settings] set settings [dict merge $defaults $settingsdict] set o_datavar [dict get $settings -varname] set o_grepfor [dict get $settings -grep] set o_prelines [dict get $settings -pre] set o_postlines [dict get $settings -post] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } } method initialize {transform_handle mode} { return [list initialize finalize write] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } #method read {transform_handle count} { # return ? #} method write {transform_handle bytes} { set logdata [encoding convertfrom $o_enc $bytes] set lastx $o_lastxlines lappend o_lastxlines $logdata if {$o_postcountdown > 0} { append $o_datavar $logdata if {[regexp $o_grepfor $logdata]} { #another match in postlines set o_postcountdown $o_postlines } else { incr o_postcountdown -1 } } else { if {[regexp $o_grepfor $logdata]} { append $o_datavar [join $lastx] append $o_datavar $logdata set o_postcountdown $o_postlines } } if {[llength $o_lastxlines] > $o_prelines} { set o_lastxlines [lrange $o_lastxlines 1 end] } return $bytes } method meta_is_redirection {} { return $o_is_junction } method meta_buffering_supported {} { return [list line] } } oo::class create tee_to_var { variable o_datavars variable o_trecord variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] set varname [dict get $settingsdict -varname] set o_datavars $varname if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } } method initialize {ch mode} { return [list initialize finalize write] } method finalize {ch} { my destroy } method watch {ch events} { # must be present but we ignore it because we do not # post any events } #method read {ch count} { # return ? #} method write {ch bytes} { set stringdata [encoding convertfrom $o_enc $bytes] foreach v $o_datavars { append $v $stringdata } return $bytes } method meta_is_redirection {} { return $o_is_junction } } oo::class create tee_to_pipe { variable o_logsource variable o_localchan variable o_enc variable o_trecord variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {![dict exists $settingsdict -tag]} { error "tee_to_pipe constructor settingsdict missing -tag" } set o_localchan [dict get $settingsdict -pipechan] set o_logsource [dict get $settingsdict -tag] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } } method initialize {transform_handle mode} { return [list initialize read write finalize] } method finalize {transform_handle} { ::shellfilter::log::close $o_logsource my destroy } method watch {transform_handle events} { # must be present but we ignore it because we do not # post any events } method read {transform_handle bytes} { set logdata [encoding convertfrom $o_enc $bytes] #::shellfilter::log::write $o_logsource $logdata puts -nonewline $o_localchan $logdata return $bytes } method write {transform_handle bytes} { set logdata [encoding convertfrom $o_enc $bytes] #::shellfilter::log::write $o_logsource $logdata puts -nonewline $o_localchan $logdata return $bytes } #a tee is not a redirection - because data still flows along the main path method meta_is_redirection {} { return $o_is_junction } } oo::class create tee_to_log { variable o_tid variable o_logsource variable o_trecord variable o_enc variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {![dict exists $settingsdict -tag]} { error "tee_to_log constructor settingsdict missing -tag" } set o_logsource [dict get $settingsdict -tag] set o_tid [::shellfilter::log::open $o_logsource $settingsdict] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } } method initialize {ch mode} { return [list initialize read write finalize] } method finalize {ch} { ::shellfilter::log::close $o_logsource my destroy } method watch {ch events} { # must be present but we ignore it because we do not # post any events } method read {ch bytes} { set logdata [encoding convertfrom $o_enc $bytes] ::shellfilter::log::write $o_logsource $logdata return $bytes } method write {ch bytes} { set logdata [encoding convertfrom $o_enc $bytes] ::shellfilter::log::write $o_logsource $logdata return $bytes } method meta_is_redirection {} { return $o_is_junction } } oo::class create logonly { variable o_tid variable o_logsource variable o_trecord variable o_enc constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {![dict exists $settingsdict -tag]} { error "logonly constructor settingsdict missing -tag" } set o_logsource [dict get $settingsdict -tag] set o_tid [::shellfilter::log::open $o_logsource $settingsdict] } method initialize {transform_handle mode} { return [list initialize finalize write] } method finalize {transform_handle} { ::shellfilter::log::close $o_logsource my destroy } method watch {transform_handle events} { # must be present but we ignore it because we do not # post any events } #method read {transform_handle count} { # return ? #} method write {transform_handle bytes} { set logdata [encoding convertfrom $o_enc $bytes] if 0 { if {"utf-16le" in [encoding names]} { set logdata [encoding convertfrom utf-16le $bytes] } else { set logdata [encoding convertto utf-8 $bytes] #set logdata [encoding convertfrom unicode $bytes] #set logdata $bytes } } #set logdata $bytes #set logdata [string map [list \r -r- \n -n-] $logdata] #if {[string equal [string range $logdata end-1 end] "\r\n"]} { # set logdata [string range $logdata 0 end-2] #} #::shellfilter::log::write_sync $o_logsource $logdata ::shellfilter::log::write $o_logsource $logdata #return $bytes return } method meta_is_redirection {} { return 1 } } #review - we should probably provide a more narrow filter than only strips color - and one that strips most(?) # - but does it ever really make sense to strip things like "esc(0" and "esc(B" which flip to the G0 G1 characters? (once stripped - things like box-lines become ordinary letters - unlikely to be desired?) #punk::ansi::stripansi converts at least some of the box drawing G0 chars to unicode - todo - more complete conversion #assumes line-buffering. a more advanced filter required if ansicodes can arrive split accross separate read or write operations! oo::class create ansistrip { variable o_trecord variable o_enc variable o_is_junction constructor {tf} { package require punk::ansi set o_trecord $tf set o_enc [dict get $tf -encoding] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } } method initialize {transform_handle mode} { return [list initialize read write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring [punk::ansi::stripansi $instring] return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring [punk::ansi::stripansi $instring] return [encoding convertto $o_enc $outstring] } method meta_is_redirection {} { return $o_is_junction } } #a test oo::class create reconvert { variable o_trecord variable o_enc constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] } method initialize {transform_handle mode} { return [list initialize read write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } } oo::define reconvert { method meta_is_redirection {} { return 0 } } #this isn't a particularly nice thing to do to a stream - especially if someone isn't expecting ansi codes sprinkled through it. #It can be useful for test/debugging oo::class create ansiwrap { variable o_trecord variable o_enc variable o_colour variable o_do_colour variable o_do_normal variable o_is_junction constructor {tf} { package require punk::ansi set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {[dict exists $settingsdict -colour]} { set o_colour [dict get $settingsdict -colour] set o_do_colour [punk::ansi::a+ {*}$o_colour] set o_do_normal [punk::ansi::a] } else { set o_colour {} set o_do_colour "" set o_do_normal "" } if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } } method initialize {transform_handle mode} { return [list initialize write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring "$o_do_colour$instring$o_do_normal" #set outstring ">>>$instring" return [encoding convertto $o_enc $outstring] } method meta_is_redirection {} { return $o_is_junction } } #todo - something oo::class create rebuffer { variable o_trecord variable o_enc constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] } method initialize {transform_handle mode} { return [list initialize read write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] #set outstring [string map [list \n ] $instring] set outstring $instring return [encoding convertto $o_enc $outstring] #return [encoding convertto utf-16le $outstring] } } oo::define rebuffer { method meta_is_redirection {} { return 0 } } #has slight buffering/withholding of lone training cr - we can't be sure that a cr at end of chunk is part of \r\n sequence oo::class create tounix { variable o_trecord variable o_enc variable o_last_char_was_cr variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } set o_last_char_was_cr 0 } method initialize {transform_handle mode} { return [list initialize write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } #don't use read method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] #set outstring [string map [list \n ] $instring] if {$o_last_char_was_cr} { set instring "\r$instring" } set outstring [string map [list \r\n \n] $instring] set lastchar [string range $outstring end end] if {$lastchar eq "\r"} { set o_last_char_was_cr 1 set outstring [string range $outstring 0 end-1] } else { set o_last_char_was_cr 0 } #review! can we detect eof here on the transform_handle? #if eof, we don't want to strip a trailing \r return [encoding convertto $o_enc $outstring] #return [encoding convertto utf-16le $outstring] } } oo::define tounix { method meta_is_redirection {} { return $o_is_junction } } #write to handle case where line-endings already \r\n too oo::class create towindows { variable o_trecord variable o_enc variable o_last_char_was_cr variable o_is_junction constructor {tf} { set o_trecord $tf set o_enc [dict get $tf -encoding] set settingsdict [dict get $tf -settings] if {[dict exists $tf -junction]} { set o_is_junction [dict get $tf -junction] } else { set o_is_junction 0 } set o_last_char_was_cr 0 } method initialize {transform_handle mode} { return [list initialize write finalize] } method finalize {transform_handle} { my destroy } method watch {transform_handle events} { } #don't use read method read {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] set outstring $instring return [encoding convertto $o_enc $outstring] } method write {transform_handle bytes} { set instring [encoding convertfrom $o_enc $bytes] #set outstring [string map [list \n ] $instring] if {$o_last_char_was_cr} { set instring "\r$instring" } set outstring [string map [list \r\n \uFFFF] $instring] set outstring [string map [list \n \r\n] $outstring] set outstring [string map [list \uFFFF \r\n] $outstring] set lastchar [string range $outstring end end] if {$lastchar eq "\r"} { set o_last_char_was_cr 1 set outstring [string range $outstring 0 end-1] } else { set o_last_char_was_cr 0 } #review! can we detect eof here on the transform_handle? #if eof, we don't want to strip a trailing \r return [encoding convertto $o_enc $outstring] #return [encoding convertto utf-16le $outstring] } } oo::define towindows { method meta_is_redirection {} { return $o_is_junction } } } } # ---------------------------------------------------------------------------- #review float/sink metaphor. #perhaps something with the concept of upstream and downstream? #need concepts for push towards data, sit in middle where placed, and lag at tail of data stream. ## upstream for stdin is at the bottom of the stack and for stdout is the top of the stack. #upstream,neutral-upstream,downstream,downstream-aside,downstream-replace (default neutral-upstream - require action 'stack' to use standard channel stacking concept and ignore other actions) #This is is a bit different from the float/sink metaphor which refers to the channel stacking order as opposed to the data-flow direction. #The idea would be that whether input or output # upstream additions go to the side closest to the datasource # downstream additions go furthest from the datasource # - all new additions go ahead of any diversions as the most upstream diversion is the current end of the stream in a way. # - this needs review regarding subsequent removal of the diversion and whether filters re-order in response.. # or if downstream & neutral additions are reclassified upon insertion if they land among existing upstreams(?) # neutral-upstream goes to the datasource side of the neutral-upstream list. # No 'neutral' option provided so that we avoid the need to think forwards or backwards when adding stdin vs stdout shellfilter does the necessary pop/push reordering. # No 'neutral-downstream' to reduce complexity. # downstream-replace & downstream-aside head downstream to the first diversion they encounter. ie these actions are no longer referring to the stack direction but only the dataflow direction. # # ---------------------------------------------------------------------------- # # 'filters' are transforms that don't redirect # - limited range of actions to reduce complexity. # - any requirement not fulfilled by float,sink,sink-replace,sink-sideline should be done by multiple pops and pushes # #actions can float to top of filters or sink to bottom of filters #when action is of type sink, it can optionally replace or sideline the first non-filter it encounters (highest redirection on the stack.. any lower are starved of the stream anyway) # - sideline means to temporarily replace the item and keep a record, restoring if/when we are removed from the transform stack # ##when action is of type float it can't replace or sideline anything. A float is added above any existing floats and they stay in the same order relative to each other, #but non-floats added later will sit below all floats. #(review - float/sink initially designed around output channels. For stdin the dataflow is reversed. implement float-aside etc?) # # #action: float sink sink-replace,sink-sideline # # ## note - whether stack is for input or output we maintain it in the same direction - which is in sync with the tcl chan pop chan push concept. ## namespace eval shellfilter::stack { #todo - implement as oo variable pipelines [list] proc items {} { #review - stdin,stdout,stderr act as pre-existing pipelines, and we can't create a new one with these names - so they should probably be autoconfigured and listed.. # - but in what contexts? only when we find them in [chan names]? variable pipelines return [dict keys $pipelines] } proc item {pipename} { variable pipelines return [dict get $pipelines $pipename] } proc status {{pipename *} args} { variable pipelines set pipecount [dict size $pipelines] set tableprefix "$pipecount pipelines active\n" foreach p [dict keys $pipelines] { append tableprefix " " $p \n } package require overtype #todo -verbose set table "" set ac1 [string repeat " " 15] set ac2 [string repeat " " 42] set ac3 [string repeat " " 70] append table "[overtype::left $ac1 channel-ident] " append table "[overtype::left $ac2 device-info] " append table "[overtype::left $ac3 stack-info]" append table \n set bc1 [string repeat " " 5] ;#stack id set bc2 [string repeat " " 25] ;#transform set bc3 [string repeat " " 50] ;#settings foreach k [dict keys $pipelines $pipename] { set lc [dict get $pipelines $k device localchan] if {[dict exists $k device workertid]} { set tid [dict get $pipelines $k device workertid] } else { set tid "" } set col1 [overtype::left $ac1 $k] set col2 [overtype::left $ac2 "localchan: $lc tid:$tid"] set stack [dict get $pipelines $k stack] if {![llength $stack]} { set col3 $ac3 } else { set rec [lindex $stack 0] set bcol1 [overtype::left $bc1 [dict get $rec -id]] set bcol2 [overtype::left $bc2 [namespace tail [dict get $rec -transform]]] set bcol3 [overtype::left $bc3 [dict get $rec -settings]] set stackrow "$bcol1 $bcol2 $bcol3" set col3 [overtype::left $ac3 $stackrow] } append table "$col1 $col2 $col3\n" foreach rec [lrange $stack 1 end] { set col1 $ac1 set col2 $ac2 if {[llength $rec]} { set bc1 [overtype::left $bc1 [dict get $rec -id]] set bc2 [overtype::left $bc2 [namespace tail [dict get $rec -transform]]] set bc3 [overtype::left $bc3 [dict get $rec -settings]] set stackrow "$bc1 $bc2 $bc3" set col3 [overtype::left $ac3 $stackrow] } else { set col3 $ac3 } append table "$col1 $col2 $col3\n" } } return $tableprefix$table } #used for output channels - we usually want to sink redirections below the floaters and down to topmost existing redir proc _get_stack_floaters {stack} { set floaters [list] foreach t [lreverse $stack] { switch -- [dict get $t -action] { float { lappend floaters $t } default { break } } } return [lreverse $floaters] } #for output-channel sinking proc _get_stack_top_redirection {stack} { set r 0 ;#reverse index foreach t [lreverse $stack] { set obj [dict get $t -obj] if {[$obj meta_is_redirection]} { set idx [expr {[llength $stack] - ($r + 1) }] ;#forward index return [list index $idx record $t] } incr r } #not found return [list index -1 record {}] } #exclude float-locked, locked, sink-locked proc _get_stack_top_redirection_replaceable {stack} { set r 0 ;#reverse index foreach t [lreverse $stack] { set action [dict get $t -action] if {![string match "*locked*" $action]} { set obj [dict get $t -obj] if {[$obj meta_is_redirection]} { set idx [expr {[llength $stack] - ($r + 1) }] ;#forward index return [list index $idx record $t] } } incr r } #not found return [list index -1 record {}] } #for input-channels ? proc _get_stack_bottom_redirection {stack} { set i 0 foreach t $stack { set obj [dict get $t -obj] if {[$obj meta_is_redirection]} { return [linst index $i record $t] } incr i } #not found return [list index -1 record {}] } proc get_next_counter {pipename} { variable pipelines #use dictn incr ? set counter [dict get $pipelines $pipename counter] incr counter dict set pipelines $pipename counter $counter return $counter } proc unwind {pipename} { variable pipelines set stack [dict get $pipelines $pipename stack] set localchan [dict get $pipelines $pipename device localchan] foreach tf [lreverse $stack] { chan pop $localchan } dict set pipelines $pipename [list] } #todo proc delete {pipename {wait 0}} { variable pipelines set pipeinfo [dict get $pipelines $pipename] set deviceinfo [dict get $pipeinfo device] set localchan [dict get $deviceinfo localchan] unwind $pipename #release associated thread set tid [dict get $deviceinfo workertid] if {$wait} { thread::release -wait $tid } else { thread::release $tid } #Memchan closes without error - tcl::chan::fifo2 raises something like 'can not find channel named "rc977"' - REVIEW. why? catch {chan close $localchan} } #review - proc name clarity is questionable. remove_stackitem? proc remove {pipename remove_id} { variable pipelines if {![dict exists $pipelines $pipename]} { puts stderr "WARNING: shellfilter::stack::remove pipename '$pipename' not found in pipelines dict: '$pipelines' [info level -1]" return } set stack [dict get $pipelines $pipename stack] set localchan [dict get $pipelines $pipename device localchan] set posn 0 set idposn -1 set asideposn -1 foreach t $stack { set id [dict get $t -id] if {$id eq $remove_id} { set idposn $posn break } #look into asides (only can be one for now) if {[llength [dict get $t -aside]]} { set a [dict get $t -aside] if {[dict get $a -id] eq $remove_id} { set asideposn $posn break } } incr posn } if {$asideposn > 0} { #id wasn't found directly in stack, but in an -aside. we don't need to pop anything - just clear this aside record set container [lindex $stack $asideposn] dict set container -aside {} lset stack $asideposn $container dict set pipelines $pipename stack $stack } else { if {$idposn < 0} { ::shellfilter::log::write shellfilter "ERROR shellfilter::stack::remove $pipename id '$remove_id' not found" puts stderr "|WARNING>shellfilter::stack::remove $pipename id '$remove_id' not found" return 0 } set removed_item [lindex $stack $idposn] #include idposn in poplist set poplist [lrange $stack $idposn end] set stack [lreplace $stack $idposn end] #pop all chans before adding anything back in! foreach p $poplist { chan pop $localchan } if {[llength [dict get $removed_item -aside]]} { set restore [dict get $removed_item -aside] set t [dict get $restore -transform] set tsettings [dict get $restore -settings] set obj [$t new $restore] set h [chan push $localchan $obj] dict set restore -handle $h dict set restore -obj $obj lappend stack $restore } #put popped back except for the first one, which we want to remove foreach p [lrange $poplist 1 end] { set t [dict get $p -transform] set tsettings [dict get $p -settings] set obj [$t new $p] set h [chan push $localchan $obj] dict set p -handle $h dict set p -obj $obj lappend stack $p } dict set pipelines $pipename stack $stack } show_pipeline $pipename -note "after_remove $remove_id" return 1 } #pop a number of items of the top of the stack, add our transform record, and add back all (or the tail of poplist if pushstartindex > 0) proc insert_transform {pipename stack transformrecord poplist {pushstartindex 0}} { variable pipelines set bottom_pop_posn [expr {[llength $stack] - [llength $poplist]}] set poplist [lrange $stack $bottom_pop_posn end] set stack [lreplace $stack $bottom_pop_posn end] set localchan [dict get $pipelines $pipename device localchan] foreach p [lreverse $poplist] { chan pop $localchan } set transformname [dict get $transformrecord -transform] set transformsettings [dict get $transformrecord -settings] set obj [$transformname new $transformrecord] set h [chan push $localchan $obj] dict set transformrecord -handle $h dict set transformrecord -obj $obj dict set transformrecord -note "insert_transform" lappend stack $transformrecord foreach p [lrange $poplist $pushstartindex end] { set t [dict get $p -transform] set tsettings [dict get $p -settings] set obj [$t new $p] set h [chan push $localchan $obj] #retain previous -id - code that added it may have kept reference and not expecting it to change dict set p -handle $h dict set p -obj $obj dict set p -note "re-added" lappend stack $p } return $stack } #fifo2 proc new {pipename args} { variable pipelines if {($pipename in [dict keys $pipelines]) || ($pipename in [chan names])} { error "shellfilter::stack::new error: pipename '$pipename' already exists" } set opts [dict merge {-settings {}} $args] set defaultsettings [dict create -raw 1 -buffering line -direction out] set targetsettings [dict merge $defaultsettings [dict get $opts -settings]] set direction [dict get $targetsettings -direction] #pipename is the source/facility-name ? if {$direction eq "out"} { set pipeinfo [shellfilter::pipe::open_out $pipename $targetsettings] } else { puts stderr "|jn> pipe::open_in $pipename $targetsettings" set pipeinfo [shellfilter::pipe::open_in $pipename $targetsettings] } #open_out/open_in will configure buffering based on targetsettings set program_chan [dict get $pipeinfo localchan] set worker_chan [dict get $pipeinfo remotechan] set workertid [dict get $pipeinfo workertid] set deviceinfo [dict create pipename $pipename localchan $program_chan remotechan $worker_chan workertid $workertid direction $direction] dict set pipelines $pipename [list counter 0 device $deviceinfo stack [list]] return $deviceinfo } #we 'add' rather than 'push' because transforms can float,sink and replace/sideline so they don't necessarily go to the top of the transform stack proc add {pipename transformname args} { variable pipelines if {($pipename ni [chan names]) && ($pipename ni [dict keys $pipelines])} { error "shellfilter::stack::add no existing chan or pipename matching '$pipename' use stdin/stderr/stdout or shellfilter::stack::new " } set args [dict merge {-action "" -settings {}} $args] set action [dict get $args -action] set transformsettings [dict get $args -settings] if {[string first "::" $transformname] < 0} { set transformname ::shellfilter::chan::$transformname } if {![llength [info commands $transformname]]} { error "shellfilter::stack::push unknown transform '$transformname'" } if {![dict exists $pipelines $pipename]} { #pipename must be in chan names - existing device/chan #record a -read and -write end even if the device is only being used as one or the other set deviceinfo [dict create pipename $pipename localchan $pipename remotechan {}] dict set pipelines $pipename [list counter 0 device $deviceinfo stack [list]] } else { set deviceinfo [dict get $pipelines $pipename device] } set id [get_next_counter $pipename] set stack [dict get $pipelines $pipename stack] set localchan [dict get $deviceinfo localchan] #we redundantly store chan in each transform - makes debugging clearer # -encoding similarly could be stored only at the pipeline level (or even queried directly each filter-read/write), # but here it may help detect unexpected changes during lifetime of the stack and avoids the chance of callers incorrectly using the transform handle?) # jn set transform_record [list -id $id -chan $pipename -encoding [chan configure $localchan -encoding] -transform $transformname -aside {} {*}$args] switch -glob -- $action { float - float-locked { set obj [$transformname new $transform_record] set h [chan push $localchan $obj] dict set transform_record -handle $h dict set transform_record -obj $obj lappend stack $transform_record } "" - locked { set floaters [_get_stack_floaters $stack] if {![llength $floaters]} { set obj [$transformname new $transform_record] set h [chan push $localchan $obj] dict set transform_record -handle $h dict set transform_record -obj $obj lappend stack $transform_record } else { set poplist $floaters set stack [insert_transform $pipename $stack $transform_record $poplist] } } "sink*" { set redirinfo [_get_stack_top_redirection $stack] set idx_existing_redir [dict get $redirinfo index] if {$idx_existing_redir == -1} { #no existing redirection transform on the stack #pop everything.. add this record as the first redirection on the stack set poplist $stack set stack [insert_transform $pipename $stack $transform_record $poplist] } else { switch -glob -- $action { "sink-replace" { #include that index in the poplist set poplist [lrange $stack $idx_existing_redir end] #pop all from idx_existing_redir to end, but put back 'lrange $poplist 1 end' set stack [insert_transform $pipename $stack $transform_record $poplist 1] } "sink-aside*" { set existing_redir_record [lindex $stack $idx_existing_redir] if {[string match "*locked*" [dict get $existing_redir_record -action]]} { set put_aside 0 #we can't aside this one - sit above it instead. set poplist [lrange $stack $idx_existing_redir+1 end] set stack [lrange $stack 0 $idx_existing_redir] } else { set put_aside 1 dict set transform_record -aside [lindex $stack $idx_existing_redir] set poplist [lrange $stack $idx_existing_redir end] set stack [lrange $stack 0 $idx_existing_redir-1] } foreach p $poplist { chan pop $localchan } set transformname [dict get $transform_record -transform] set transform_settings [dict get $transform_record -settings] set obj [$transformname new $transform_record] set h [chan push $localchan $obj] dict set transform_record -handle $h dict set transform_record -obj $obj dict set transform_record -note "insert_transform-with-aside" lappend stack $transform_record #add back poplist *except* the one we transferred into -aside (if we were able) foreach p [lrange $poplist $put_aside end] { set t [dict get $p -transform] set tsettings [dict get $p -settings] set obj [$t new $p] set h [chan push $localchan $obj] #retain previous -id - code that added it may have kept reference and not expecting it to change dict set p -handle $h dict set p -obj $obj dict set p -note "re-added-after-sink-aside" lappend stack $p } } default { #plain "sink" #we only sink to the topmost redirecting filter - which makes sense for an output channel #For stdin.. this is more problematic as we're more likely to want to intercept the bottom most redirection. #todo - review. Consider making default insert position for input channels to be at the source... and float/sink from there. # - we don't currently know from the stack api if adding input vs output channel - so this needs work to make intuitive. # consider splitting stack::add to stack::addinput stack::addoutput to split the different behaviour set poplist [lrange $stack $idx_existing_redir+1 end] set stack [insert_transform $pipename $stack $transform_record $poplist] } } } } default { error "shellfilter::stack::add unimplemented action '$action'" } } dict set pipelines $pipename stack $stack #puts stdout "==" #puts stdout "==>stack: $stack" #puts stdout "==" show_pipeline $pipename -note "after_add $transformname $args" return $id } proc show_pipeline {pipename args} { variable pipelines set stack [dict get $pipelines $pipename stack] set tag "SHELLFILTER::STACK" #JMN - load from config #::shellfilter::log::open $tag {-syslog 127.0.0.1:514} ::shellfilter::log::open $tag {-syslog ""} ::shellfilter::log::write $tag "transform stack for $pipename $args" foreach tf $stack { ::shellfilter::log::write $tag " $tf" } } } namespace eval shellfilter { variable sources [list] variable stacks [dict create] proc ::shellfilter::redir_channel_to_log {chan args} { variable sources set default_logsettings [dict create \ -tag redirected_$chan -syslog "" -file ""\ ] if {[dict exists $args -action]} { set action [dict get $args -action] } else { # action "sink" is a somewhat reasonable default for an output redirection transform # but it can make it harder to configure a plain ordered stack if the user is not expecting it, so we'll default to stack # also.. for stdin transform sink makes less sense.. #todo - default "stack" instead of empty string set action "" } if {[dict exists $args -settings]} { set logsettings [dict get $args -settings] } else { set logsettings {} } set logsettings [dict merge $default_logsettings $logsettings] set tag [dict get $logsettings -tag] if {$tag ni $sources} { lappend sources $tag } set id [shellfilter::stack::add $chan logonly -action $action -settings $logsettings] return $id } proc ::shellfilter::redir_output_to_log {tagprefix args} { variable sources set default_settings [list -tag ${tagprefix} -syslog "" -file ""] set opts [dict create -action "" -settings {}] set opts [dict merge $opts $args] set optsettings [dict get $opts -settings] set settings [dict merge $default_settings $optsettings] set tag [dict get $settings -tag] if {$tag ne $tagprefix} { error "shellfilter::redir_output_to_log -tag value must match supplied tagprefix:'$tagprefix'. Omit -tag, or make it the same. It will automatically be suffixed with stderr and stdout. Use redir_channel_to_log if you want to separately configure each channel" } lappend sources ${tagprefix}stdout ${tagprefix}stderr set stdoutsettings $settings dict set stdoutsettings -tag ${tagprefix}stdout set stderrsettings $settings dict set stderrsettings -tag ${tagprefix}stderr set idout [redir_channel_to_log stdout -action [dict get $opts -action] -settings $stdoutsettings] set iderr [redir_channel_to_log stderr -action [dict get $opts -action] -settings $stderrsettings] return [list $idout $iderr] } #return a dict keyed on numerical list index showing info about each element # - particularly # 'wouldbrace' to indicate that the item would get braced by Tcl when added to another list # 'head_tail_chars' to show current first and last character (in case it's wrapped e.g in double or single quotes or an existing set of braces) proc list_element_info {inputlist} { set i 0 set info [dict create] set testlist [list] foreach original_item $inputlist { #--- # avoid sharing internal rep with original items in the list (avoids shimmering of rep in original list for certain items such as paths) unset -nocomplain item append item $original_item {} #--- set iteminfo [dict create] set itemlen [string length $item] lappend testlist $item set tcl_len [string length $testlist] set diff [expr {$tcl_len - $itemlen}] if {$diff == 0} { dict set iteminfo wouldbrace 0 dict set iteminfo wouldescape 0 } else { #test for escaping vs bracing! set testlistchars [split $testlist ""] if {([lindex $testlistchars 0] eq "\{") && ([lindex $testlistchars end] eq "\}")} { dict set iteminfo wouldbrace 1 dict set iteminfo wouldescape 0 } else { dict set iteminfo wouldbrace 0 dict set iteminfo wouldescape 1 } } set testlist [list] set charlist [split $item ""] set char_a [lindex $charlist 0] set char_b [lindex $charlist 1] set char_ab ${char_a}${char_b} set char_y [lindex $charlist end-1] set char_z [lindex $charlist end] set char_yz ${char_y}${char_z} if { ("{" in $charlist) || ("}" in $charlist) } { dict set iteminfo has_braces 1 set innerchars [lrange $charlist 1 end-1] if {("{" in $innerchars) || ("}" in $innerchars)} { dict set iteminfo has_inner_braces 1 } else { dict set iteminfo has_inner_braces 0 } } else { dict set iteminfo has_braces 0 dict set iteminfo has_inner_braces 0 } #todo - brace/char counting to determine if actually 'wrapped' #e.g we could have list element {((abc)} - which appears wrapped if only looking at first and last chars. #also {(x) (y)} as a list member.. how to treat? if {$itemlen <= 1} { dict set iteminfo apparentwrap "not" } else { if {($char_a eq {"}) && ($char_z eq {"})} { dict set iteminfo apparentwrap "doublequotes" } elseif {($char_a eq "'") && ($char_z eq "'")} { dict set iteminfo apparentwrap "singlequotes" } elseif {($char_a eq "(") && ($char_z eq ")")} { dict set iteminfo apparentwrap "brackets" } elseif {($char_a eq "\{") && ($char_z eq "\}")} { dict set iteminfo apparentwrap "braces" } elseif {($char_a eq "^") && ($char_z eq "^")} { dict set iteminfo apparentwrap "carets" } elseif {($char_a eq "\[") && ($char_z eq "\]")} { dict set iteminfo apparentwrap "squarebrackets" } elseif {($char_a eq "`") && ($char_z eq "`")} { dict set iteminfo apparentwrap "backquotes" } elseif {($char_a eq "\n") && ($char_z eq "\n")} { dict set iteminfo apparentwrap "lf-newline" } elseif {($char_ab eq "\r\n") && ($char_yz eq "\r\n")} { dict set iteminfo apparentwrap "crlf-newline" } else { dict set iteminfo apparentwrap "not-determined" } } dict set iteminfo wrapbalance "unknown" ;#a hint to caller that apparentwrap is only a guide. todo - possibly make wrapbalance indicate 0 for unbalanced.. and positive numbers for outer-count of wrappings. #e.g {((x)} == 0 {((x))} == 1 {(x) (y (z))} == 2 dict set iteminfo head_tail_chars [list $char_a $char_z] set namemap [list \ \r cr\ \n lf\ {"} doublequote\ {'} singlequote\ "`" backquote\ "^" caret\ \t tab\ " " sp\ "\[" lsquare\ "\]" rsquare\ "(" lbracket\ ")" rbracket\ "\{" lbrace\ "\}" rbrace\ \\ backslash\ / forwardslash\ ] if {[string length $char_a]} { set char_a_name [string map $namemap $char_a] } else { set char_a_name "emptystring" } if {[string length $char_z]} { set char_z_name [string map $namemap $char_z] } else { set char_z_name "emptystring" } dict set iteminfo head_tail_names [list $char_a_name $char_z_name] dict set iteminfo len $itemlen dict set iteminfo difflen $diff ;#2 for braces, 1 for quoting?, or 0. dict set info $i $iteminfo incr i } return $info } #parse bracketed expression (e.g produced by vim "shellxquote=(" ) into a tcl (nested) list #e.g {(^c:/my spacey/path^ >^somewhere^)} #e.g {(blah (etc))}" #Result is always a list - even if only one toplevel set of brackets - so it may need [lindex $result 0] if input is the usual case of {( ...)} # - because it also supports the perhaps less likely case of: {( ...) unbraced (...)} etc # Note that #maintenance warning - duplication in branches for bracketed vs unbracketed! proc parse_cmd_brackets {str} { #wordwrappers currently best suited to non-bracket entities - no bracket matching within - anything goes until end-token reached. # - but.. they only take effect where a word can begin. so a[x y] may be split at the space unless it's within some other wraper e.g " a[x y]" will not break at the space # todo - consider extending the in-word handling of word_bdepth which is currently only applied to () i.e aaa(x y) is supported but aaa[x y] is not as the space breaks the word up. set wordwrappers [list \ "\"" [list "\"" "\"" "\""]\ {^} [list "\"" "\"" "^"]\ "'" [list "'" "'" "'"]\ "\{" [list "\{" "\}" "\}"]\ {[} [list {[} {]} {]}]\ ] ;#dict mapping start_character to {replacehead replacetail expectedtail} set shell_specials [list "|" "|&" "<" "<@" "<<" ">" "2>" ">&" ">>" "2>>" ">>&" ">@" "2>@" "2>@1" ">&@" "&" "&&" ] ;#words/chars that may precede an opening bracket but don't merge with the bracket to form a word. #puts "pb:$str" set in_bracket 0 set in_word 0 set word "" set result {} set word_bdepth 0 set word_bstack [list] set wordwrap "" ;#only one active at a time set bracketed_elements [dict create] foreach char [split $str ""] { #puts "c:$char bracketed:$bracketed_elements" if {$in_bracket > 0} { if {$in_word} { if {[string length $wordwrap]} { #anything goes until end-char #todo - lookahead and only treat as closing if before a space or ")" ? lassign [dict get $wordwrappers $wordwrap] _open closing endmark if {$char eq $endmark} { set wordwrap "" append word $closing dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 } else { append word $char } } else { if {$word_bdepth == 0} { #can potentially close off a word - or start a new one if word-so-far is a shell-special if {$word in $shell_specials} { if {$char eq ")"} { dict lappend bracketed_elements $in_bracket $word set subresult [dict get $bracketed_elements $in_bracket] dict set bracketed_elements $in_bracket [list] incr in_bracket -1 if {$in_bracket == 0} { lappend result $subresult } else { dict lappend bracketed_elements $in_bracket $subresult } set word "" set in_word 0 } elseif {[regexp {[\s]} $char]} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 } elseif {$char eq "("} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 incr in_bracket } else { #at end of shell-specials is another point to look for word started by a wordwrapper char #- expect common case of things like >^/my/path^ if {$char in [dict keys $wordwrappers]} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 1 ;#just for explicitness.. we're straight into the next word. set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { #something unusual.. keep going with word! append word $char } } } else { if {$char eq ")"} { dict lappend bracketed_elements $in_bracket $word set subresult [dict get $bracketed_elements $in_bracket] dict set bracketed_elements $in_bracket [list] incr in_bracket -1 if {$in_bracket == 0} { lappend result $subresult } else { dict lappend bracketed_elements $in_bracket $subresult } set word "" set in_word 0 } elseif {[regexp {[\s]} $char]} { dict lappend bracketed_elements $in_bracket $word set word "" set in_word 0 } elseif {$char eq "("} { #ordinary word up-against and opening bracket - brackets are part of word. incr word_bdepth append word "(" } else { append word $char } } } else { #currently only () are used for word_bdepth - todo add all or some wordwrappers chars so that the word_bstack can have multiple active. switch -- $char { "(" { incr word_bdepth lappend word_bstack $char append word $char } ")" { incr word_bdepth -1 set word_bstack [lrange $word_bstack 0 end-1] append word $char } default { #spaces and chars added to word as it's still in a bracketed section append word $char } } } } } else { if {$char eq "("} { incr in_bracket } elseif {$char eq ")"} { set subresult [dict get $bracketed_elements $in_bracket] dict set bracketed_elements $in_bracket [list] incr in_bracket -1 if {$in_bracket == 0} { lappend result $subresult } else { dict lappend bracketed_elements $in_bracket $subresult } } elseif {[regexp {[\s]} $char]} { # } else { #first char of word - look for word-wrappers if {$char in [dict keys $wordwrappers]} { set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { set word $char } set in_word 1 } } } else { if {$in_word} { if {[string length $wordwrap]} { lassign [dict get $wordwrappers $wordwrap] _open closing endmark if {$char eq $endmark} { set wordwrap "" append word $closing lappend result $word set word "" set in_word 0 } else { append word $char } } else { if {$word_bdepth == 0} { if {$word in $shell_specials} { if {[regexp {[\s]} $char]} { lappend result $word set word "" set in_word 0 } elseif {$char eq "("} { lappend result $word set word "" set in_word 0 incr in_bracket } else { #at end of shell-specials is another point to look for word started by a wordwrapper char #- expect common case of things like >^/my/path^ if {$char in [dict keys $wordwrappers]} { lappend result $word set word "" set in_word 1 ;#just for explicitness.. we're straight into the next word. set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { #something unusual.. keep going with word! append word $char } } } else { if {[regexp {[\s)]} $char]} { lappend result $word set word "" set in_word 0 } elseif {$char eq "("} { incr word_bdepth append word $char } else { append word $char } } } else { switch -- $char { "(" { incr word_bdepth append word $char } ")" { incr word_bdepth -1 append word $char } default { append word $char } } } } } else { if {[regexp {[\s]} $char]} { #insig whitespace(?) } elseif {$char eq "("} { incr in_bracket dict set bracketed_elements $in_bracket [list] } elseif {$char eq ")"} { error "unbalanced bracket - unable to proceed result so far: $result bracketed_elements:$bracketed_elements" } else { #first char of word - look for word-wrappers if {$char in [dict keys $wordwrappers]} { set wordwrap $char set word [lindex [dict get $wordwrappers $char] 0] ;#replace trigger char with the start value it maps to. } else { set word $char } set in_word 1 } } } #puts "----$bracketed_elements" } if {$in_bracket > 0} { error "shellfilter::parse_cmd_brackets missing close bracket. input was '$str'" } if {[dict exists $bracketed_elements 0]} { #lappend result [lindex [dict get $bracketed_elements 0] 0] lappend result [dict get $bracketed_elements 0] } if {$in_word} { lappend result $word } return $result } #only double quote if argument not quoted with single or double quotes proc dquote_if_not_quoted {a} { set wrapchars [string cat [string range $a 0 0] [string range $a end end]] switch -- $wrapchars { {""} - {''} { return $a } default { set newinner [string map [list {"} "\\\""] $a] return "\"$newinner\"" } } } #proc dquote_if_not_bracketed/braced? #wrap in double quotes if not double-quoted proc dquote_if_not_dquoted {a} { set wrapchars [string cat [string range $a 0 0] [string range $a end end]] switch -- $wrapchars { {""} { return $a } default { #escape any inner quotes.. set newinner [string map [list {"} "\\\""] $a] return "\"$newinner\"" } } } proc dquote {a} { #escape any inner quotes.. set newinner [string map [list {"} "\\\""] $a] return "\"$newinner\"" } proc get_scriptrun_from_cmdlist_dquote_if_not {cmdlist {shellcmdflag ""}} { set scr [auto_execok "script"] if {[string length $scr]} { #set scriptrun "( $c1 [lrange $cmdlist 1 end] )" set arg1 [lindex $cmdlist 0] if {[string first " " $arg1]>0} { set c1 [dquote_if_not_quoted $arg1] #set c1 "\"$arg1\"" } else { set c1 $arg1 } if {[string length $shellcmdflag]} { set scriptrun "$shellcmdflag \$($c1 " } else { set scriptrun "\$($c1 " } #set scriptrun "$c1 " foreach a [lrange $cmdlist 1 end] { #set a [string map [list "/" "//"] $a] #set a [string map [list "\"" "\\\""] $a] if {[string first " " $a] > 0} { append scriptrun [dquote_if_not_quoted $a] } else { append scriptrun $a } append scriptrun " " } set scriptrun [string trim $scriptrun] append scriptrun ")" #return [list $scr -q -e -c $scriptrun /dev/null] return [list $scr -e -c $scriptrun /dev/null] } else { return $cmdlist } } proc ::shellfilter::trun {commandlist args} { #jmn } # run a command (or tcl script) with tees applied to stdout/stderr/stdin (or whatever channels are being used) # By the point run is called - any transforms should already be in place on the channels if they're needed. # The tees will be inline with none,some or all of those transforms depending on how the stack was configured # (upstream,downstream configured via -float,-sink etc) proc ::shellfilter::run {commandlist args} { #must be a list. If it was a shell commandline string. convert it elsewhere first. variable sources set runtag "shellfilter-run" #set tid [::shellfilter::log::open $runtag [list -syslog 127.0.0.1:514]] set tid [::shellfilter::log::open $runtag [list -syslog ""]] ::shellfilter::log::write $runtag " commandlist:'$commandlist' len:[llength $commandlist]" #flush stdout #flush stderr #adding filters with sink-aside will temporarily disable the existing redirection #All stderr/stdout from the shellcommand will now tee to the underlying stderr/stdout as well as the configured syslog set defaults [dict create \ -teehandle command \ -outchan stdout \ -errchan stderr \ -inchan stdin \ -tclscript 0 \ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set outchan [dict get $opts -outchan] set errchan [dict get $opts -errchan] set inchan [dict get $opts -inchan] set teehandle [dict get $opts -teehandle] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set is_script [dict get $opts -tclscript] dict unset opts -tclscript ;#don't pass it any further # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set teehandle_out ${teehandle}out ;#default commandout set teehandle_err ${teehandle}err set teehandle_in ${teehandle}in #puts stdout "shellfilter initialising tee_to_pipe transforms for in/out/err" # sources should be added when stack::new called instead(?) foreach source [list $teehandle_out $teehandle_err] { if {$source ni $sources} { lappend sources $source } } set outdeviceinfo [dict get $::shellfilter::stack::pipelines $teehandle_out device] set outpipechan [dict get $outdeviceinfo localchan] set errdeviceinfo [dict get $::shellfilter::stack::pipelines $teehandle_err device] set errpipechan [dict get $errdeviceinfo localchan] #set indeviceinfo [dict get $::shellfilter::stack::pipelines $teehandle_in device] #set inpipechan [dict get $indeviceinfo localchan] #NOTE:These transforms are not necessarily at the top of each stack! #The float/sink mechanism, along with whether existing transforms are diversionary decides where they sit. set id_out [shellfilter::stack::add $outchan tee_to_pipe -action sink-aside -settings [list -tag $teehandle_out -pipechan $outpipechan]] set id_err [shellfilter::stack::add $errchan tee_to_pipe -action sink-aside -settings [list -tag $teehandle_err -pipechan $errpipechan]] # need to use os level channel handle for stdin - try named pipes (or even sockets) instead of fifo2 for this # If non os-level channel - the command can't be run with the redirection # stderr/stdout can be run with non-os handles in the call - # but then it does introduce issues with terminal-detection and behaviour for stdout at least # # input is also a tee - we never want to change the source at this point - just log/process a side-channel of it. # #set id_in [shellfilter::stack::add $inchan tee_to_pipe -action sink-aside -settings [list -tag commandin -pipechan $inpipechan]] #set id_out [shellfilter::stack::add stdout tee_to_log -action sink-aside -settings [list -tag shellstdout -syslog 127.0.0.1:514 -file ""]] #set id_err [shellfilter::stack::add stderr tee_to_log -action sink-aside -settings [list -tag shellstderr -syslog 127.0.0.1:514 -file "stderr.txt"]] #we need to catch errors - and ensure stack::remove calls occur. #An error can be raised if the command couldn't even launch, as opposed to a non-zero exitcode and stderr output from the command itself. # if {!$is_script} { set experiment 0 if $experiment { try { set results [exec {*}$commandlist] set exitinfo [list exitcode 0] } trap CHILDSTATUS {results options} { set exitcode [lindex [dict get $options -errorcode] 2] set exitinfo [list exitcode $exitcode] } } else { if {[catch { #run process with stdout/stderr/stdin or with configured channels #set exitinfo [shellcommand_stdout_stderr $commandlist $outchan $errchan $inpipechan {*}$opts] set exitinfo [shellcommand_stdout_stderr $commandlist $outchan $errchan stdin {*}$opts] #puts stderr "---->exitinfo $exitinfo" #subprocess result should usually have an "exitcode" key #but for background execution we will get a "pids" key of process ids. } errMsg]} { set exitinfo [list error "$errMsg" source shellcommand_stdout_stderr] } } } else { if {[catch { #script result set exitinfo [list result [uplevel #0 [list eval $commandlist]]] } errMsg]} { set exitinfo [list error "$errMsg" errorCode $::errorCode errorInfo "$::errorInfo"] } } #the previous redirections on the underlying inchan/outchan/errchan items will be restored from the -aside setting during removal #Remove execution-time Tees from stack shellfilter::stack::remove stdout $id_out shellfilter::stack::remove stderr $id_err #shellfilter::stack::remove stderr $id_in #chan configure stderr -buffering line #flush stdout ::shellfilter::log::write $runtag " return '$exitinfo'" ::shellfilter::log::close $runtag return $exitinfo } proc ::shellfilter::logtidyup { {tags {}} } { variable sources set worker_errorlist [list] set tidied_sources [list] set tidytag "logtidy" # opening a thread or writing to a log/syslog close to possible process exit is probably not a great idea. # we should ensure the thread already exists early on if we really need logging here. # #set tid [::shellfilter::log::open $tidytag {-syslog 127.0.0.1:514}] #::shellfilter::log::write $tidytag " logtidyuptags '$tags'" foreach s $sources { if {$s eq $tidytag} { continue } #puts "logtidyup source $s" set close 1 if {[llength $tags]} { if {$s ni $tags} { set close 0 } } if {$close} { lappend tidied_sources $s shellfilter::log::close $s lappend worker_errorlist {*}[shellthread::manager::get_and_clear_errors $s] } } set remaining_sources [list] foreach s $sources { if {$s ni $tidied_sources} { lappend remaining_sources $s } } #set sources [concat $remaining_sources $tidytag] set sources $remaining_sources #shellfilter::stack::unwind stdout #shellfilter::stack::unwind stderr return [list tidied $tidied_sources errors $worker_errorlist] } #package require tcl::chan::null # e.g set errchan [tcl::chan::null] # e.g chan push stdout [shellfilter::chan::var new ::some_var] proc ::shellfilter::shellcommand_stdout_stderr {commandlist outchan errchan inchan args} { set valid_flags [list \ -timeout \ -outprefix \ -errprefix \ -debug \ -copytempfile \ -outbuffering \ -errbuffering \ -inbuffering \ -readprocesstranslation \ -outtranslation \ -stdinhandler \ -outchan \ -errchan \ -inchan \ -teehandle\ ] set runtag shellfilter-run2 #JMN - load from config #set tid [::shellfilter::log::open $runtag [list -syslog "127.0.0.1:514"]] set tid [::shellfilter::log::open $runtag [list -syslog ""]] if {([llength $args] % 2) != 0} { error "Trailing arguments after any positional arguments must be in pairs of the form -argname argvalue. Valid flags are:'$valid_flags'" } set invalid_flags [list] dict for {k -} $args { switch -- $k { -timeout - -outprefix - -errprefix - -debug - -copytempfile - -outbuffering - -errbuffering - -inbuffering - -readprocesstranslation - -outtranslation - -stdinhandler - -outchan - -errchan - -inchan - -teehandle { } default { lappend invalid_flags $k } } } if {[llength $invalid_flags]} { error "Unknown option(s)'$invalid_flags': must be one of '$valid_flags'" } #line buffering generally best for output channels.. keeps relative output order of stdout/stdin closer to source order #there may be data where line buffering is inappropriate, so it's configurable per std channel #reading inputs with line buffering can result in extraneous newlines as we can't detect trailing data with no newline before eof. set defaults [dict create \ -outchan stdout \ -errchan stderr \ -inchan stdin \ -outbuffering none \ -errbuffering none \ -readprocesstranslation auto \ -outtranslation lf \ -inbuffering none \ -timeout 900000\ -outprefix ""\ -errprefix ""\ -debug 0\ -copytempfile 0\ -stdinhandler ""\ ] set args [dict merge $defaults $args] set outbuffering [dict get $args -outbuffering] set errbuffering [dict get $args -errbuffering] set inbuffering [dict get $args -inbuffering] set readprocesstranslation [dict get $args -readprocesstranslation] set outtranslation [dict get $args -outtranslation] set timeout [dict get $args -timeout] set outprefix [dict get $args -outprefix] set errprefix [dict get $args -errprefix] set debug [dict get $args -debug] set copytempfile [dict get $args -copytempfile] set stdinhandler [dict get $args -stdinhandler] set debugname "shellfilter-debug" if {$debug} { set tid [::shellfilter::log::open $debugname [list -syslog "127.0.0.1:514"]] ::shellfilter::log::write $debugname " commandlist '$commandlist'" } #'clock micros' good enough id for shellcommand calls unless one day they can somehow be called concurrently or sequentially within a microsecond and within the same interp. # a simple counter would probably work too #consider other options if an alternative to the single vwait in this function is used. set call_id [clock micros] ; set ::shellfilter::shellcommandvars($call_id,exitcode) "" set waitvar ::shellfilter::shellcommandvars($call_id,waitvar) if {$debug} { ::shellfilter::log::write $debugname " waitvar '$waitvar'" } lassign [chan pipe] rderr wrerr chan configure $wrerr -blocking 0 set custom_stderr "" set lastitem [lindex $commandlist end] #todo - ensure we can handle 2> file (space after >) #review - reconsider the handling of redirections such that tcl-style are handled totally separately to other shell syntaxes! # #note 2>@1 must ocur as last word for tcl - but 2@stdout can occur elsewhere #(2>@stdout echoes to main stdout - not into pipeline) #To properly do pipelines it looks like we will have to split on | and call this proc multiple times and wire it up accordingly (presumably in separate threads) switch -- [string trim $lastitem] { {&} { set name [lindex $commandlist 0] #background execution - stdout and stderr from child still comes here - but process is backgrounded #FIX! - this is broken for paths with backslashes for example #set pidlist [exec {*}[concat $name [lrange $commandlist 1 end]]] set pidlist [exec {*}$commandlist] return [list pids $pidlist] } {2>&1} - {2>@1} { set custom_stderr {2>@1} ;#use the tcl style set commandlist [lrange $commandlist 0 end-1] } default { # 2> filename # 2>> filename # 2>@ openfileid set redir2test [string range $lastitem 0 1] if {$redir2test eq "2>"} { set custom_stderr $lastitem set commandlist [lrange $commandlist 0 end-1] } } } set lastitem [lindex $commandlist end] set teefile "" ;#empty string, write, append #an ugly hack.. because redirections seem to arrive wrapped - review! #There be dragons here.. #Be very careful with list manipulation of the commandlist string.. backslashes cause havoc. commandlist must always be a well-formed list. generally avoid string manipulations on entire list or accidentally breaking a list element into parts if it shouldn't be.. #The problem here - is that we can't always know what was intended on the commandline regarding quoting ::shellfilter::log::write $runtag "checking for redirections in $commandlist" #sometimes we see a redirection without a following space e.g >C:/somewhere #normalize switch -regexp -- $lastitem\ {^>[/[:alpha:]]+} { set lastitem "> [string range $lastitem 1 end]" }\ {^>>[/[:alpha:]]+} { set lastitem ">> [string range $lastitem 2 end]" } #for a redirection, we assume either a 2-element list at tail of form {> {some path maybe with spaces}} #or that the tail redirection is not wrapped.. x y z > {some path maybe with spaces} #we can't use list methods such as llenth on a member of commandlist set wordlike_parts [regexp -inline -all {\S+} $lastitem] if {([llength $wordlike_parts] >= 2) && ([lindex $wordlike_parts 0] in [list ">>" ">"])} { #wrapped redirection - but maybe not 'well' wrapped (unquoted filename) set lastitem [string trim $lastitem] ;#we often see { > something} #don't use lassign or lrange on the element itself without checking first #we can treat the commandlist as a whole as a well formed list but not neccessarily each element within. #lassign $lastitem redir redirtarget #set commandlist [lrange $commandlist 0 end-1] # set itemchars [split $lastitem ""] set firstchar [lindex $itemchars 0] set lastchar [lindex $itemchars end] #NAIVE test for double quoted only! #consider for example {"a" x="b"} #testing first and last is not decisive #We need to decide what level of drilling down is even appropriate here.. #if something was double wrapped - it was perhaps deliberate so we don't interpret it as something(?) set head_tail_chars [list $firstchar $lastchar] set doublequoted [expr {[llength [lsearch -all $head_tail_chars "\""]] == 2}] if {[string equal "\{" $firstchar] && [string equal "\}" $lastchar]} { set curlyquoted 1 } else { set curlyquoted 0 } if {$curlyquoted} { #these are not the tcl protection brackets but ones supplied in the argument #it's still not valid to use list operations on a member of the commandlist set inner [string range $lastitem 1 end-1] #todo - fix! we still must assume there could be list-breaking data! set innerwords [regexp -inline -all {\S+} $inner] ;#better than [split $inner] because we don't get extra empty elements for each whitespace char set redir [lindex $innerwords 0] ;#a *potential* redir - to be tested below set redirtarget [lrange $innerwords 1 end] ;#all the rest } elseif {$doublequoted} { ::shellfilter::log::write $debugname "doublequoting at tail of command '$commandlist'" set inner [string range $lastitem 1 end-1] set innerwords [regexp -inline -all {\S+} $inner] set redir [lindex $innerwords 0] set redirtarget [lrange $innerwords 1 end] } else { set itemwords [regexp -inline -all {\S+} $lastitem] # e.g > c:\test becomes > {c:\test} # but > c/mnt/c/test/temp.txt stays as > /mnt/c/test/temp.txt set redir [lindex $itemwords 0] set redirtarget [lrange $itemwords 1 end] } set commandlist [lrange $commandlist 0 end-1] } elseif {[lindex $commandlist end-1] in [list ">>" ">"]} { #unwrapped redirection #we should be able to use list operations like lindex and lrange here as the command itself is hopefully still a well formed list set redir [lindex $commandlist end-1] set redirtarget [lindex $commandlist end] set commandlist [lrange $commandlist 0 end-2] } else { #no redirection set redir "" set redirtarget "" #no change to command list } switch -- $redir { ">>" - ">" { set redirtarget [string trim $redirtarget "\""] ::shellfilter::log::write $runtag " have redirection '$redir' to '$redirtarget'" set winfile $redirtarget ;#default assumption switch -glob -- $redirtarget { "/c/*" { set winfile "c:/[string range $redirtarget 3 end]" } "/mnt/c/*" { set winfile "c:/[string range $redirtarget 7 end]" } } if {[file exists [file dirname $winfile]]} { #containing folder for target exists if {$redir eq ">"} { set teefile "write" } else { set teefile "append" } ::shellfilter::log::write $runtag "Directory exists '[file dirname $winfile]' operation:$teefile" } else { #we should be writing to a file.. but can't ::shellfilter::log::write $runtag "cannot verify directory exists '[file dirname $winfile]'" } } default { ::shellfilter::log::write $runtag "No redir found!!" } } #often first element of command list is wrapped and cannot be run directly #e.g {{ls -l} {> {temp.tmp}}} #we will assume that if there is a single element which is a pathname containing a space - it is doubly wrapped. # this may not be true - and the command may fail if it's just {c:\program files\etc} but it is the less common case and we currently have no way to detect. #unwrap first element.. will not affect if not wrapped anyway (subject to comment above re spaces) set commandlist [concat [lindex $commandlist 0] [lrange $commandlist 1 end]] #todo? #child process environment. # - to pass a different environment to the child - we would need to save the env array, modify as required, and then restore the env array. #to restore buffering states after run set remember_in_out_err_buffering [list \ [chan configure $inchan -buffering] \ [chan configure $outchan -buffering] \ [chan configure $errchan -buffering] \ ] set remember_in_out_err_translation [list \ [chan configure $inchan -translation] \ [chan configure $outchan -translation] \ [chan configure $errchan -translation] \ ] chan configure $inchan -buffering $inbuffering -blocking 0 ;#we are setting up a readable handler for this - so non-blocking ok chan configure $errchan -buffering $errbuffering #chan configure $outchan -blocking 0 chan configure $outchan -buffering $outbuffering ;#don't configure non-blocking. weird duplicate of *second* line occurs if you do. # #-------------------------------------------- #Tested on windows. Works to stop in output when buffering is none, reading from channel with -translation auto #cmd, pwsh, tcl #chan configure $outchan -translation lf #chan configure $errchan -translation lf #-------------------------------------------- chan configure $outchan -translation $outtranslation chan configure $errchan -translation $outtranslation #puts stderr "chan configure $wrerr [chan configure $wrerr]" if {$debug} { ::shellfilter::log::write $debugname "COMMAND [list $commandlist] strlen:[string length $commandlist] llen:[llength $commandlist]" } #todo - handle custom redirection of stderr to a file? if {[string length $custom_stderr]} { #::shellfilter::log::write $runtag "LAUNCH open |[concat $commandlist $custom_stderr] a+" #set rdout [open |[concat $commandlist $custom_stderr] a+] ::shellfilter::log::write $runtag "LAUNCH open |[concat $commandlist [list $custom_stderr <@$inchan]] [list RDONLY]" set rdout [open |[concat $commandlist [list <@$inchan $custom_stderr]] [list RDONLY]] set rderr "bogus" ;#so we don't wait for it } else { ::shellfilter::log::write $runtag "LAUNCH open |[concat $commandlist [list 2>@$wrerr <@$inchan]] [list RDONLY]" #set rdout [open |[concat $commandlist [list 2>@$wrerr]] a+] #set rdout [open |[concat $commandlist [list 2>@$wrerr]] [list RDWR]] # If we don't redirect stderr to our own tcl-based channel - then the transforms don't get applied. # This is the whole reason we need these file-event loops. # Ideally we need something like exec,open in tcl that interacts with transformed channels directly and emits as it runs, not only at termination # - and that at least appears like a terminal to the called command. #set rdout [open |[concat $commandlist [list 2>@stderr <@$inchan]] [list RDONLY]] set rdout [open |[concat $commandlist [list 2>@$wrerr <@$inchan]] [list RDONLY]] chan configure $rderr -buffering $errbuffering -blocking 0 chan configure $rderr -translation $readprocesstranslation } set command_pids [pid $rdout] #puts stderr "command_pids: $command_pids" #tcl::process ensemble only available in 8.7+ - and it didn't prove useful here anyway # the child process generally won't shut down until channels are closed. # premature EOF on grandchild process launch seems to be due to lack of terminal emulation when redirecting stdin/stdout. # worked around in punk/repl using 'script' command as a fake tty. #set subprocesses [tcl::process::list] #puts stderr "subprocesses: $subprocesses" #if {[lindex $command_pids 0] ni $subprocesses} { # puts stderr "pid [lindex $command_pids 0] not running $errMsg" #} else { # puts stderr "pid [lindex $command_pids 0] is running" #} if {$debug} { ::shellfilter::log::write $debugname "pipeline pids: $command_pids" } #jjj chan configure $rdout -buffering $outbuffering -blocking 0 chan configure $rdout -translation $readprocesstranslation if {![string length $custom_stderr]} { chan event $rderr readable [list apply {{chan other wrerr outchan errchan waitfor errprefix errbuffering debug debugname pids} { if {$errbuffering eq "line"} { set countchunk [chan gets $chan chunk] ;#only get one line so that order between stderr and stdout is more likely to be preserved #errprefix only applicable to line buffered output if {$countchunk >= 0} { if {[chan eof $chan]} { puts -nonewline $errchan ${errprefix}$chunk } else { puts $errchan "${errprefix}$chunk" } } } else { set chunk [chan read $chan] if {[string length $chunk]} { puts -nonewline $errchan $chunk } } if {[chan eof $chan]} { flush $errchan ;#jmn #set subprocesses [tcl::process::list] #puts stderr "subprocesses: $subprocesses" #if {[lindex $pids 0] ni $subprocesses} { # puts stderr "stderr reader: pid [lindex $pids 0] no longer running" #} else { # puts stderr "stderr reader: pid [lindex $pids 0] still running" #} chan close $chan #catch {chan close $wrerr} if {$other ni [chan names]} { set $waitfor stderr } } }} $rderr $rdout $wrerr $outchan $errchan $waitvar $errprefix $errbuffering $debug $debugname $command_pids] } #todo - handle case where large amount of stdin coming in faster than rdout can handle #as is - arbitrary amount of memory could be used because we aren't using a filevent for rdout being writable # - we're just pumping it in to the non-blocking rdout buffers # ie there is no backpressure and stdin will suck in as fast as possible. # for most commandlines this probably isn't too big a deal.. but it could be a problem for multi-GB disk images etc # # ## Note - detecting trailing missing nl before eof is basically the same here as when reading rdout from executable # - but there is a slight difference in that with rdout we get an extra blocked state just prior to the final read. # Not known if that is significant ## with inchan configured -buffering line #c:\repo\jn\punk\test>printf "test\netc\n" | tclsh punk.vfs/main.tcl -r cat #warning reading input with -buffering line. Cannot detect missing trailing-newline at eof #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 #c:\repo\jn\punk\test>printf "test\netc" | tclsh punk.vfs/main.tcl -r cat #warning reading input with -buffering line. Cannot detect missing trailing-newline at eof #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:1 pend:-1 count:3 #etc if 0 { chan event $inchan readable [list apply {{chan wrchan inbuffering waitfor} { #chan copy stdin $chan ;#doesn't work in a chan event if {$inbuffering eq "line"} { set countchunk [chan gets $chan chunk] #puts $wrchan "stdinstate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:$countchunk" if {$countchunk >= 0} { if {[chan eof $chan]} { puts -nonewline $wrchan $chunk } else { puts $wrchan $chunk } } } else { set chunk [chan read $chan] if {[string length $chunk]} { puts -nonewline $wrchan $chunk } } if {[chan eof $chan]} { puts stderr "|stdin_reader>eof [chan configure stdin]" chan event $chan readable {} #chan close $chan chan close $wrchan write ;#half close #set $waitfor "stdin" } }} $inchan $rdout $inbuffering $waitvar] if {[string length $stdinhandler]} { chan configure stdin -buffering line -blocking 0 chan event stdin readable $stdinhandler } } set actual_proc_out_buffering [chan configure $rdout -buffering] set actual_outchan_buffering [chan configure $outchan -buffering] #despite whatever is configured - we match our reading to how we need to output set read_proc_out_buffering $actual_outchan_buffering if {[string length $teefile]} { set logname "redir_[string map [list : _ ] $winfile]_[clock micros]" set tid [::shellfilter::log::open $logname {-syslog 127.0.0.1:514}] if {$teefile eq "write"} { ::shellfilter::log::write $logname "opening '$winfile' for write" set fd [open $winfile w] } else { ::shellfilter::log::write $logname "opening '$winfile' for appending" set fd [open $winfile a] } #chan configure $fd -translation lf chan configure $fd -translation $outtranslation chan configure $fd -encoding utf-8 set tempvar_bytetotal [namespace current]::totalbytes[clock micros] set $tempvar_bytetotal 0 chan event $rdout readable [list apply {{chan other wrerr outchan errchan read_proc_out_buffering waitfor outprefix call_id debug debugname writefile writefilefd copytempfile bytevar logtag} { #review - if we write outprefix to normal stdout.. why not to redirected file? #usefulness of outprefix is dubious upvar $bytevar totalbytes if {$read_proc_out_buffering eq "line"} { #set outchunk [chan read $chan] set countchunk [chan gets $chan outchunk] ;#only get one line so that order between stderr and stdout is more likely to be preserved if {$countchunk >= 0} { if {![chan eof $chan]} { set numbytes [expr {[string length $outchunk] + 1}] ;#we are assuming \n not \r\n - but count won't/can't be completely accurate(?) - review puts $writefilefd $outchunk } else { set numbytes [string length $outchunk] puts -nonewline $writefilefd $outchunk } incr totalbytes $numbytes ::shellfilter::log::write $logtag "${outprefix} wrote $numbytes bytes to $writefile" #puts $outchan "${outprefix} wrote $numbytes bytes to $writefile" } } else { set outchunk [chan read $chan] if {[string length $outchunk]} { puts -nonewline $writefilefd $outchunk set numbytes [string length $outchunk] incr totalbytes $numbytes ::shellfilter::log::write $logtag "${outprefix} wrote $numbytes bytes to $writefile" } } if {[chan eof $chan]} { flush $writefilefd ;#jmn #set blocking so we can get exit code chan configure $chan -blocking 1 catch {::shellfilter::log::write $logtag "${outprefix} total bytes $totalbytes written to $writefile"} #puts $outchan "${outprefix} total bytes $totalbytes written to $writefile" catch {close $writefilefd} if {$copytempfile} { catch {file copy $writefile "[file rootname $writefile]_copy[file extension $writefile]"} } try { chan close $chan set ::shellfilter::shellcommandvars($call_id,exitcode) 0 if {$debug} { ::shellfilter::log::write $debugname "(teefile) -- child process returned no error. (exit code 0) --" } } trap CHILDSTATUS {result options} { set code [lindex [dict get $options -errorcode] 2] if {$debug} { ::shellfilter::log::write $debugname "(teefile) CHILD PROCESS EXITED with code: $code" } set ::shellfilter::shellcommandvars($call_id,exitcode) $code } catch {chan close $wrerr} if {$other ni [chan names]} { set $waitfor stdout } } }} $rdout $rderr $wrerr $outchan $errchan $read_proc_out_buffering $waitvar $outprefix $call_id $debug $debugname $winfile $fd $copytempfile $tempvar_bytetotal $logname] } else { # This occurs when we have outbuffering set to 'line' - as the 'input' from rdout which comes from the executable is also configured to 'line' # where b:0|1 is whether chan blocked $chan returns 0 or 1 # pend is the result of chan pending $chan # eof is the resot of chan eof $chan ##------------------------- ##If we still read with gets,to retrieve line by line for output to line-buffered output - but the input channel is configured with -buffering none ## then we can detect the difference # there is an extra blocking read - but we can stil use eof with data to detect the absent newline and avoid passing an extra one on. #c:\repo\jn\punk\test>printf "test\netc\n" | tclsh punk.vfs/main.tcl /c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 #c:\repo\jn\punk\test>printf "test\netc" | tclsh punk.vfs/main.tcl /u/c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:1 eof:0 pend:-1 count:-1 #instate b:0 eof:1 pend:-1 count:3 #etc ##------------------------ #this should only occur if upstream is coming from stdin reader that has line buffering and hasn't handled the difference properly.. ###reading with gets from line buffered input with trailing newline #c:\repo\jn\punk\test>printf "test\netc\n" | tclsh punk.vfs/main.tcl /c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 ###reading with gets from line buffered input with trailing newline ##No detectable difference! #c:\repo\jn\punk\test>printf "test\netc" | tclsh punk.vfs/main.tcl /c cat #instate b:0 eof:0 pend:-1 count:4 #test #instate b:0 eof:0 pend:-1 count:3 #etc #instate b:0 eof:1 pend:-1 count:-1 ##------------------------- #Note that reading from -buffering none and writing straight out gives no problem because we pass the newlines through as is #set ::shellfilter::chan::lastreadblocked_nodata_noeof($rdout) 0 ;#a very specific case of readblocked prior to eof.. possibly not important #this detection is disabled for now - but left for debugging in case it means something.. or changes chan event $rdout readable [list apply {{chan other wrerr outchan errchan read_proc_out_buffering waitfor outprefix call_id debug debugname pids} { #set outchunk [chan read $chan] if {$read_proc_out_buffering eq "line"} { set countchunk [chan gets $chan outchunk] ;#only get one line so that order between stderr and stdout is more likely to be preserved #countchunk can be -1 before eof e.g when blocked #debugging output inline with data - don't leave enabled #puts $outchan "instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:$countchunk" if {$countchunk >= 0} { if {![chan eof $chan]} { puts $outchan ${outprefix}$outchunk } else { puts -nonewline $outchan ${outprefix}$outchunk #if {$::shellfilter::chan::lastreadblocked_nodata_noeof($chan)} { # seems to be the usual case #} else { # #false alarm, or ? we've reached eof with data but didn't get an empty blocking read just prior # #Not known if this occurs # #debugging output inline with data - don't leave enabled # puts $outchan "!!!prev read didn't block: instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:$countchunk" #} } #set ::shellfilter::chan::lastreadblocked_nodata_noeof($chan) 0 } else { #set ::shellfilter::chan::lastreadblocked_nodata_noeof($chan) [expr {[chan blocked $chan] && ![chan eof $chan]}] } } else { #puts $outchan "read CHANNEL $chan [chan configure $chan]" #puts $outchan "write CHANNEL $outchan b:[chan configure $outchan -buffering] t:[chan configure $outchan -translation] e:[chan configure $outchan -encoding]" set outchunk [chan read $chan] #puts $outchan "instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan] count:[string length $outchunk]" if {[string length $outchunk]} { #set stringrep [encoding convertfrom utf-8 $outchunk] #set newbytes [encoding convertto utf-16 $stringrep] #puts -nonewline $outchan $newbytes puts -nonewline $outchan $outchunk } } if {[chan eof $chan]} { flush $outchan ;#jmn #for now just look for first element in the pid list.. #set subprocesses [tcl::process::list] #puts stderr "subprocesses: $subprocesses" #if {[lindex $pids 0] ni $subprocesses} { # puts stderr "stdout reader pid: [lindex $pids 0] no longer running" #} else { # puts stderr "stdout reader pid: [lindex $pids 0] still running" #} #puts $outchan "instate b:[chan blocked $chan] eof:[chan eof $chan] pend:[chan pending output $chan]" chan configure $chan -blocking 1 ;#so we can get exit code try { chan close $chan set ::shellfilter::shellcommandvars($call_id,exitcode) 0 if {$debug} { ::shellfilter::log::write $debugname " -- child process returned no error. (exit code 0) --" } } trap CHILDSTATUS {result options} { set code [lindex [dict get $options -errorcode] 2] set ::shellfilter::shellcommandvars($call_id,exitcode) $code if {$debug} { ::shellfilter::log::write $debugname " CHILD PROCESS EXITED with code: $code" } } trap CHILDKILLED {result options} { #set code [lindex [dict get $options -errorcode] 2] #set ::shellfilter::shellcommandvars(%id%,exitcode) $code set ::shellfilter::shellcommandvars($call_id,exitcode) "childkilled" if {$debug} { ::shellfilter::log::write $debugname " CHILD PROCESS EXITED with result:'$result' options:'$options'" } } finally { #puts stdout "HERE" #flush stdout } catch {chan close $wrerr} if {$other ni [chan names]} { set $waitfor stdout } } }} $rdout $rderr $wrerr $outchan $errchan $read_proc_out_buffering $waitvar $outprefix $call_id $debug $debugname $command_pids] } #todo - add ability to detect activity/data-flow and change timeout to only apply for period with zero data #e.g x hrs with no data(?) #reset timeout when data detected. after $timeout [string map [list %w% $waitvar %id% $call_id %wrerr% $wrerr %rdout% $rdout %rderr% $rderr %debug% $debug %debugname% $debugname] { if {[info exists ::shellfilter::shellcommandvars(%id%,exitcode)]} { if {[set ::shellfilter::shellcommandvars(%id%,exitcode)] ne ""} { catch { chan close %wrerr% } catch { chan close %rdout%} catch { chan close %rderr%} } else { chan configure %rdout% -blocking 1 try { chan close %rdout% set ::shellfilter::shellcommandvars(%id%,exitcode) 0 if {%debug%} { ::shellfilter::log::write %debugname% "(timeout) -- child process returned no error. (exit code 0) --" } } trap CHILDSTATUS {result options} { set code [lindex [dict get $options -errorcode] 2] if {%debug%} { ::shellfilter::log::write %debugname% "(timeout) CHILD PROCESS EXITED with code: $code" } set ::shellfilter::shellcommandvars(%id%,exitcode) $code } trap CHILDKILLED {result options} { set code [lindex [dict get $options -errorcode] 2] #set code [dict get $options -code] #set ::shellfilter::shellcommandvars(%id%,exitcode) $code #set ::shellfilter::shellcommandvars($call_id,exitcode) "childkilled-timeout" set ::shellfilter::shellcommandvars(%id%,exitcode) "childkilled-timeout" if {%debug%} { ::shellfilter::log::write %debugname% "(timeout) CHILDKILLED with code: $code" ::shellfilter::log::write %debugname% "(timeout) result:$result options:$options" } } catch { chan close %wrerr% } catch { chan close %rderr%} } set %w% "timeout" } }] vwait $waitvar set exitcode [set ::shellfilter::shellcommandvars($call_id,exitcode)] if {![string is digit -strict $exitcode]} { puts stderr "Process exited with non-numeric code: $exitcode" flush stderr } if {[string length $teefile]} { #cannot be called from within an event handler above.. vwait reentrancy etc catch {::shellfilter::log::close $logname} } if {$debug} { ::shellfilter::log::write $debugname " closed by: [set $waitvar] with exitcode: $exitcode" catch {::shellfilter::log::close $debugname} } array unset ::shellfilter::shellcommandvars $call_id,* #restore buffering to pre shellfilter::run state lassign $remember_in_out_err_buffering bin bout berr chan configure $inchan -buffering $bin chan configure $outchan -buffering $bout chan configure $errchan -buffering $berr lassign $remember_in_out_err_translation tin tout terr chan configure $inchan -translation $tin chan configure $outchan -translation $tout chan configure $errchan -translation $terr #in channel probably closed..(? review - should it be?) catch { chan configure $inchan -buffering $bin } return [list exitcode $exitcode] } } package provide shellfilter [namespace eval shellfilter { variable version set version 0.1.9 }]