#! todo? for now, emit a clue as to what's happening.
puts stderr "[punk::ansi::a+ yellow bold]-->punk::console::get_ansi_response_payload cannot trigger existing handler $existing_handler while over-read data is in punk::console::input_chunks_waiting($input) instead of channel [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
if {$::repl::running} {
if {[eof $input]} {
puts stdout "restarting repl"
repl::reopen_stdin
}
}
}
}
#Note - we still may be in_repl_handler here (which disables its own reader while executing commandlines)
#The input_chunks_waiting may really belong to the existing_handler we found - but if it doesn't consume them they will end up being read by the repl_handler when it eventually re-enables.
#todo - some better structure than just a list of waiting chunks indexed by input channel, so repl/other handlers can determine the context in which these waiting chunks were generated?
} elseif {[llength $::repl::in_repl_handler]} {
} elseif {$::repl::running} {
if {[llength $input_chunks_waiting($input)]} {
#don't trigger the repl handler manually - we will inevitably get things out of order - as it knows when to enable/disable itself based on whether chunks are waiting.
#triggering it by putting it on the eventloop will potentially result in re-entrancy
#The cooperating reader must be designed to consume waiting chunks and only reschedule it's channel read handler once all waiting chunks have been consumed.
#puts stderr "[punk::ansi::a+ green bold]--> repl_handler has chunks to consume [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
variable in_sb ;#whether we are in subnegotiation parameters and waiting for SE e.g for STATUS we may get multiple or evan all other codes as a report of the other side's perception of the option states.
#specifically named 'waiting' argument as last argument for cooperative input reading with other punk channel handlers (repl in particular)
#waiting data will be supplied to this handler if the other handler over-read (e.g repl handling ANSI ESC \[6n response on stdin finding other data before the ANSI response.)
set chunksize 4096 ;#No choice of chunksize can avoid the possibility of splitting a token such as a Telnet protocol command or an ANSI sequence.
#in theory, a split ANSI sequence won't cause a problem - except if we have debug on which could emit a request on stdout (e.g get_cursor_pos)
#as a byte oriented supposedly ascii-by-default protocol - we shouldn't expect to get utf-8 without having negotiated it - but it looks suspiciously like this is the sort of thing that happens (2024) review? Examples? mapscii.me 1984.ws? Test.
#randomly chosen chunk boundaries - whether due to size or a combination of network speed and event scheduling can mean we get some utf8 characters split too.
set data $fromserver_unprocessed
set fromserver_unprocessed ""
append data [read $sock $chunksize]
#repeatedly appending when not fblocked - will somewhat reduce the risk of splitting both ANSI and TELNET commands - but at the cost of starving the output processing
#somewhat conveniently? - the IAC \xFF byte is not valid in utf-8 or ascii
#this whole mechanism may need to be reviewed/modified if/when Telnet binary mode and/or charset changing is implemented/understood by the author.
#The current basic system is tested on the few available public telnet servers. - todo - test on some old industrial equipment, read more RFCs.
#for now we'll use punk::lib::get_utf8_leading as a hack way to determine if we should throw some trailing data aside for next loop to process?
#while {![fblocked $sock] && ![eof $sock]} {
# add_debug "[a+ red bold]RE-READ[a]\n" stdin $sock
#! todo? for now, emit a clue as to what's happening.
puts stderr "[punk::ansi::a+ yellow bold]-->punk::console::get_ansi_response_payload cannot trigger existing handler $existing_handler while over-read data is in punk::console::input_chunks_waiting($input) instead of channel [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
if {$::repl::running} {
if {[eof $input]} {
puts stdout "restarting repl"
repl::reopen_stdin
}
}
}
}
#Note - we still may be in_repl_handler here (which disables its own reader while executing commandlines)
#The input_chunks_waiting may really belong to the existing_handler we found - but if it doesn't consume them they will end up being read by the repl_handler when it eventually re-enables.
#todo - some better structure than just a list of waiting chunks indexed by input channel, so repl/other handlers can determine the context in which these waiting chunks were generated?
} elseif {[llength $::repl::in_repl_handler]} {
} elseif {$::repl::running} {
if {[llength $input_chunks_waiting($input)]} {
#don't trigger the repl handler manually - we will inevitably get things out of order - as it knows when to enable/disable itself based on whether chunks are waiting.
#triggering it by putting it on the eventloop will potentially result in re-entrancy
#The cooperating reader must be designed to consume waiting chunks and only reschedule it's channel read handler once all waiting chunks have been consumed.
#puts stderr "[punk::ansi::a+ green bold]--> repl_handler has chunks to consume [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5)
)
$
} $1234bytes
}
proc get_utf8_leading {rawbytes} {
#*** !doctools
#[call [fun get_utf8_leading] [arg rawbytes]]
#[para] return the leading portion of rawbytes that is a valid utf8 sequence.
#[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint
#[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character.
#[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all.
#[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics
#[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned
#[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes
if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-)
#todo - install support binaries on a per-project basis in a way that doesn't impact machine (e.g not added to path) - cache in user config dir if possible, supply mechanism to clear cache
#
set fossil_prog [auto_execok fossil]
if {![string length $fossil_prog]} {
puts stderr "The fossil program was not found. A fossil executable is required to use most deck features."