[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5)
)
$
} $1234bytes
}
proc get_utf8_leading {rawbytes} {
#*** !doctools
#[call [fun get_utf8_leading] [arg rawbytes]]
#[para] return the leading portion of rawbytes that is a valid utf8 sequence.
#[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint
#[para] e.g. it will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character.
#[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all.
#[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics
#[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned
#[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes
if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-)
#todo - install support binaries on a per-project basis in a way that doesn't impact the machine (e.g. not added to path) - cache in user config dir if possible, and supply a mechanism to clear the cache
#
set fossil_prog [auto_execok fossil]
if {![string length $fossil_prog]} {
puts stderr "The fossil program was not found. A fossil executable is required to use most deck features."
#Option codes for which we define a positive response, i.e. those that we will do.
#NOTE(review): presumably these are options the remote end offers with WILL and we answer with DO - confirm against the negotiation handler.
#   0 - binary
#   1 - echo
#   3 - suppress go-ahead
#   5 - status - by agreeing to this we should be able to read unsolicited "IAC SB STATUS IS ... IAC SE" reports and compare to our perception of state. (and do something if mismatches?)
variable respond_will_do [list 0 1 3 5]

#Passively enabled client features - requests for our own behaviours to which we will respond positively.
#   0 - binary
#   3 - suppress go-ahead
#   5 - status - by agreeing to this - we need to handle the subnegotiation "IAC SB STATUS SEND IAC SE" and respond with "IAC SB STATUS IS ... IAC SE"
variable respond_do_will [list 0 3 5]
#puts -nonewline stdout [punk::ansi::cursor_on] ;#Enabling cursor should take account of whether it was enabled before - we don't have that info currently! review
#as a byte oriented supposedly ascii-by-default protocol - we shouldn't expect to get utf-8 without having negotiated it - but it looks suspiciously like this is the sort of thing that happens (2024) review? Examples? mapscii.me 1984.ws? Test.
#arbitrary chunk boundaries - whether due to size or to a combination of network speed and event scheduling - can mean we get some utf8 characters split too.
set ansisplits [punk::ansi::ta::split_codes_single $prefix]
set last_pt [lindex $ansisplits end] ;#last part is supposed to be plaintext - if it looks like it contains a partial ansi - throw it to fromserver_unprocessed for next fromServer call
if {[string first "\x1b" $last_pt] >= 0} {
set complete [join [lrange $ansisplits 0 end-1] ""]