Julian Noble
11 months ago
32 changed files with 2676 additions and 149 deletions
@ -0,0 +1,146 @@
|
||||
[comment {--- punk::docgen generated from inline doctools comments ---}] |
||||
[comment {--- punk::docgen DO NOT EDIT DOCS HERE UNLESS YOU REMOVE THESE COMMENT LINES ---}] |
||||
[comment {--- punk::docgen overwrites this file ---}] |
||||
[manpage_begin punkshell_module_punk::fileline 0 0.1.0] |
||||
[copyright "2024"] |
||||
[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] |
||||
[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] |
||||
[require punk::fileline] |
||||
[keywords module text parse file] |
||||
[description] |
||||
[para] - |
||||
[section Overview] |
||||
[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) |
||||
[para]This is important for certain text files where examining the number of chars/bytes is important |
||||
[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved. |
||||
[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data. |
||||
[subsection Concepts] |
||||
[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. |
||||
[example_begin] |
||||
package require punk::fileline |
||||
package require fileutil |
||||
set rawdata [lb]fileutil::cat data.txt -translation binary[rb] |
||||
punk::fileline::class::textinfo create obj_data $rawdata |
||||
puts stdout [lb]obj_data linecount[rb] |
||||
[example_end] |
||||
[subsection Notes] |
||||
[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. |
||||
[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. |
||||
[para]No support for lone carriage-returns being interpreted as line-endings. |
||||
[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. |
||||
[subsection dependencies] |
||||
[para] packages used by punk::fileline |
||||
[list_begin itemized] |
||||
[item] [package {Tcl 8.6}] |
||||
[list_end] |
||||
[section API] |
||||
[subsection {Namespace punk::fileline::class}] |
||||
[para] class definitions |
||||
[list_begin enumerated] |
||||
[enum] CLASS [class textinfo] |
||||
[list_begin definitions] |
||||
[para] [emph METHODS] |
||||
[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]] |
||||
[para] Constructor for textinfo object which represents a chunk or all of a file |
||||
[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like: |
||||
[example_begin] |
||||
fconfigure $fd -translation binary |
||||
set chunkdata [lb]read $fd[rb] |
||||
or |
||||
set chunkdata [lb]fileutil::cat <filename> -translation binary[rb] |
||||
[example_end] |
||||
[para] when loading the data |
||||
[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]] |
||||
[para]Return a range of bytes from the underlying raw chunk data. |
||||
[para] e.g The following retrieves the entire chunk |
||||
[para] objName chunk 0 end |
||||
[call class::textinfo [method chunklen]] |
||||
[para] Number of bytes/characters in the raw data of the file |
||||
[call class::textinfo [method linecount]] |
||||
[para] Number of lines in the raw data of the file, counted as per the policy in effect |
||||
[call class::textinfo [method regenerate_lines]] |
||||
[para]generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex |
||||
[call class::textinfo [method line] [arg lineindex]] |
||||
[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata |
||||
[para]A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) |
||||
[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" |
||||
[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending |
||||
[call class::textinfo [method linepayload] [arg lineindex]] |
||||
[para]Return the text of the line indicated by the zero-based lineindex |
||||
[para]The line-ending is not returned in the data - but is still stored against this lineindex |
||||
[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method |
||||
[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used |
||||
[para]To retrieve an entire line including line-ending use the [method line] method. |
||||
[call class::textinfo [method linemeta] [arg lineindex]] |
||||
[para]Return a dict of the metadata for the line indicated by the zero-based lineindex |
||||
[para]Keys returned include |
||||
[list_begin itemized] |
||||
[item] le |
||||
[para] A string representing the type of line-ending: crlf|lf|none |
||||
[item] linelen |
||||
[para] The number of characters/bytes in the whole line including line-ending if any |
||||
[item] payloadlen |
||||
[para] The number of characters/bytes in the line excluding line-ending |
||||
[item] start |
||||
[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins |
||||
[item] end |
||||
[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends |
||||
[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload |
||||
[list_end] |
||||
[call class::textinfo [method lineinfo] [arg lineindex]] |
||||
[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex |
||||
[para]This returns the same info as the [method linemeta] method with an added key of 'payload' which is the text of the line without line-ending. |
||||
[para]The 'payload' value is the same as is returned from the [method linepayload] method. |
||||
[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]] |
||||
[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]] |
||||
[call class::textinfo [method lines] [arg startidx] [arg endidx]] |
||||
[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]] |
||||
[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]] |
||||
[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]] |
||||
[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied |
||||
[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list. |
||||
[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) |
||||
[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk. |
||||
[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]] |
||||
[para]A helper to return any Tcl-style end or end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data |
||||
[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices |
||||
[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]] |
||||
[para]A helper to return any Tcl-style end or end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data |
||||
[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]] |
||||
[para]A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max |
||||
[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted |
||||
[para]startidx higher than endidx is allowed |
||||
[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max |
||||
[list_end] |
||||
[list_end] [comment {--- end class enumeration ---}] |
||||
[subsection {Namespace punk::fileline}] |
||||
[para] Core API functions for punk::fileline |
||||
[list_begin definitions] |
||||
[list_end] [comment {--- end definitions namespace punk::fileline ---}] |
||||
[subsection {Namespace punk::fileline::lib}] |
||||
[para] Secondary functions that are part of the API |
||||
[list_begin definitions] |
||||
[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]] |
||||
[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range. |
||||
[list_begin arguments] |
||||
[arg_def integer start] |
||||
[para] zero-based start index of range |
||||
[arg_def integer end] |
||||
[para] zero-based end index of range |
||||
[arg_def integer chunksize] |
||||
[para] Number of bytes/characters in chunk |
||||
[list_end] |
||||
[para]returns a dict with the keys is_span and boundaries |
||||
[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize |
||||
[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize |
||||
[para]e.g |
||||
[example_begin] |
||||
range_spans_chunk_boundaries 10 1750 512 |
||||
is_span 1 boundaries {512 1024 1536} |
||||
[example_end] |
||||
[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75 |
||||
[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}] |
||||
[section Internal] |
||||
[subsection {Namespace punk::fileline::system}] |
||||
[para] Internal functions that are not part of the API |
||||
[manpage_end] |
@ -0,0 +1,559 @@
|
||||
'\" |
||||
'\" Generated from file '_module_fileline-0\&.1\&.0\&.tm\&.man' by tcllib/doctools with format 'nroff' |
||||
'\" Copyright (c) 2024 |
||||
'\" |
||||
.TH "punkshell_module_punk::fileline" 0 0\&.1\&.0 doc "punk fileline" |
||||
.\" The -*- nroff -*- definitions below are for supplemental macros used |
||||
.\" in Tcl/Tk manual entries. |
||||
.\" |
||||
.\" .AP type name in/out ?indent? |
||||
.\" Start paragraph describing an argument to a library procedure. |
||||
.\" type is type of argument (int, etc.), in/out is either "in", "out", |
||||
.\" or "in/out" to describe whether procedure reads or modifies arg, |
||||
.\" and indent is equivalent to second arg of .IP (shouldn't ever be |
||||
.\" needed; use .AS below instead) |
||||
.\" |
||||
.\" .AS ?type? ?name? |
||||
.\" Give maximum sizes of arguments for setting tab stops. Type and |
||||
.\" name are examples of largest possible arguments that will be passed |
||||
.\" to .AP later. If args are omitted, default tab stops are used. |
||||
.\" |
||||
.\" .BS |
||||
.\" Start box enclosure. From here until next .BE, everything will be |
||||
.\" enclosed in one large box. |
||||
.\" |
||||
.\" .BE |
||||
.\" End of box enclosure. |
||||
.\" |
||||
.\" .CS |
||||
.\" Begin code excerpt. |
||||
.\" |
||||
.\" .CE |
||||
.\" End code excerpt. |
||||
.\" |
||||
.\" .VS ?version? ?br? |
||||
.\" Begin vertical sidebar, for use in marking newly-changed parts |
||||
.\" of man pages. The first argument is ignored and used for recording |
||||
.\" the version when the .VS was added, so that the sidebars can be |
||||
.\" found and removed when they reach a certain age. If another argument |
||||
.\" is present, then a line break is forced before starting the sidebar. |
||||
.\" |
||||
.\" .VE |
||||
.\" End of vertical sidebar. |
||||
.\" |
||||
.\" .DS |
||||
.\" Begin an indented unfilled display. |
||||
.\" |
||||
.\" .DE |
||||
.\" End of indented unfilled display. |
||||
.\" |
||||
.\" .SO ?manpage? |
||||
.\" Start of list of standard options for a Tk widget. The manpage |
||||
.\" argument defines where to look up the standard options; if |
||||
.\" omitted, defaults to "options". The options follow on successive |
||||
.\" lines, in three columns separated by tabs. |
||||
.\" |
||||
.\" .SE |
||||
.\" End of list of standard options for a Tk widget. |
||||
.\" |
||||
.\" .OP cmdName dbName dbClass |
||||
.\" Start of description of a specific option. cmdName gives the |
||||
.\" option's name as specified in the class command, dbName gives |
||||
.\" the option's name in the option database, and dbClass gives |
||||
.\" the option's class in the option database. |
||||
.\" |
||||
.\" .UL arg1 arg2 |
||||
.\" Print arg1 underlined, then print arg2 normally. |
||||
.\" |
||||
.\" .QW arg1 ?arg2? |
||||
.\" Print arg1 in quotes, then arg2 normally (for trailing punctuation). |
||||
.\" |
||||
.\" .PQ arg1 ?arg2? |
||||
.\" Print an open parenthesis, arg1 in quotes, then arg2 normally |
||||
.\" (for trailing punctuation) and then a closing parenthesis. |
||||
.\" |
||||
.\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages. |
||||
.if t .wh -1.3i ^B |
||||
.nr ^l \n(.l |
||||
.ad b |
||||
.\" # Start an argument description |
||||
.de AP |
||||
.ie !"\\$4"" .TP \\$4 |
||||
.el \{\ |
||||
. ie !"\\$2"" .TP \\n()Cu |
||||
. el .TP 15 |
||||
.\} |
||||
.ta \\n()Au \\n()Bu |
||||
.ie !"\\$3"" \{\ |
||||
\&\\$1 \\fI\\$2\\fP (\\$3) |
||||
.\".b |
||||
.\} |
||||
.el \{\ |
||||
.br |
||||
.ie !"\\$2"" \{\ |
||||
\&\\$1 \\fI\\$2\\fP |
||||
.\} |
||||
.el \{\ |
||||
\&\\fI\\$1\\fP |
||||
.\} |
||||
.\} |
||||
.. |
||||
.\" # define tabbing values for .AP |
||||
.de AS |
||||
.nr )A 10n |
||||
.if !"\\$1"" .nr )A \\w'\\$1'u+3n |
||||
.nr )B \\n()Au+15n |
||||
.\" |
||||
.if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n |
||||
.nr )C \\n()Bu+\\w'(in/out)'u+2n |
||||
.. |
||||
.AS Tcl_Interp Tcl_CreateInterp in/out |
||||
.\" # BS - start boxed text |
||||
.\" # ^y = starting y location |
||||
.\" # ^b = 1 |
||||
.de BS |
||||
.br |
||||
.mk ^y |
||||
.nr ^b 1u |
||||
.if n .nf |
||||
.if n .ti 0 |
||||
.if n \l'\\n(.lu\(ul' |
||||
.if n .fi |
||||
.. |
||||
.\" # BE - end boxed text (draw box now) |
||||
.de BE |
||||
.nf |
||||
.ti 0 |
||||
.mk ^t |
||||
.ie n \l'\\n(^lu\(ul' |
||||
.el \{\ |
||||
.\" Draw four-sided box normally, but don't draw top of |
||||
.\" box if the box started on an earlier page. |
||||
.ie !\\n(^b-1 \{\ |
||||
\h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
||||
.\} |
||||
.el \}\ |
||||
\h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
||||
.\} |
||||
.\} |
||||
.fi |
||||
.br |
||||
.nr ^b 0 |
||||
.. |
||||
.\" # VS - start vertical sidebar |
||||
.\" # ^Y = starting y location |
||||
.\" # ^v = 1 (for troff; for nroff this doesn't matter) |
||||
.de VS |
||||
.if !"\\$2"" .br |
||||
.mk ^Y |
||||
.ie n 'mc \s12\(br\s0 |
||||
.el .nr ^v 1u |
||||
.. |
||||
.\" # VE - end of vertical sidebar |
||||
.de VE |
||||
.ie n 'mc |
||||
.el \{\ |
||||
.ev 2 |
||||
.nf |
||||
.ti 0 |
||||
.mk ^t |
||||
\h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n' |
||||
.sp -1 |
||||
.fi |
||||
.ev |
||||
.\} |
||||
.nr ^v 0 |
||||
.. |
||||
.\" # Special macro to handle page bottom: finish off current |
||||
.\" # box/sidebar if in box/sidebar mode, then invoked standard |
||||
.\" # page bottom macro. |
||||
.de ^B |
||||
.ev 2 |
||||
'ti 0 |
||||
'nf |
||||
.mk ^t |
||||
.if \\n(^b \{\ |
||||
.\" Draw three-sided box if this is the box's first page, |
||||
.\" draw two sides but no top otherwise. |
||||
.ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
||||
.el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
||||
.\} |
||||
.if \\n(^v \{\ |
||||
.nr ^x \\n(^tu+1v-\\n(^Yu |
||||
\kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c |
||||
.\} |
||||
.bp |
||||
'fi |
||||
.ev |
||||
.if \\n(^b \{\ |
||||
.mk ^y |
||||
.nr ^b 2 |
||||
.\} |
||||
.if \\n(^v \{\ |
||||
.mk ^Y |
||||
.\} |
||||
.. |
||||
.\" # DS - begin display |
||||
.de DS |
||||
.RS |
||||
.nf |
||||
.sp |
||||
.. |
||||
.\" # DE - end display |
||||
.de DE |
||||
.fi |
||||
.RE |
||||
.sp |
||||
.. |
||||
.\" # SO - start of list of standard options |
||||
.de SO |
||||
'ie '\\$1'' .ds So \\fBoptions\\fR |
||||
'el .ds So \\fB\\$1\\fR |
||||
.SH "STANDARD OPTIONS" |
||||
.LP |
||||
.nf |
||||
.ta 5.5c 11c |
||||
.ft B |
||||
.. |
||||
.\" # SE - end of list of standard options |
||||
.de SE |
||||
.fi |
||||
.ft R |
||||
.LP |
||||
See the \\*(So manual entry for details on the standard options. |
||||
.. |
||||
.\" # OP - start of full description for a single option |
||||
.de OP |
||||
.LP |
||||
.nf |
||||
.ta 4c |
||||
Command-Line Name: \\fB\\$1\\fR |
||||
Database Name: \\fB\\$2\\fR |
||||
Database Class: \\fB\\$3\\fR |
||||
.fi |
||||
.IP |
||||
.. |
||||
.\" # CS - begin code excerpt |
||||
.de CS |
||||
.RS |
||||
.nf |
||||
.ta .25i .5i .75i 1i |
||||
.. |
||||
.\" # CE - end code excerpt |
||||
.de CE |
||||
.fi |
||||
.RE |
||||
.. |
||||
.\" # UL - underline word |
||||
.de UL |
||||
\\$1\l'|0\(ul'\\$2 |
||||
.. |
||||
.\" # QW - apply quotation marks to word |
||||
.de QW |
||||
.ie '\\*(lq'"' ``\\$1''\\$2 |
||||
.\"" fix emacs highlighting |
||||
.el \\*(lq\\$1\\*(rq\\$2 |
||||
.. |
||||
.\" # PQ - apply parens and quotation marks to word |
||||
.de PQ |
||||
.ie '\\*(lq'"' (``\\$1''\\$2)\\$3 |
||||
.\"" fix emacs highlighting |
||||
.el (\\*(lq\\$1\\*(rq\\$2)\\$3 |
||||
.. |
||||
.\" # QR - quoted range |
||||
.de QR |
||||
.ie '\\*(lq'"' ``\\$1''\\-``\\$2''\\$3 |
||||
.\"" fix emacs highlighting |
||||
.el \\*(lq\\$1\\*(rq\\-\\*(lq\\$2\\*(rq\\$3 |
||||
.. |
||||
.\" # MT - "empty" string |
||||
.de MT |
||||
.QW "" |
||||
.. |
||||
.BS |
||||
.SH NAME |
||||
punkshell_module_punk::fileline \- file line-handling utilities |
||||
.SH SYNOPSIS |
||||
package require \fBpunk::fileline \fR |
||||
.sp |
||||
class::textinfo \fBconstructor\fR \fIdatachunk\fR ?option value\&.\&.\&.? |
||||
.sp |
||||
class::textinfo \fBchunk\fR \fIchunkstart\fR \fIchunkend\fR |
||||
.sp |
||||
class::textinfo \fBchunklen\fR |
||||
.sp |
||||
class::textinfo \fBlinecount\fR |
||||
.sp |
||||
class::textinfo \fBregenerate_lines\fR |
||||
.sp |
||||
class::textinfo \fBline\fR \fIlineindex\fR |
||||
.sp |
||||
class::textinfo \fBlinepayload\fR \fIlineindex\fR |
||||
.sp |
||||
class::textinfo \fBlinemeta\fR \fIlineindex\fR |
||||
.sp |
||||
class::textinfo \fBlineinfo\fR \fIlineindex\fR |
||||
.sp |
||||
class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
class::textinfo \fBlines\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
class::textinfo \fBlinepayloads\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
class::textinfo \fBchunkrange_to_linerange\fR \fIchunkstart\fR \fIchunkend\fR |
||||
.sp |
||||
class::textinfo \fBchunkrange_to_lineinfolist\fR \fIchunkstart\fR \fIchunkend\fR ?option value\&.\&.\&.? |
||||
.sp |
||||
class::textinfo \fBnumeric_linerange\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
class::textinfo \fBnumeric_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
class::textinfo \fBnormalize_indices\fR \fIstartidx\fR \fIendidx\fR \fImax\fR |
||||
.sp |
||||
\fBlib::range_spans_chunk_boundaries\fR \fIstart\fR \fIend\fR \fIchunksize\fR |
||||
.sp |
||||
.BE |
||||
.SH DESCRIPTION |
||||
.PP |
||||
- |
||||
.SH OVERVIEW |
||||
.PP |
||||
Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) |
||||
.PP |
||||
This is important for certain text files where examining the number of chars/bytes is important |
||||
.PP |
||||
For example - windows \&.cmd/\&.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved\&. |
||||
.PP |
||||
Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem\&. It is for operating on text-file like data\&. |
||||
.SS CONCEPTS |
||||
.PP |
||||
A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation\&. |
||||
.CS |
||||
|
||||
|
||||
package require punk::fileline |
||||
package require fileutil |
||||
set rawdata [fileutil::cat data\&.txt -translation binary] |
||||
punk::fileline::class::textinfo create obj_data $rawdata |
||||
puts stdout [obj_data linecount] |
||||
|
||||
.CE |
||||
.SS NOTES |
||||
.PP |
||||
Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files\&. |
||||
.PP |
||||
This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired\&. |
||||
.PP |
||||
No support for lone carriage-returns being interpreted as line-endings\&. |
||||
.PP |
||||
CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module\&. |
||||
.SS DEPENDENCIES |
||||
.PP |
||||
packages used by punk::fileline |
||||
.IP \(bu |
||||
\fBTcl 8\&.6\fR |
||||
.PP |
||||
.SH API |
||||
.SS "NAMESPACE PUNK::FILELINE::CLASS" |
||||
.PP |
||||
class definitions |
||||
.IP [1] |
||||
CLASS \fBtextinfo\fR |
||||
.RS |
||||
.sp |
||||
\fIMETHODS\fR |
||||
.TP |
||||
class::textinfo \fBconstructor\fR \fIdatachunk\fR ?option value\&.\&.\&.? |
||||
.sp |
||||
Constructor for textinfo object which represents a chunk or all of a file |
||||
.sp |
||||
datachunk should be passed with the file data including line-endings as-is for full functionality\&. ie use something like: |
||||
.CS |
||||
|
||||
|
||||
fconfigure $fd -translation binary |
||||
set chunkdata [read $fd] |
||||
or |
||||
set chunkdata [fileutil::cat <filename> -translation binary] |
||||
|
||||
.CE |
||||
.sp |
||||
when loading the data |
||||
.TP |
||||
class::textinfo \fBchunk\fR \fIchunkstart\fR \fIchunkend\fR |
||||
.sp |
||||
Return a range of bytes from the underlying raw chunk data\&. |
||||
.sp |
||||
e\&.g The following retrieves the entire chunk |
||||
.sp |
||||
objName chunk 0 end |
||||
.TP |
||||
class::textinfo \fBchunklen\fR |
||||
.sp |
||||
Number of bytes/characters in the raw data of the file |
||||
.TP |
||||
class::textinfo \fBlinecount\fR |
||||
.sp |
||||
Number of lines in the raw data of the file, counted as per the policy in effect |
||||
.TP |
||||
class::textinfo \fBregenerate_lines\fR |
||||
.sp |
||||
generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex |
||||
.TP |
||||
class::textinfo \fBline\fR \fIlineindex\fR |
||||
.sp |
||||
Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata |
||||
.sp |
||||
A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) |
||||
.sp |
||||
Whilst such data may not conform to definitions (e\&.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" |
||||
.sp |
||||
To return just the data which might more commonly be needed for dealing with lines, use the \fBlinepayload\fR method - which returns the line data minus line-ending |
||||
.TP |
||||
class::textinfo \fBlinepayload\fR \fIlineindex\fR |
||||
.sp |
||||
Return the text of the line indicated by the zero-based lineindex |
||||
.sp |
||||
The line-ending is not returned in the data - but is still stored against this lineindex |
||||
.sp |
||||
Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the \fBlinemeta\fR method |
||||
.sp |
||||
To retrieve both the line text and metadata in a single call the \fBlineinfo\fR method can be used |
||||
.sp |
||||
To retrieve an entire line including line-ending use the \fBline\fR method\&. |
||||
.TP |
||||
class::textinfo \fBlinemeta\fR \fIlineindex\fR |
||||
.sp |
||||
Return a dict of the metadata for the line indicated by the zero-based lineindex |
||||
.sp |
||||
Keys returned include |
||||
.RS |
||||
.IP \(bu |
||||
le |
||||
.sp |
||||
A string representing the type of line-ending: crlf|lf|none |
||||
.IP \(bu |
||||
linelen |
||||
.sp |
||||
The number of characters/bytes in the whole line including line-ending if any |
||||
.IP \(bu |
||||
payloadlen |
||||
.sp |
||||
The number of characters/bytes in the line excluding line-ending |
||||
.IP \(bu |
||||
start |
||||
.sp |
||||
The zero-based index into the associated raw file data indicating at which byte/character index this line begins |
||||
.IP \(bu |
||||
end |
||||
.sp |
||||
The zero-based index into the associated raw file data indicating at which byte/character index this line ends |
||||
.sp |
||||
This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload |
||||
.RE |
||||
.TP |
||||
class::textinfo \fBlineinfo\fR \fIlineindex\fR |
||||
.sp |
||||
Return a dict of the metadata and text for the line indicated by the zero-based lineindex |
||||
.sp |
||||
This returns the same info as the \fBlinemeta\fR method with an added key of 'payload' which is the text of the line without line-ending\&. |
||||
.sp |
||||
The 'payload' value is the same as is returned from the \fBlinepayload\fR method\&. |
||||
.TP |
||||
class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||
.TP |
||||
class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR |
||||
.TP |
||||
class::textinfo \fBlines\fR \fIstartidx\fR \fIendidx\fR |
||||
.TP |
||||
class::textinfo \fBlinepayloads\fR \fIstartidx\fR \fIendidx\fR |
||||
.TP |
||||
class::textinfo \fBchunkrange_to_linerange\fR \fIchunkstart\fR \fIchunkend\fR |
||||
.TP |
||||
class::textinfo \fBchunkrange_to_lineinfolist\fR \fIchunkstart\fR \fIchunkend\fR ?option value\&.\&.\&.? |
||||
.sp |
||||
Return a list of dicts each with structure like the result of the \fBlineinfo\fR method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied |
||||
.sp |
||||
The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list\&. |
||||
.sp |
||||
truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) |
||||
.sp |
||||
Note that this truncation info is only in the return value of this method - and will not be reflected in \fBlineinfo\fR queries to the main chunk\&. |
||||
.TP |
||||
class::textinfo \fBnumeric_linerange\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
A helper to return any Tcl-style end or end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data |
||||
.sp |
||||
This is used internally by API functions such as \fBline\fR to enable it to accept more expressive indices |
||||
.TP |
||||
class::textinfo \fBnumeric_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||
.sp |
||||
A helper to return any Tcl-style end or end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data |
||||
.TP |
||||
class::textinfo \fBnormalize_indices\fR \fIstartidx\fR \fIendidx\fR \fImax\fR |
||||
.sp |
||||
A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max |
||||
.sp |
||||
Basic addition and subtraction expressions such as 4-1 5+2 are accepted |
||||
.sp |
||||
startidx higher than endidx is allowed |
||||
.sp |
||||
Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max |
||||
.RE |
||||
.PP |
||||
.SS "NAMESPACE PUNK::FILELINE" |
||||
.PP |
||||
Core API functions for punk::fileline |
||||
.PP |
||||
.SS "NAMESPACE PUNK::FILELINE::LIB" |
||||
.PP |
||||
Secondary functions that are part of the API |
||||
.TP |
||||
\fBlib::range_spans_chunk_boundaries\fR \fIstart\fR \fIend\fR \fIchunksize\fR |
||||
.sp |
||||
Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range\&. |
||||
.RS |
||||
.TP |
||||
integer \fIstart\fR |
||||
.sp |
||||
zero-based start index of range |
||||
.TP |
||||
integer \fIend\fR |
||||
.sp |
||||
zero-based end index of range |
||||
.TP |
||||
integer \fIchunksize\fR |
||||
.sp |
||||
Number of bytes/characters in chunk |
||||
.RE |
||||
.sp |
||||
returns a dict with the keys is_span and boundaries |
||||
.sp |
||||
is_span 0|1 indicates if the range specified spans a boundary of chunksize |
||||
.sp |
||||
boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize |
||||
.sp |
||||
e\&.g |
||||
.CS |
||||
|
||||
|
||||
range_spans_chunk_boundaries 10 1750 512 |
||||
is_span 1 boundaries {512 1024 1536} |
||||
|
||||
.CE |
||||
.sp |
||||
This function automatically uses lseq (if Tcl >= 8\&.7) when number of boundaries spanned is approximately greater than 75 |
||||
.PP |
||||
.SH INTERNAL |
||||
.SS "NAMESPACE PUNK::FILELINE::SYSTEM" |
||||
.PP |
||||
Internal functions that are not part of the API |
||||
.SH KEYWORDS |
||||
file, module, parse, text |
||||
.SH COPYRIGHT |
||||
.nf |
||||
Copyright (c) 2024 |
||||
|
||||
.fi |
@ -1 +1 @@
|
||||
{shell {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} changelog {{doc/files/project_changes.md punkshell__project_changes}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} capability {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} module {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} punk {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} repl {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}}} {{changelog doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/main.md punkshell} . {repl doc/files/project_intro.md punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {shell doc/files/project_intro.md punkshell__project_intro} . {punk doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/main.md punkshell} . {repl doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/project_intro.md punkshell__project_intro} . {repl doc/files/main.md punkshell} . 
{capability doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} .} 9 {shell shell changelog changelog filesystem filesystem path path capability capability module module punk punk plugin plugin repl repl} |
||||
{file {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} repl {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} text {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} shell {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} changelog {{doc/files/project_changes.md punkshell__project_changes}} capability {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} parse {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} module {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} {doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} punk {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}}} {{shell doc/files/project_changes.md punkshell__project_changes} . {changelog doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/main.md punkshell} . {text doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/project_intro.md punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . 
{module doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {punk doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/project_intro.md punkshell__project_intro} . {parse doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {punk doc/files/main.md punkshell} . {module doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/project_intro.md punkshell__project_intro} . {file doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/main.md punkshell} . {capability doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} .} 12 {file file repl repl text text shell shell changelog changelog capability capability parse parse filesystem filesystem path path module module punk punk plugin plugin} |
@ -1 +1 @@
|
||||
doc {doc/toc {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.md punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.md punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.md punkshell {punkshell - Core}}}} |
||||
doc {doc/toc {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline {file line-handling utilities}} {doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.md punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.md punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.md punkshell {punkshell - Core}}}} |
@ -1 +1 @@
|
||||
kw,capability {index.md capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell Changes} doc/files/project_changes.md {Introduction to punkshell} doc/files/project_intro.md punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md sa,punkshell(n) doc/files/main.md filesystem {index.md filesystem} sa,punkshell doc/files/main.md kw,shell {index.md shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell__project_changes(n) doc/files/project_changes.md kw,path {index.md path} kw,module {index.md module} punkshell(n) doc/files/main.md kw,plugin {index.md plugin} punkshell doc/files/main.md punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md changelog {index.md changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md punkshell__project_changes(n) doc/files/project_changes.md sa,punkshell__project_changes doc/files/project_changes.md path {index.md path} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_changes doc/files/project_changes.md kw,filesystem {index.md filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md shell {index.md shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md kw,repl {index.md repl} capability {index.md capability} punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell - Core} doc/files/main.md {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.md repl {index.md repl} kw,punk {index.md punk} 
sa,punkshell__project_intro(n) doc/files/project_intro.md sa,punkshell__project_intro doc/files/project_intro.md {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_intro(n) doc/files/project_intro.md punkshell__project_intro doc/files/project_intro.md kw,changelog {index.md changelog} punk {index.md punk} module {index.md module} plugin {index.md plugin} |
||||
kw,capability {index.md capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell Changes} doc/files/project_changes.md {Introduction to punkshell} doc/files/project_intro.md sa,punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md sa,punkshell(n) doc/files/main.md filesystem {index.md filesystem} sa,punkshell doc/files/main.md kw,shell {index.md shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md kw,parse {index.md parse} sa,punkshell__project_changes(n) doc/files/project_changes.md kw,path {index.md path} kw,module {index.md module} punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.md punkshell(n) doc/files/main.md kw,plugin {index.md plugin} punkshell doc/files/main.md kw,file {index.md file} punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md changelog {index.md changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md punkshell__project_changes(n) doc/files/project_changes.md sa,punkshell__project_changes doc/files/project_changes.md path {index.md path} file {index.md file} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_changes doc/files/project_changes.md kw,filesystem {index.md filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md shell {index.md shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md kw,repl {index.md repl} capability {index.md capability} kw,text {index.md text} parse {index.md parse} sa,punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.md 
punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell - Core} doc/files/main.md {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.md repl {index.md repl} punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.md kw,punk {index.md punk} sa,punkshell__project_intro(n) doc/files/project_intro.md text {index.md text} sa,punkshell__project_intro doc/files/project_intro.md {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_intro(n) doc/files/project_intro.md {file line-handling utilities} doc/files/punk/_module_fileline-0.1.0.tm.md punkshell__project_intro doc/files/project_intro.md kw,changelog {index.md changelog} module {index.md module} punk {index.md punk} plugin {index.md plugin} |
@ -0,0 +1,353 @@
|
||||
|
||||
[//000000001]: # (punkshell\_module\_punk::fileline \- punk fileline) |
||||
[//000000002]: # (Generated from file '\_module\_fileline\-0\.1\.0\.tm\.man' by tcllib/doctools with format 'markdown') |
||||
[//000000003]: # (Copyright © 2024) |
||||
[//000000004]: # (punkshell\_module\_punk::fileline\(0\) 0\.1\.0 doc "punk fileline") |
||||
|
||||
<hr> [ <a href="../../../toc.md">Main Table Of Contents</a> | <a |
||||
href="../../toc.md">Table Of Contents</a> | <a |
||||
href="../../../index.md">Keyword Index</a> ] <hr> |
||||
|
||||
# NAME |
||||
|
||||
punkshell\_module\_punk::fileline \- file line\-handling utilities |
||||
|
||||
# <a name='toc'></a>Table Of Contents |
||||
|
||||
- [Table Of Contents](#toc) |
||||
|
||||
- [Synopsis](#synopsis) |
||||
|
||||
- [Description](#section1) |
||||
|
||||
- [Overview](#section2) |
||||
|
||||
- [Concepts](#subsection1) |
||||
|
||||
- [Notes](#subsection2) |
||||
|
||||
- [dependencies](#subsection3) |
||||
|
||||
- [API](#section3) |
||||
|
||||
- [Namespace punk::fileline::class](#subsection4) |
||||
|
||||
- [Namespace punk::fileline](#subsection5) |
||||
|
||||
- [Namespace punk::fileline::lib](#subsection6) |
||||
|
||||
- [Internal](#section4) |
||||
|
||||
- [Namespace punk::fileline::system](#subsection7) |
||||
|
||||
- [Keywords](#keywords) |
||||
|
||||
- [Copyright](#copyright) |
||||
|
||||
# <a name='synopsis'></a>SYNOPSIS |
||||
|
||||
package require punk::fileline |
||||
|
||||
[class::textinfo __constructor__ *datachunk* ?option value\.\.\.?](#1) |
||||
[class::textinfo __chunk__ *chunkstart* *chunkend*](#2) |
||||
[class::textinfo __chunklen__](#3) |
||||
[class::textinfo __linecount__](#4) |
||||
[class::textinfo __regenerate\_lines__](#5) |
||||
[class::textinfo __line__ *lineindex*](#6) |
||||
[class::textinfo __linepayload__ *lineindex*](#7) |
||||
[class::textinfo __linemeta__ *lineindex*](#8) |
||||
[class::textinfo __lineinfo__ *lineindex*](#9) |
||||
[class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx*](#10) |
||||
[class::textinfo __linerange\_to\_chunk__ *startidx* *endidx*](#11) |
||||
[class::textinfo __lines__ *startidx* *endidx*](#12) |
||||
[class::textinfo __linepayloads__ *startidx* *endidx*](#13) |
||||
[class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend*](#14) |
||||
[class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.?](#15) |
||||
[class::textinfo __numeric\_linerange__ *startidx* *endidx*](#16) |
||||
[class::textinfo __numeric\_chunkrange__ *startidx* *endidx*](#17) |
||||
[class::textinfo __normalize\_indices__ *startidx* *endidx* *max*](#18) |
||||
[__lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize*](#19) |
||||
|
||||
# <a name='description'></a>DESCRIPTION |
||||
|
||||
\- |
||||
|
||||
# <a name='section2'></a>Overview |
||||
|
||||
Utilities for in\-memory analysis of text file data as both line data and |
||||
byte/char\-counted data whilst preserving the line\-endings \(even if mixed\) |
||||
|
||||
This is important for certain text files where examining the number of |
||||
chars/bytes is important |
||||
|
||||
For example \- windows \.cmd/\.bat files need some byte counting to determine if |
||||
labels lie on chunk boundaries and need to be moved\. |
||||
|
||||
Despite including the word 'file', the library doesn't deal with reading/writing |
||||
to the filesystem\. It is for operating on text\-file like data\. |
||||
|
||||
## <a name='subsection1'></a>Concepts |
||||
|
||||
A chunk of textfile data \(possibly representing a whole file \- but usually at |
||||
least a complete set of lines\) is loaded into a punk::fileline::class::textinfo |
||||
instance at object creation\. |
||||
|
||||
package require punk::fileline |
||||
package require fileutil |
||||
set rawdata [fileutil::cat data.txt -translation binary] |
||||
punk::fileline::class::textinfo create obj_data $rawdata |
||||
puts stdout [obj_data linecount] |
||||
|
||||
## <a name='subsection2'></a>Notes |
||||
|
||||
Line records are referred to by a zero\-based index instead of a one\-based index |
||||
as is commonly used when displaying files\. |
||||
|
||||
This is for programming consistency and convenience, and the module user should |
||||
do their own conversion to one\-based indexing for line display or messaging if |
||||
desired\. |
||||
|
||||
No support for lone carriage\-returns being interpreted as line\-endings\. |
||||
|
||||
CR line\-endings that are intended to be interpreted as such should be mapped to |
||||
something else before the data is supplied to this module\. |
||||
|
||||
## <a name='subsection3'></a>dependencies |
||||
|
||||
packages used by punk::fileline |
||||
|
||||
- __Tcl 8\.6__ |
||||
|
||||
# <a name='section3'></a>API |
||||
|
||||
## <a name='subsection4'></a>Namespace punk::fileline::class |
||||
|
||||
class definitions |
||||
|
||||
1. CLASS __textinfo__ |
||||
|
||||
- <a name='1'></a>class::textinfo __constructor__ *datachunk* ?option value\.\.\.? |
||||
|
||||
*METHODS* |
||||
|
||||
Constructor for textinfo object which represents a chunk or all of a |
||||
file |
||||
|
||||
datachunk should be passed with the file data including line\-endings |
||||
as\-is for full functionality\. ie use something like: |
||||
|
||||
fconfigure $fd -translation binary |
||||
set chunkdata [read $fd] |
||||
or |
||||
set chunkdata [fileutil::cat <filename> -translation binary] |
||||
|
||||
when loading the data |
||||
|
||||
- <a name='2'></a>class::textinfo __chunk__ *chunkstart* *chunkend* |
||||
|
||||
Return a range of bytes from the underlying raw chunk data\. |
||||
|
||||
e\.g The following retrieves the entire chunk |
||||
|
||||
objName chunk 0 end |
||||
|
||||
- <a name='3'></a>class::textinfo __chunklen__ |
||||
|
||||
Number of bytes/characters in the raw data of the file |
||||
|
||||
- <a name='4'></a>class::textinfo __linecount__ |
||||
|
||||
Number of lines in the raw data of the file, counted as per the policy |
||||
in effect |
||||
|
||||
- <a name='5'></a>class::textinfo __regenerate\_lines__ |
||||
|
||||
generate a list of lines from the stored raw data chunk and keep a map |
||||
of line\-endings indexed by lineindex |
||||
|
||||
- <a name='6'></a>class::textinfo __line__ *lineindex* |
||||
|
||||
Reconstructs and returns the raw line using the payload and per\-line |
||||
stored line\-ending metadata |
||||
|
||||
A 'line' may be returned without a line\-ending if the underlying chunk |
||||
had trailing data without a line\-ending \(or the chunk was loaded under |
||||
a non\-standard \-policy setting\) |
||||
|
||||
Whilst such data may not conform to definitions \(e\.g POSIX\) of the |
||||
terms 'textfile' and 'line' \- it is useful here to represent it as a |
||||
line with metadata le set to "none" |
||||
|
||||
To return just the data which might more commonly be needed for dealing |
||||
with lines, use the __linepayload__ method \- which returns the line |
||||
data minus line\-ending |
||||
|
||||
- <a name='7'></a>class::textinfo __linepayload__ *lineindex* |
||||
|
||||
Return the text of the line indicated by the zero\-based lineindex |
||||
|
||||
The line\-ending is not returned in the data \- but is still stored |
||||
against this lineindex |
||||
|
||||
Line Metadata such as the line\-ending for a particular line and the |
||||
byte/character range it occupies within the chunk can be retrieved with |
||||
the __linemeta__ method |
||||
|
||||
To retrieve both the line text and metadata in a single call the |
||||
__lineinfo__ method can be used |
||||
|
||||
To retrieve an entire line including line\-ending use the __line__ |
||||
method\. |
||||
|
||||
- <a name='8'></a>class::textinfo __linemeta__ *lineindex* |
||||
|
||||
Return a dict of the metadata for the line indicated by the zero\-based |
||||
lineindex |
||||
|
||||
Keys returned include |
||||
|
||||
* le |
||||
|
||||
A string representing the type of line\-ending: crlf|lf|none |
||||
|
||||
* linelen |
||||
|
||||
The number of characters/bytes in the whole line including |
||||
line\-ending if any |
||||
|
||||
* payloadlen |
||||
|
||||
The number of characters/bytes in the line excluding line\-ending |
||||
|
||||
* start |
||||
|
||||
The zero\-based index into the associated raw file data indicating |
||||
at which byte/character index this line begins |
||||
|
||||
* end |
||||
|
||||
The zero\-based index into the associated raw file data indicating |
||||
at which byte/character index this line ends |
||||
|
||||
This end\-point corresponds to the last character of the line\-ending |
||||
if any \- not necessarily the last character of the line's payload |
||||
|
||||
- <a name='9'></a>class::textinfo __lineinfo__ *lineindex* |
||||
|
||||
Return a dict of the metadata and text for the line indicated by the |
||||
zero\-based lineindex |
||||
|
||||
This returns the same info as the __linemeta__ method with an added key of |
||||
'payload' which is the text of the line without line\-ending\. |
||||
|
||||
The 'payload' value is the same as is returned from the |
||||
__linepayload__ method\. |
||||
|
||||
- <a name='10'></a>class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx* |
||||
|
||||
- <a name='11'></a>class::textinfo __linerange\_to\_chunk__ *startidx* *endidx* |
||||
|
||||
- <a name='12'></a>class::textinfo __lines__ *startidx* *endidx* |
||||
|
||||
- <a name='13'></a>class::textinfo __linepayloads__ *startidx* *endidx* |
||||
|
||||
- <a name='14'></a>class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend* |
||||
|
||||
- <a name='15'></a>class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.? |
||||
|
||||
Return a list of dicts each with structure like the result of the |
||||
__lineinfo__ method \- but possibly with extra keys for truncation |
||||
information if \-show\_truncated 1 is supplied |
||||
|
||||
The truncation key in a lineinfo dict may be returned for first and/or |
||||
last line in the resulting list\. |
||||
|
||||
truncation shows the shortened \(missing bytes on left and/or right |
||||
side\) part of the entire line \(potentially including line\-ending or |
||||
even partial line\-ending\) |
||||
|
||||
Note that this truncation info is only in the return value of this |
||||
method \- and will not be reflected in __lineinfo__ queries to the |
||||
main chunk\. |
||||
|
||||
- <a name='16'></a>class::textinfo __numeric\_linerange__ *startidx* *endidx* |
||||
|
||||
A helper to return any Tcl\-style end end\-x values given to startidx or |
||||
endidx; converted to their specific values based on the current state |
||||
of the underlying line data |
||||
|
||||
This is used internally by API functions such as __line__ to enable |
||||
it to accept more expressive indices |
||||
|
||||
- <a name='17'></a>class::textinfo __numeric\_chunkrange__ *startidx* *endidx* |
||||
|
||||
A helper to return any Tcl\-style end end\-x entries supplied to startidx |
||||
or endidx; converted to their specific values based on the current |
||||
state of the underlying chunk data |
||||
|
||||
- <a name='18'></a>class::textinfo __normalize\_indices__ *startidx* *endidx* *max* |
||||
|
||||
A utility to convert some of the Tcl\-style list\-index expressions |
||||
such as end, end\-1 etc to valid indices in the range 0 to the supplied |
||||
max |
||||
|
||||
Basic addition and subtraction expressions such as 4\-1 5\+2 are accepted |
||||
|
||||
startidx higher than endidx is allowed |
||||
|
||||
Unlike Tcl's index expressions \- we raise an error if the calculated |
||||
index is out of bounds 0 to max |
||||
|
||||
## <a name='subsection5'></a>Namespace punk::fileline |
||||
|
||||
Core API functions for punk::fileline |
||||
|
||||
## <a name='subsection6'></a>Namespace punk::fileline::lib |
||||
|
||||
- <a name='19'></a>__lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize* |
||||
|
||||
Takes start and end offset, generally representing bytes or character |
||||
indices, and computes a list of boundaries at multiples of the chunksize |
||||
that are spanned by the start and end range\. |
||||
|
||||
* integer *start* |
||||
|
||||
zero\-based start index of range |
||||
|
||||
* integer *end* |
||||
|
||||
zero\-based end index of range |
||||
|
||||
* integer *chunksize* |
||||
|
||||
Number of bytes/characters in chunk |
||||
|
||||
returns a dict with the keys is\_span and boundaries |
||||
|
||||
is\_span 0|1 indicates if the range specified spans a boundary of chunksize |
||||
|
||||
boundaries contains a list of the spanned boundaries \- which are always |
||||
multiples of the chunksize |
||||
|
||||
e\.g |
||||
|
||||
range_spans_chunk_boundaries 10 1750 512 |
||||
is_span 1 boundaries {512 1024 1536} |
||||
|
||||
This function automatically uses lseq \(if Tcl >= 8\.7\) when the number of |
||||
boundaries spanned is approximately greater than 75 |
||||
|
||||
# <a name='section4'></a>Internal |
||||
|
||||
## <a name='subsection7'></a>Namespace punk::fileline::system |
||||
|
||||
Internal functions that are not part of the API |
||||
|
||||
# <a name='keywords'></a>KEYWORDS |
||||
|
||||
[file](\.\./\.\./\.\./index\.md\#file), [module](\.\./\.\./\.\./index\.md\#module), |
||||
[parse](\.\./\.\./\.\./index\.md\#parse), [text](\.\./\.\./\.\./index\.md\#text) |
||||
|
||||
# <a name='copyright'></a>COPYRIGHT |
||||
|
||||
Copyright © 2024 |
@ -1 +1 @@
|
||||
{shell {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} changelog {{doc/files/project_changes.html punkshell__project_changes}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} capability {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} module {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} punk {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} repl {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}}} {{repl doc/files/main.html punkshell} . {punk doc/files/project_intro.html punkshell__project_intro} . {capability doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {changelog doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/main.html punkshell} . {repl doc/files/project_intro.html punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {punk doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/project_intro.html punkshell__project_intro} . 
{punk doc/files/main.html punkshell} . {repl doc/files/project_changes.html punkshell__project_changes} .} 9 {shell shell changelog changelog filesystem filesystem path path capability capability module module punk punk plugin plugin repl repl} |
||||
{file {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} repl {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} text {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} shell {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} changelog {{doc/files/project_changes.html punkshell__project_changes}} capability {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} parse {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} module {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} {doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} punk {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}}} {{repl doc/files/main.html punkshell} . {file doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {punk doc/files/project_intro.html punkshell__project_intro} . {capability doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {shell doc/files/project_changes.html punkshell__project_changes} . {changelog doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/main.html punkshell} . {text doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {repl doc/files/project_intro.html punkshell__project_intro} . 
{module doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {path doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {plugin doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {shell doc/files/project_intro.html punkshell__project_intro} . {punk doc/files/project_changes.html punkshell__project_changes} . {parse doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {punk doc/files/main.html punkshell} . {module doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {repl doc/files/project_changes.html punkshell__project_changes} .} 12 {file file repl repl text text shell shell changelog changelog capability capability parse parse filesystem filesystem path path module module punk punk plugin plugin} |
@ -1 +1 @@
|
||||
doc {doc/toc {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.html punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.html punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.html punkshell {punkshell - Core}}}} |
||||
doc {doc/toc {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline {file line-handling utilities}} {doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.html punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.html punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.html punkshell {punkshell - Core}}}} |
@ -1 +1 @@
|
||||
kw,capability {index.html capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell Changes} doc/files/project_changes.html {Introduction to punkshell} doc/files/project_intro.html punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html sa,punkshell(n) doc/files/main.html filesystem {index.html filesystem} sa,punkshell doc/files/main.html kw,shell {index.html shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell__project_changes(n) doc/files/project_changes.html kw,path {index.html path} kw,module {index.html module} punkshell(n) doc/files/main.html kw,plugin {index.html plugin} punkshell doc/files/main.html punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html changelog {index.html changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html punkshell__project_changes(n) doc/files/project_changes.html sa,punkshell__project_changes doc/files/project_changes.html path {index.html path} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_changes doc/files/project_changes.html kw,filesystem {index.html filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html shell {index.html shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html kw,repl {index.html repl} capability {index.html capability} punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell - Core} doc/files/main.html {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {capability provider and handler plugin system} 
doc/files/punk/_module_cap-0.1.0.tm.html repl {index.html repl} kw,punk {index.html punk} sa,punkshell__project_intro(n) doc/files/project_intro.html sa,punkshell__project_intro doc/files/project_intro.html {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_intro(n) doc/files/project_intro.html punkshell__project_intro doc/files/project_intro.html kw,changelog {index.html changelog} punk {index.html punk} module {index.html module} plugin {index.html plugin} |
||||
kw,capability {index.html capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell Changes} doc/files/project_changes.html {Introduction to punkshell} doc/files/project_intro.html sa,punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html sa,punkshell(n) doc/files/main.html filesystem {index.html filesystem} sa,punkshell doc/files/main.html kw,shell {index.html shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html kw,parse {index.html parse} sa,punkshell__project_changes(n) doc/files/project_changes.html kw,path {index.html path} kw,module {index.html module} punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.html punkshell(n) doc/files/main.html kw,plugin {index.html plugin} punkshell doc/files/main.html kw,file {index.html file} punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html changelog {index.html changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html punkshell__project_changes(n) doc/files/project_changes.html sa,punkshell__project_changes doc/files/project_changes.html path {index.html path} file {index.html file} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_changes doc/files/project_changes.html kw,filesystem {index.html filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html shell {index.html shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html kw,repl {index.html repl} capability {index.html capability} kw,text {index.html text} parse {index.html parse} 
sa,punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell - Core} doc/files/main.html {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.html repl {index.html repl} punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.html kw,punk {index.html punk} sa,punkshell__project_intro(n) doc/files/project_intro.html text {index.html text} sa,punkshell__project_intro doc/files/project_intro.html {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_intro(n) doc/files/project_intro.html {file line-handling utilities} doc/files/punk/_module_fileline-0.1.0.tm.html punkshell__project_intro doc/files/project_intro.html kw,changelog {index.html changelog} module {index.html module} punk {index.html punk} plugin {index.html plugin} |
@ -0,0 +1,326 @@
|
||||
<!DOCTYPE html><html><head> |
||||
<title>punkshell_module_punk::fileline - punk fileline</title> |
||||
<style type="text/css"><!-- |
||||
HTML { |
||||
background: #FFFFFF; |
||||
color: black; |
||||
} |
||||
BODY { |
||||
background: #FFFFFF; |
||||
color: black; |
||||
} |
||||
DIV.doctools { |
||||
margin-left: 10%; |
||||
margin-right: 10%; |
||||
} |
||||
DIV.doctools H1,DIV.doctools H2 { |
||||
margin-left: -5%; |
||||
} |
||||
H1, H2, H3, H4 { |
||||
margin-top: 1em; |
||||
font-family: sans-serif; |
||||
font-size: large; |
||||
color: #005A9C; |
||||
background: transparent; |
||||
text-align: left; |
||||
} |
||||
H1.doctools_title { |
||||
text-align: center; |
||||
} |
||||
UL,OL { |
||||
margin-right: 0em; |
||||
margin-top: 3pt; |
||||
margin-bottom: 3pt; |
||||
} |
||||
UL LI { |
||||
list-style: disc; |
||||
} |
||||
OL LI { |
||||
list-style: decimal; |
||||
} |
||||
DT { |
||||
padding-top: 1ex; |
||||
} |
||||
UL.doctools_toc,UL.doctools_toc UL, UL.doctools_toc UL UL { |
||||
font: normal 12pt/14pt sans-serif; |
||||
list-style: none; |
||||
} |
||||
LI.doctools_section, LI.doctools_subsection { |
||||
list-style: none; |
||||
margin-left: 0em; |
||||
text-indent: 0em; |
||||
padding: 0em; |
||||
} |
||||
PRE { |
||||
display: block; |
||||
font-family: monospace; |
||||
white-space: pre; |
||||
margin: 0%; |
||||
padding-top: 0.5ex; |
||||
padding-bottom: 0.5ex; |
||||
padding-left: 1ex; |
||||
padding-right: 1ex; |
||||
width: 100%; |
||||
} |
||||
PRE.doctools_example { |
||||
color: black; |
||||
background: #f5dcb3; |
||||
border: 1px solid black; |
||||
} |
||||
UL.doctools_requirements LI, UL.doctools_syntax LI { |
||||
list-style: none; |
||||
margin-left: 0em; |
||||
text-indent: 0em; |
||||
padding: 0em; |
||||
} |
||||
DIV.doctools_synopsis { |
||||
color: black; |
||||
background: #80ffff; |
||||
border: 1px solid black; |
||||
font-family: serif; |
||||
margin-top: 1em; |
||||
margin-bottom: 1em; |
||||
} |
||||
UL.doctools_syntax { |
||||
margin-top: 1em; |
||||
border-top: 1px solid black; |
||||
} |
||||
UL.doctools_requirements { |
||||
margin-bottom: 1em; |
||||
border-bottom: 1px solid black; |
||||
} |
||||
--></style> |
||||
</head> |
||||
<!-- Generated from file '_module_fileline-0.1.0.tm.man' by tcllib/doctools with format 'html' |
||||
--> |
||||
<!-- Copyright &copy; 2024 |
||||
--> |
||||
<!-- punkshell_module_punk::fileline.0 |
||||
--> |
||||
<body><hr> [ |
||||
<a href="../../../toc.html">Main Table Of Contents</a> |
||||
| <a href="../../toc.html">Table Of Contents</a> |
||||
| <a href="../../../index.html">Keyword Index</a> |
||||
] <hr> |
||||
<div class="doctools"> |
||||
<h1 class="doctools_title">punkshell_module_punk::fileline(0) 0.1.0 doc "punk fileline"</h1> |
||||
<div id="name" class="doctools_section"><h2><a name="name">Name</a></h2> |
||||
<p>punkshell_module_punk::fileline - file line-handling utilities</p> |
||||
</div> |
||||
<div id="toc" class="doctools_section"><h2><a name="toc">Table Of Contents</a></h2> |
||||
<ul class="doctools_toc"> |
||||
<li class="doctools_section"><a href="#toc">Table Of Contents</a></li> |
||||
<li class="doctools_section"><a href="#synopsis">Synopsis</a></li> |
||||
<li class="doctools_section"><a href="#section1">Description</a></li> |
||||
<li class="doctools_section"><a href="#section2">Overview</a> |
||||
<ul> |
||||
<li class="doctools_subsection"><a href="#subsection1">Concepts</a></li> |
||||
<li class="doctools_subsection"><a href="#subsection2">Notes</a></li> |
||||
<li class="doctools_subsection"><a href="#subsection3">dependencies</a></li> |
||||
</ul> |
||||
</li> |
||||
<li class="doctools_section"><a href="#section3">API</a> |
||||
<ul> |
||||
<li class="doctools_subsection"><a href="#subsection4">Namespace punk::fileline::class</a></li> |
||||
<li class="doctools_subsection"><a href="#subsection5">Namespace punk::fileline</a></li> |
||||
<li class="doctools_subsection"><a href="#subsection6">Namespace punk::fileline::lib</a></li> |
||||
</ul> |
||||
</li> |
||||
<li class="doctools_section"><a href="#section4">Internal</a> |
||||
<ul> |
||||
<li class="doctools_subsection"><a href="#subsection7">Namespace punk::fileline::system</a></li> |
||||
</ul> |
||||
</li> |
||||
<li class="doctools_section"><a href="#keywords">Keywords</a></li> |
||||
<li class="doctools_section"><a href="#copyright">Copyright</a></li> |
||||
</ul> |
||||
</div> |
||||
<div id="synopsis" class="doctools_section"><h2><a name="synopsis">Synopsis</a></h2> |
||||
<div class="doctools_synopsis"> |
||||
<ul class="doctools_requirements"> |
||||
<li>package require <b class="pkgname">punk::fileline</b></li> |
||||
</ul> |
||||
<ul class="doctools_syntax"> |
||||
<li><a href="#1">class::textinfo <b class="method">constructor</b> <i class="arg">datachunk</i> <span class="opt">?option value...?</span></a></li> |
||||
<li><a href="#2">class::textinfo <b class="method">chunk</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></li> |
||||
<li><a href="#3">class::textinfo <b class="method">chunklen</b></a></li> |
||||
<li><a href="#4">class::textinfo <b class="method">linecount</b></a></li> |
||||
<li><a href="#5">class::textinfo <b class="method">regenerate_lines</b></a></li> |
||||
<li><a href="#6">class::textinfo <b class="method">line</b> <i class="arg">lineindex</i></a></li> |
||||
<li><a href="#7">class::textinfo <b class="method">linepayload</b> <i class="arg">lineindex</i></a></li> |
||||
<li><a href="#8">class::textinfo <b class="method">linemeta</b> <i class="arg">lineindex</i></a></li> |
||||
<li><a href="#9">class::textinfo <b class="method">lineinfo</b> <i class="arg">lineindex</i></a></li> |
||||
<li><a href="#10">class::textinfo <b class="method">linerange_to_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||
<li><a href="#11">class::textinfo <b class="method">linerange_to_chunk</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||
<li><a href="#12">class::textinfo <b class="method">lines</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||
<li><a href="#13">class::textinfo <b class="method">linepayloads</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||
<li><a href="#14">class::textinfo <b class="method">chunkrange_to_linerange</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></li> |
||||
<li><a href="#15">class::textinfo <b class="method">chunkrange_to_lineinfolist</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i> <span class="opt">?option value...?</span></a></li> |
||||
<li><a href="#16">class::textinfo <b class="method">numeric_linerange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||
<li><a href="#17">class::textinfo <b class="method">numeric_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||
<li><a href="#18">class::textinfo <b class="method">normalize_indices</b> <i class="arg">startidx</i> <i class="arg">endidx</i> <i class="arg">max</i></a></li> |
||||
<li><a href="#19"><b class="function">lib::range_spans_chunk_boundaries</b> <i class="arg">start</i> <i class="arg">end</i> <i class="arg">chunksize</i></a></li> |
||||
</ul> |
||||
</div> |
||||
</div> |
||||
<div id="section1" class="doctools_section"><h2><a name="section1">Description</a></h2> |
||||
<p>-</p> |
||||
</div> |
||||
<div id="section2" class="doctools_section"><h2><a name="section2">Overview</a></h2> |
||||
<p>Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed)</p> |
||||
<p>This is useful for certain text files where examining the number of chars/bytes is important</p> |
||||
<p>For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved.</p> |
||||
<p>Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data.</p> |
||||
<div id="subsection1" class="doctools_subsection"><h3><a name="subsection1">Concepts</a></h3> |
||||
<p>A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation.</p> |
||||
<pre class="doctools_example"> |
||||
package require punk::fileline |
||||
package require fileutil |
||||
set rawdata [fileutil::cat data.txt -translation binary] |
||||
punk::fileline::class::textinfo create obj_data $rawdata |
||||
puts stdout [obj_data linecount] |
||||
</pre> |
||||
</div> |
||||
<div id="subsection2" class="doctools_subsection"><h3><a name="subsection2">Notes</a></h3> |
||||
<p>Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files.</p> |
||||
<p>This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired.</p> |
||||
<p>No support for lone carriage-returns being interpreted as line-endings.</p> |
||||
<p>CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module.</p> |
||||
</div> |
||||
<div id="subsection3" class="doctools_subsection"><h3><a name="subsection3">dependencies</a></h3> |
||||
<p>packages used by punk::fileline</p> |
||||
<ul class="doctools_itemized"> |
||||
<li><p><b class="package">Tcl 8.6</b></p></li> |
||||
</ul> |
||||
</div> |
||||
</div> |
||||
<div id="section3" class="doctools_section"><h2><a name="section3">API</a></h2> |
||||
<div id="subsection4" class="doctools_subsection"><h3><a name="subsection4">Namespace punk::fileline::class</a></h3> |
||||
<p>class definitions</p> |
||||
<ol class="doctools_enumerated"> |
||||
<li><p>CLASS <b class="class">textinfo</b></p> |
||||
<dl class="doctools_definitions"> |
||||
<p><em>METHODS</em></p> |
||||
<dt><a name="1">class::textinfo <b class="method">constructor</b> <i class="arg">datachunk</i> <span class="opt">?option value...?</span></a></dt> |
||||
<dd><p>Constructor for textinfo object which represents a chunk or all of a file</p> |
||||
<p>datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like:</p> |
||||
<pre class="doctools_example"> |
||||
fconfigure $fd -translation binary |
||||
set chunkdata [read $fd] |
||||
or |
||||
set chunkdata [fileutil::cat &lt;filename&gt; -translation binary] |
||||
</pre> |
||||
<p>when loading the data</p></dd> |
||||
<dt><a name="2">class::textinfo <b class="method">chunk</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></dt> |
||||
<dd><p>Return a range of bytes from the underlying raw chunk data.</p> |
||||
<p>e.g The following retrieves the entire chunk</p> |
||||
<p>objName chunk 0 end</p></dd> |
||||
<dt><a name="3">class::textinfo <b class="method">chunklen</b></a></dt> |
||||
<dd><p>Number of bytes/characters in the raw data of the file</p></dd> |
||||
<dt><a name="4">class::textinfo <b class="method">linecount</b></a></dt> |
||||
<dd><p>Number of lines in the raw data of the file, counted as per the policy in effect</p></dd> |
||||
<dt><a name="5">class::textinfo <b class="method">regenerate_lines</b></a></dt> |
||||
<dd><p>generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex</p></dd> |
||||
<dt><a name="6">class::textinfo <b class="method">line</b> <i class="arg">lineindex</i></a></dt> |
||||
<dd><p>Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata</p> |
||||
<p>A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting)</p> |
||||
<p>Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none"</p> |
||||
<p>To return just the data which might more commonly be needed for dealing with lines, use the <b class="method">linepayload</b> method - which returns the line data minus line-ending</p></dd> |
||||
<dt><a name="7">class::textinfo <b class="method">linepayload</b> <i class="arg">lineindex</i></a></dt> |
||||
<dd><p>Return the text of the line indicated by the zero-based lineindex</p> |
||||
<p>The line-ending is not returned in the data - but is still stored against this lineindex</p> |
||||
<p>Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the <b class="method">linemeta</b> method</p> |
||||
<p>To retrieve both the line text and metadata in a single call the <b class="method">lineinfo</b> method can be used</p> |
||||
<p>To retrieve an entire line including line-ending use the <b class="method">line</b> method.</p></dd> |
||||
<dt><a name="8">class::textinfo <b class="method">linemeta</b> <i class="arg">lineindex</i></a></dt> |
||||
<dd><p>Return a dict of the metadata for the line indicated by the zero-based lineindex</p> |
||||
<p>Keys returned include</p> |
||||
<ul class="doctools_itemized"> |
||||
<li><p>le</p> |
||||
<p>A string representing the type of line-ending: crlf|lf|none</p></li> |
||||
<li><p>linelen</p> |
||||
<p>The number of characters/bytes in the whole line including line-ending if any</p></li> |
||||
<li><p>payloadlen</p> |
||||
<p>The number of characters/bytes in the line excluding line-ending</p></li> |
||||
<li><p>start</p> |
||||
<p>The zero-based index into the associated raw file data indicating at which byte/character index this line begins</p></li> |
||||
<li><p>end</p> |
||||
<p>The zero-based index into the associated raw file data indicating at which byte/character index this line ends</p> |
||||
<p>This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload</p></li> |
||||
</ul></dd> |
||||
<dt><a name="9">class::textinfo <b class="method">lineinfo</b> <i class="arg">lineindex</i></a></dt> |
||||
<dd><p>Return a dict of the metadata and text for the line indicated by the zero-based lineindex</p> |
||||
<p>This returns the same info as the <b class="method">linemeta</b> method with an added key of 'payload' which is the text of the line without line-ending.</p> |
||||
<p>The 'payload' value is the same as is returned from the <b class="method">linepayload</b> method.</p></dd> |
||||
<dt><a name="10">class::textinfo <b class="method">linerange_to_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||
<dd></dd> |
||||
<dt><a name="11">class::textinfo <b class="method">linerange_to_chunk</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||
<dd></dd> |
||||
<dt><a name="12">class::textinfo <b class="method">lines</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||
<dd></dd> |
||||
<dt><a name="13">class::textinfo <b class="method">linepayloads</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||
<dd></dd> |
||||
<dt><a name="14">class::textinfo <b class="method">chunkrange_to_linerange</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></dt> |
||||
<dd></dd> |
||||
<dt><a name="15">class::textinfo <b class="method">chunkrange_to_lineinfolist</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i> <span class="opt">?option value...?</span></a></dt> |
||||
<dd><p>Return a list of dicts each with structure like the result of the <b class="method">lineinfo</b> method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied</p> |
||||
<p>The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list.</p> |
||||
<p>truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending)</p> |
||||
<p>Note that this truncation info is only in the return value of this method - and will not be reflected in <b class="method">lineinfo</b> queries to the main chunk.</p></dd> |
||||
<dt><a name="16">class::textinfo <b class="method">numeric_linerange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||
<dd><p>A helper to return any Tcl-style 'end' or 'end-x' values given to startidx or endidx, converted to their specific values based on the current state of the underlying line data</p> |
||||
<p>This is used internally by API functions such as <b class="method">line</b> to enable it to accept more expressive indices</p></dd> |
||||
<dt><a name="17">class::textinfo <b class="method">numeric_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||
<dd><p>A helper to return any Tcl-style 'end' or 'end-x' entries supplied to startidx or endidx, converted to their specific values based on the current state of the underlying chunk data</p></dd> |
||||
<dt><a name="18">class::textinfo <b class="method">normalize_indices</b> <i class="arg">startidx</i> <i class="arg">endidx</i> <i class="arg">max</i></a></dt> |
||||
<dd><p>A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max</p> |
||||
<p>Basic addition and subtraction expressions such as 4-1 5+2 are accepted</p> |
||||
<p>startidx higher than endidx is allowed</p> |
||||
<p>Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max</p></dd> |
||||
</dl> |
||||
</li> |
||||
</ol> |
||||
</div> |
||||
<div id="subsection5" class="doctools_subsection"><h3><a name="subsection5">Namespace punk::fileline</a></h3> |
||||
<p>Core API functions for punk::fileline</p> |
||||
<dl class="doctools_definitions"> |
||||
</dl> |
||||
</div> |
||||
<div id="subsection6" class="doctools_subsection"><h3><a name="subsection6">Namespace punk::fileline::lib</a></h3> |
||||
<p>Secondary functions that are part of the API</p> |
||||
<dl class="doctools_definitions"> |
||||
<dt><a name="19"><b class="function">lib::range_spans_chunk_boundaries</b> <i class="arg">start</i> <i class="arg">end</i> <i class="arg">chunksize</i></a></dt> |
||||
<dd><p>Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.</p> |
||||
<dl class="doctools_arguments"> |
||||
|
||||
<dt>integer <i class="arg">start</i></dt> |
||||
<dd><p>zero-based start index of range</p></dd> |
||||
<dt>integer <i class="arg">end</i></dt> |
||||
<dd><p>zero-based end index of range</p></dd> |
||||
<dt>integer <i class="arg">chunksize</i></dt> |
||||
<dd><p>Number of bytes/characters in chunk</p></dd> |
||||
</dl> |
||||
<p>returns a dict with the keys is_span and boundaries</p> |
||||
<p>is_span 0|1 indicates if the range specified spans a boundary of chunksize</p> |
||||
<p>boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize</p> |
||||
<p>e.g</p> |
||||
<pre class="doctools_example"> |
||||
range_spans_chunk_boundaries 10 1750 512 |
||||
is_span 1 boundaries {512 1024 1536} |
||||
</pre> |
||||
<p>This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75</p></dd> |
||||
</dl> |
||||
</div> |
||||
</div> |
||||
<div id="section4" class="doctools_section"><h2><a name="section4">Internal</a></h2> |
||||
<div id="subsection7" class="doctools_subsection"><h3><a name="subsection7">Namespace punk::fileline::system</a></h3> |
||||
<p>Internal functions that are not part of the API</p> |
||||
</div> |
||||
</div> |
||||
<div id="keywords" class="doctools_section"><h2><a name="keywords">Keywords</a></h2> |
||||
<p><a href="../../../index.html#file">file</a>, <a href="../../../index.html#module">module</a>, <a href="../../../index.html#parse">parse</a>, <a href="../../../index.html#text">text</a></p> |
||||
</div> |
||||
<div id="copyright" class="doctools_section"><h2><a name="copyright">Copyright</a></h2> |
||||
<p>Copyright © 2024</p> |
||||
</div> |
||||
</div></body></html> |
@ -0,0 +1,829 @@
|
||||
# -*- tcl -*- |
||||
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
||||
# |
||||
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
||||
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# (C) 2024 |
||||
# |
||||
# @@ Meta Begin |
||||
# Application punk::fileline 999999.0a1.0 |
||||
# Meta platform tcl |
||||
# Meta license BSD |
||||
# @@ Meta End |
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# doctools header |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
#*** !doctools |
||||
#[manpage_begin punkshell_module_punk::fileline 0 999999.0a1.0] |
||||
#[copyright "2024"] |
||||
#[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] |
||||
#[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] |
||||
#[require punk::fileline] |
||||
#[keywords module text parse file] |
||||
#[description] |
||||
#[para] - |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
#*** !doctools |
||||
#[section Overview] |
||||
#[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) |
||||
#[para]This is useful for certain text files where examining the number of chars/bytes is important |
||||
#[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved. |
||||
#[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data. |
||||
#[subsection Concepts] |
||||
#[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. |
||||
#[example_begin] |
||||
# package require punk::fileline |
||||
# package require fileutil |
||||
# set rawdata [lb]fileutil::cat data.txt -translation binary[rb] |
||||
# punk::fileline::class::textinfo create obj_data $rawdata |
||||
# puts stdout [lb]obj_data linecount[rb] |
||||
#[example_end] |
||||
#[subsection Notes] |
||||
#[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. |
||||
#[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. |
||||
#[para]No support for lone carriage-returns being interpreted as line-endings. |
||||
#[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
## Requirements |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
#*** !doctools |
||||
#[subsection dependencies] |
||||
#[para] packages used by punk::fileline |
||||
#[list_begin itemized] |
||||
|
||||
package require Tcl 8.6 |
||||
#*** !doctools |
||||
#[item] [package {Tcl 8.6}] |
||||
|
||||
# #package require frobz |
||||
# #*** !doctools |
||||
# #[item] [package {frobz}] |
||||
|
||||
#*** !doctools |
||||
#[list_end] |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
#*** !doctools |
||||
#[section API] |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# oo::class namespace |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
namespace eval punk::fileline::class { |
||||
namespace export * |
||||
#*** !doctools |
||||
#[subsection {Namespace punk::fileline::class}] |
||||
#[para] class definitions |
||||
if {[info commands [namespace current]::textinfo] eq ""} { |
||||
#*** !doctools |
||||
#[list_begin enumerated] |
||||
|
||||
# oo::class create interface_sample1 { |
||||
# #*** !doctools |
||||
# #[enum] CLASS [class interface_sample1] |
||||
# #[list_begin definitions] |
||||
|
||||
# method test {arg1} { |
||||
# #*** !doctools |
||||
# #[call class::interface_sample1 [method test] [arg arg1]] |
||||
# #[para] test method |
||||
# puts "test: $arg1" |
||||
# } |
||||
|
||||
# #*** !doctools |
||||
# #[list_end] [comment {-- end definitions interface_sample1}] |
||||
# } |
||||
|
||||
|
||||
#uses zero based indexing. Caller can add 1 for line numbers |
||||
oo::class create [namespace current]::textinfo { |
||||
#*** !doctools |
||||
#[enum] CLASS [class textinfo] |
||||
#[list_begin definitions] |
||||
# [para] [emph METHODS] |
||||
|
||||
variable o_chunk |
||||
variable o_chunk_epoch |
||||
variable o_payloadlist |
||||
variable o_linemap |
||||
variable o_line_epoch |
||||
variable o_LF_C |
||||
variable o_CRLF_C |
||||
|
||||
constructor {datachunk args} { |
||||
#*** !doctools |
||||
#[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]] |
||||
#[para] Constructor for textinfo object which represents a chunk or all of a file |
||||
#[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like: |
||||
#[example_begin] |
||||
# fconfigure $fd -translation binary |
||||
#     set chunkdata [lb]read $fd[rb] |
||||
#or |
||||
# set chunkdata [lb]fileutil::cat <filename> -translation binary[rb] |
||||
#[example_end] |
||||
#[para] when loading the data |
||||
set o_chunk $datachunk |
||||
set crlf_lf_placeholders [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message |
||||
set defaults [dict create\ |
||||
-substitutionmap {}\ |
||||
-crlf_lf_placeholders $crlf_lf_placeholders\ |
||||
] |
||||
set known_opts [dict keys $defaults] |
||||
foreach {k v} $args { |
||||
if {$k ni $known_opts} { |
||||
error "textinfo::constructor error: unknown option '$k'. Known options: $known_opts" |
||||
} |
||||
} |
||||
set opts [dict merge $defaults $args] |
||||
# -- --- --- --- --- --- --- |
||||
set opt_substitutionmap [dict get $opts -substitutionmap] ;#review - can be done by caller - or a loadable -policy |
||||
set opt_crlf_lf_placeholders [dict get $opts -crlf_lf_placeholders] |
||||
# -- --- --- --- --- --- --- |
||||
|
||||
if {[llength $opt_crlf_lf_placeholders] != 2 || [string length [lindex $opt_crlf_lf_placeholders 0]] !=1 || [string length [lindex $opt_crlf_lf_placeholders 1]] !=1} { |
||||
error "textinfo::constructor error: -crlf_lf_placeholders requires a list of exactly 2 chars" |
||||
} |
||||
lassign $opt_crlf_lf_placeholders o_LF_C o_CRLF_C |
||||
if {[string first $o_LF_C $o_chunk] >=0} { |
||||
set decval [scan $o_LF_C %c] |
||||
if {$decval < 32 || $decval > 127} { |
||||
set char_desc "(decimal value $decval)" |
||||
} else { |
||||
set char_desc "'$o_LF_C' (decimal value $decval)" |
||||
} |
||||
error "textinfo::constructor error: rawfiledata already contains linefeed substitution character $char_desc specified as first element of -crlf_lf_placeholders" |
||||
} |
||||
if {[string first $o_CRLF_C $o_chunk] >=0} { |
||||
set decval [scan $o_CRLF_C %c] |
||||
if {$decval < 32 || $decval > 127} { |
||||
set char_desc "(decimal value $decval)" |
||||
} else { |
||||
set char_desc "'$o_CRLF_C' (decimal value $decval)" |
||||
} |
||||
error "textinfo::constructor error: rawfiledata already contains carriagereturn-linefeed substitution character $char_desc specified as second element of -crlf_lf_placeholders" |
||||
} |
||||
if {$o_LF_C eq $o_CRLF_C} { |
||||
puts stderr "WARNING: same substitution character used for both elements of -crlf_lf_placeholders - byte counting may be off if file contains mixed line-endings" |
||||
} |
||||
set o_chunk_epoch "initial" |
||||
set o_line_epoch "" |
||||
my regenerate_lines |
||||
|
||||
} |
||||
|
||||
method chunk {chunkstart chunkend} {
    #*** !doctools
    #[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]]
    #[para]Return a range of bytes from the underlying raw chunk data.
    #[para] e.g The following retrieves the entire chunk
    #[para] objName chunk 0 end

    #The indices are handed to string range unmodified - so they are not validated against the chunk bounds here.
    set rawrange [string range $o_chunk $chunkstart $chunkend]
    return $rawrange
}
||||
method chunklen {} {
    #*** !doctools
    #[call class::textinfo [method chunklen]]
    #[para] Number of bytes/characters in the raw data of the file

    #measured with string length on the stored chunk
    set rawsize [string length $o_chunk]
    return $rawsize
}
||||
method linecount {} {
    #*** !doctools
    #[call class::textinfo [method linecount]]
    #[para] Number of lines in the raw data of the file, counted as per the policy in effect

    #one payload entry is stored per line record
    set stored_lines [llength $o_payloadlist]
    return $stored_lines
}
||||
|
||||
|
||||
method line {lineindex} {
    #*** !doctools
    #[call class::textinfo [method line] [arg lineindex]]
    #[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata
    #[para]A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting)
    #[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none"
    #[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending

    #resolve end/end-x style values to a concrete index (the 0 is just a valid filler for the unused end slot)
    lassign [my numeric_linerange $lineindex 0] lineindex

    #line-endings are stored by name - translate back to the actual characters
    set le_lookup [dict create lf \n crlf \r\n none ""]
    set le_name [dict get $o_linemap $lineindex le]
    set payload [lindex $o_payloadlist $lineindex]
    return "${payload}[dict get $le_lookup $le_name]"
}
||||
method linepayload {lineindex} {
    #*** !doctools
    #[call class::textinfo [method linepayload] [arg lineindex]]
    #[para]Return the text of the line indicated by the zero-based lineindex
    #[para]The line-ending is not returned in the data - but is still stored against this lineindex
    #[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method
    #[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used
    #[para]To retrieve an entire line including line-ending use the [method line] method.

    #resolve and bounds-check the index first (second argument is a filler for the unused end slot)
    set resolved [my numeric_linerange $lineindex 0]
    set lineindex [lindex $resolved 0]
    set payload [lindex $o_payloadlist $lineindex]
    return $payload
}
||||
method linemeta {lineindex} {
    #*** !doctools
    #[call class::textinfo [method linemeta] [arg lineindex]]
    #[para]Return a dict of the metadata for the line indicated by the zero-based lineindex
    #[para]Keys returned include
    #[list_begin itemized]
    #[item] le
    #[para] A string representing the type of line-ending: crlf|lf|none
    #[item] linelen
    #[para] The number of characters/bytes in the whole line including line-ending if any
    #[item] payloadlen
    #[para] The number of character/bytes in the line excluding line-ending
    #[item] start
    #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins
    #[item] end
    #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends
    #[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload
    #[list_end]

    #resolve and bounds-check the index, then hand back the stored record for that line
    set resolved [my numeric_linerange $lineindex 0]
    set lineindex [lindex $resolved 0]
    return [dict get $o_linemap $lineindex]
}
||||
method lineinfo {lineindex} {
    #*** !doctools
    #[call class::textinfo [method lineinfo] [arg lineindex]]
    #[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex
    #[para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending.
    #[para]The 'payload' value is the same as is returned from the [method linepayload] method.

    #convert lineindex to canonical number e.g 1_000 -> 1000 end -> highest index
    lassign [my numeric_linerange $lineindex 0] lineindex

    #assemble: lineindex first, then the stored metadata keys, then the payload text
    set info [dict create lineindex $lineindex]
    set info [dict merge $info [dict get $o_linemap $lineindex]]
    dict set info payload [lindex $o_payloadlist $lineindex]
    return $info
}
||||
method lineinfolist {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method lineinfolist] [arg startidx] [arg endidx]]
    #[para]Returns list of lineinfo dicts for each line in line index range startidx to endidx

    lassign [my numeric_linerange $startidx $endidx] startidx endidx

    #translate the line range to the character range it occupies in the raw chunk
    set first_char [dict get $o_linemap $startidx start]
    set last_char [dict get $o_linemap $endidx end]
    #no need to view truncations as we've picked start and end of complete lines
    set records [my chunkrange_to_lineinfolist $first_char $last_char]

    #sanity check - the records that came back must begin and finish on the lines we asked for
    set got_first [dict get [lindex $records 0] lineindex]
    if {$got_first ne $startidx} {
        error "lineinfolist first lineindex $got_first doesn't match startidx $startidx"
    }
    set got_last [dict get [lindex $records end] lineindex]
    if {$got_last ne $endidx} {
        error "lineinfolist last lineindex $got_last doesn't match endidx $endidx"
    }
    return $records
}
||||
|
||||
method linerange_to_chunkrange {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]]

    lassign [my numeric_linerange $startidx $endidx] startidx endidx
    #inclusive range: first character of the start line through the last character (line-ending included) of the end line
    set range_start [dict get $o_linemap $startidx start]
    set range_end [dict get $o_linemap $endidx end]
    return [list $range_start $range_end]
}
||||
method linerange_to_chunk {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]]

    #find the character span covered by the lines, then slice it out of the raw chunk
    lassign [my linerange_to_chunkrange $startidx $endidx] range_start range_end
    return [string range $o_chunk $range_start $range_end]
}
||||
method lines {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method lines] [arg startidx] [arg endidx]]

    lassign [my numeric_linerange $startidx $endidx] startidx endidx
    #line-endings are stored by name - translate back to the actual characters
    set le_lookup [dict create lf \n crlf \r\n none ""]
    set rebuilt [list]
    set idx $startidx
    #a startidx above endidx yields an empty payload range and therefore an empty result
    foreach payload [lrange $o_payloadlist $startidx $endidx] {
        set le_name [dict get $o_linemap $idx le]
        lappend rebuilt "${payload}[dict get $le_lookup $le_name]"
        incr idx
    }
    return $rebuilt
}
||||
method linepayloads {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]]

    #NOTE(review): unlike the lines method, the indices go straight to lrange without passing through numeric_linerange.
    #Out of range values are therefore clipped by lrange instead of raising an error - confirm whether that is intended.
    set payloads [lrange $o_payloadlist $startidx $endidx]
    return $payloads
}
||||
method chunkrange_to_linerange {chunkstart chunkend} {
    #*** !doctools
    #[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]]

    lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend
    set total_lines [llength $o_payloadlist]

    #scan forwards for the line whose start..end span contains chunkstart
    set linestart -1
    for {set n 0} {$n < $total_lines} {incr n} {
        set meta [dict get $o_linemap $n]
        if {$chunkstart < [dict get $meta start] || $chunkstart > [dict get $meta end]} {
            continue
        }
        set linestart $n
        break
    }
    if {$linestart == -1} {
        error "Line with range in chunk spanning start index $chunkstart not found"
    }

    #scan backwards for the line whose start..end span contains chunkend
    set lineend -1
    for {set n [expr {$total_lines - 1}]} {$n >= 0} {incr n -1} {
        set meta [dict get $o_linemap $n]
        if {$chunkend < [dict get $meta start] || $chunkend > [dict get $meta end]} {
            continue
        }
        set lineend $n
        break
    }
    if {$lineend == -1} {
        error "Line with range spanning end index $chunkend not found"
    }
    return [list $linestart $lineend]
}
||||
method chunkrange_to_lineinfolist {chunkstart chunkend args} {
    #*** !doctools
    #[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]]
    #[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied
    #[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list.
    #[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending)
    #[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk.

    lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend
    set defaults [dict create\
        -show_truncated 0\
    ]
    set known_opts [dict keys $defaults]
    foreach {k v} $args {
        if {$k ni $known_opts} {
            error "chunkrange_to_lineinfolist error: unknown option '$k'. Known options: $known_opts"
        }
    }
    set opts [dict merge $defaults $args]
    # -- --- --- --- --- --- --- ---
    set opt_show_truncated [dict get $opts -show_truncated]
    # -- --- --- --- --- --- --- ---

    lassign [my chunkrange_to_linerange $chunkstart $chunkend] start_lineindex end_lineindex
    #line-endings are stored by name - translate back to the actual characters when rebuilding whole lines
    set le_map [dict create lf \n crlf \r\n none ""]

    #With -show_truncated the first and last records (which may be the same record) gain:
    #  is_truncated 0|1
    #  truncated      - the part of the whole line (payload + line-ending) that lies inside the chunkrange
    #  truncatedside  - list containing left and/or right
    #  truncatedleft / truncatedright - the parts of the whole line that fall outside the chunkrange
    #Even if the start/end line is truncated - the 'payload' key always contains the original untruncated data

    ###########################
    # first line - may be cut on the left, and also on the right if the whole chunkrange lies within it
    ###########################
    set start_info [dict get $o_linemap $start_lineindex]
    set first [dict create lineindex $start_lineindex {*}$start_info payload [lindex $o_payloadlist $start_lineindex]]
    if {$opt_show_truncated} {
        set whole "[lindex $o_payloadlist $start_lineindex][dict get $le_map [dict get $start_info le]]"
        set whole_last [expr {[string length $whole] - 1}]
        set line_start [dict get $start_info start]
        #offsets within the whole line of the first and last characters retained
        set keep_from [expr {$chunkstart - $line_start}]
        if {$end_lineindex == $start_lineindex} {
            set keep_to [expr {$chunkend - $line_start}]
        } else {
            set keep_to $whole_last
        }
        set sides [list]
        if {$keep_from > 0} {
            lappend sides left
        }
        if {$keep_to < $whole_last} {
            lappend sides right
        }
        if {[llength $sides]} {
            dict set first is_truncated 1
            dict set first truncated [string range $whole $keep_from $keep_to]
            dict set first truncatedside $sides
            if {"left" in $sides} {
                dict set first truncatedleft [string range $whole 0 $keep_from-1]
            }
            if {"right" in $sides} {
                dict set first truncatedright [string range $whole $keep_to+1 end]
            }
        } else {
            dict set first is_truncated 0
        }
    }
    if {$end_lineindex == $start_lineindex} {
        #chunkrange lies within a single line - no middle or last to append
        return [list $first]
    }

    set infolist [list $first]

    ###########################
    # middle lines if any - never truncated
    # (these records have always carried is_truncated 0 regardless of -show_truncated - retained for compatibility)
    ###########################
    for {set i [expr {$start_lineindex + 1}]} {$i < $end_lineindex} {incr i} {
        lappend infolist [dict create lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i] is_truncated 0]
    }

    ###########################
    # last line - may be cut on the right; a crlf line-ending may be split
    ###########################
    set end_info [dict get $o_linemap $end_lineindex]
    set last [dict create lineindex $end_lineindex {*}$end_info payload [lindex $o_payloadlist $end_lineindex]]
    if {$opt_show_truncated} {
        if {$chunkend < [dict get $end_info end]} {
            set whole "[lindex $o_payloadlist $end_lineindex][dict get $le_map [dict get $end_info le]]"
            set keep_to [expr {$chunkend - [dict get $end_info start]}]
            dict set last is_truncated 1
            #truncating the rhs by 1 can make 'truncated' longer than the payload for crlf lines - as it then includes the cr.
            #The truncated value is the truncated 'line' rather than the truncated 'payload'
            dict set last truncated [string range $whole 0 $keep_to]
            dict set last truncatedside [list right]
            dict set last truncatedright [string range $whole $keep_to+1 end]
        } else {
            dict set last is_truncated 0
        }
    }
    lappend infolist $last
    return $infolist
}
||||
|
||||
method chunk_le_counts {chunkstart chunkend} {
    #Tally the line-ending types of every line touched by the chunk range chunkstart..chunkend.
    #Returns a dict with keys lf, crlf and unterminated.
    set tally [dict create lf 0 crlf 0 unterminated 0]
    foreach record [my chunkrange_to_lineinfolist $chunkstart $chunkend] {
        switch -- [dict get $record le] {
            lf {
                dict incr tally lf
            }
            crlf {
                dict incr tally crlf
            }
            default {
                #anything that isn't lf or crlf is counted as having no line-ending
                dict incr tally unterminated
            }
        }
    }
    return $tally
}
||||
|
||||
#todo - test last line and merge as necessary with first line from new chunk - generate line data only for appended chunk |
||||
method append_chunk {rawchunk} {
    #Placeholder - appending further raw data to an existing textinfo object is not supported yet.
    #Always raises an error; rawchunk is not examined.
    error "sorry - unimplemented"
}
||||
|
||||
method numeric_linerange {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]]
    #[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data
    #[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices

    #highest valid line index is one less than the number of line records
    set highest [expr {[dict size $o_linemap] - 1}]
    return [my normalize_indices $startidx $endidx $highest]
}
||||
method numeric_chunkrange {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]]
    #[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data

    #highest valid chunk index is one less than the length of the raw data
    set highest [expr {[string length $o_chunk] - 1}]
    return [my normalize_indices $startidx $endidx $highest]
}
||||
method normalize_indices {startidx endidx max} {
    #*** !doctools
    #[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]]
    #[para]A utility to convert some of the of Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max
    #[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted
    #[para]startidx higher than endidx is allowed
    #[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max

    #Accepted forms (after removal of Tcl 8.7+ style underscores within numbers):
    #  int   end   end-int   end+int   int-int   int+int   +int   -int
    #Like lrange etc we don't accept arbitrarily complex expressions.
    #Values are matched with regexp and never evaluated - as ranges may come from external sources.
    set resolved [list]
    foreach which [list start end] supplied [list $startidx $endidx] {
        set spec [string map [list _ ""] $supplied]
        if {[regexp {^[0-9]+$} $spec]} {
            #scan as decimal so that leading zeros are neither kept nor treated as octal
            set index [scan $spec %lld]
        } elseif {$spec eq "end"} {
            set index $max
        } elseif {[regexp {^(end|[0-9]+)([-+])([0-9]+)$} $spec -> base op offset]} {
            if {$base eq "end"} {
                set base $max
            } else {
                set base [scan $base %lld]
            }
            set offset [scan $offset %lld]
            if {$op eq "-"} {
                set index [expr {$base - $offset}]
            } else {
                #end+x will just fail the bounds test below unless x is zero - as desired
                set index [expr {$base + $offset}]
            }
        } elseif {[regexp {^([-+])([0-9]+)$} $spec -> sign digits]} {
            #signed integers such as +2 or -0
            set index [scan $digits %lld]
            if {$sign eq "-"} {
                set index [expr {-$index}]
            }
        } else {
            error "Bad $which index '$supplied'. Expected an integer, end, or a simple offset form such as end-1 or 4+2"
        }
        #Unlike Tcl lrange,lindex etc - we don't want to support out of bound indices.
        #show the supplied index and what it was mapped to in the error message.
        if {$index < 0 || $index > $max} {
            error "Bad $which index '$supplied'. $index out of bounds 0 - $max"
        }
        lappend resolved $index
    }
    return $resolved
}
||||
|
||||
method regenerate_lines {} {
    #*** !doctools
    #[call class::textinfo [method regenerate_lines]]
    #[para]generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex
    #[para]This is called automatically by the Constructor during object creation
    #[para]It is exposed in the API experimentally - as chunk and line manipulation functions are considered.
    #[para]TODO - review whether such manual control will be necessary/desirable

    #we don't store the actual line-endings as characters (for better layout of debug/display of data) - instead we store names lf|crlf|none

    set o_payloadlist [list]
    set o_linemap [dict create]
    #Replace each line-ending with a single placeholder character. crlf is listed first in the map so that its lf is not consumed on its own.
    set normalised_data [string map [list \r\n $o_CRLF_C \n $o_LF_C] $o_chunk]

    #Split on the lf placeholder, then split each resulting segment on the crlf placeholder.
    #Every segment except the final one was terminated by an lf in the original data.
    set lf_segments [split $normalised_data $o_LF_C]
    set last_segment [expr {[llength $lf_segments] - 1}]

    set idx 0              ;#next line index to be stored
    set lf_count 0         ;#records stored with le lf or none
    set crlf_count 0       ;#records stored with le crlf
    set filedata_offset 0  ;#position in the raw chunk at which the next line starts
    set segment_no 0
    foreach segment $lf_segments {
        set crlf_parts [split $segment $o_CRLF_C]

        #every part except the final one was terminated by crlf
        foreach crlfpart [lrange $crlf_parts 0 end-1] {
            lappend o_payloadlist $crlfpart
            set payloadlen [string length $crlfpart]
            set linelen [expr {$payloadlen + 2}]
            dict set o_linemap $idx [list le crlf linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]]
            incr filedata_offset $linelen
            incr crlf_count
            incr idx
        }

        #the final part (the whole segment when it contains no crlf) is terminated by whatever ended the segment
        set tailpart [lindex $crlf_parts end]
        set payloadlen [string length $tailpart]
        if {$segment_no == $last_segment} {
            if {$payloadlen == 0} {
                #empty space after the last line-ending
                #An editor might display this pseudo-line with a line number - but we won't treat it as a line here
                break
            }
            #trailing data with no line-ending
            set le none
            set le_size 0
        } else {
            #more segments follow - so this part was terminated by an lf whether or not it is empty
            set le lf
            set le_size 1
        }
        lappend o_payloadlist $tailpart
        #we include line-ending in byte count for a line.
        set linelen [expr {$payloadlen + $le_size}]
        dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]]
        incr filedata_offset $linelen
        incr lf_count
        incr idx
        incr segment_no
    }

    #internal consistency check - every stored line should have been counted exactly once
    set le_count [expr {$lf_count + $crlf_count}]
    if {$le_count != [llength $o_payloadlist]} {
        puts stderr "fileline::class::textinfo warning. regenerate_lines lf_count: $lf_count + crlf_count: $crlf_count does not equal length of lines stored: [llength $o_payloadlist]"
    }
}
||||
method regenerate_chunk {} {
    #Placeholder - currently has an empty body and returns an empty result.
}
||||
|
||||
|
||||
#*** !doctools |
||||
#[list_end] |
||||
} |
||||
#*** !doctools |
||||
#[list_end] [comment {--- end class enumeration ---}] |
||||
} |
||||
} |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# Base namespace |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
namespace eval punk::fileline {
    #Base namespace of the package - currently only carries documentation markers and exports everything defined in it.
    namespace export *
    #variable xyz

    #*** !doctools
    #[subsection {Namespace punk::fileline}]
    #[para] Core API functions for punk::fileline
    #[list_begin definitions]

    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::fileline ---}]
}
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# Secondary API namespace |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
namespace eval punk::fileline::lib {
    namespace export *
    namespace path [namespace parent]
    #*** !doctools
    #[subsection {Namespace punk::fileline::lib}]
    #[para] Secondary functions that are part of the API
    #[list_begin definitions]

    proc range_spans_chunk_boundaries {start end chunksize} {
        #*** !doctools
        #[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]]
        #[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.
        #[list_begin arguments]
        # [arg_def integer start]
        # [para] zero-based start index of range
        # [arg_def integer end]
        # [para] zero-based end index of range
        # [arg_def integer chunksize]
        # [para] Number of bytes/characters in chunk
        #[list_end]
        #[para]returns a dict with the keys is_span and boundaries
        #[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize
        #[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize
        #[para]e.g
        #[example_begin]
        # range_spans_chunk_boundaries 10 1750 512
        # is_span 1 boundaries {512 1024 1536}
        #[example_end]
        #[para] This function automatically uses lseq (if the command is available ie Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75

        #Test for the lseq command itself rather than using 'package require Tcl 8.7'.
        #A requirement of 8.7 is not satisfied by Tcl 9 (different major version) - so that test would wrongly disable lseq there.
        #The size test is only evaluated when lseq exists, so older Tcl always goes straight to the pure-Tcl implementation.
        if {[info commands ::lseq] eq "" || (($end - $start) / $chunksize) < 75} {
            tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize
        }
        tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize
    }

    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}]
}
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
#*** !doctools |
||||
#[section Internal] |
||||
namespace eval punk::fileline::system {
    #*** !doctools
    #[subsection {Namespace punk::fileline::system}]
    #[para] Internal functions that are not part of the API

    #Raise an error unless chunksize is a positive integer.
    #Zero would cause a divide by zero and a negative value would make the boundary loop run forever.
    proc _validate_chunksize {chunksize} {
        if {![string is entier -strict $chunksize] || $chunksize < 1} {
            error "range_spans_chunk_boundaries error: chunksize must be a positive integer - got '$chunksize'"
        }
    }

    #for 8.7+ using lseq
    #much faster when resultant boundary size is large
    #Returns a dict: is_span 0|1 boundaries <list of multiples of chunksize within start..end inclusive>
    proc _range_spans_chunk_boundaries_lseq {start end chunksize} {
        _validate_chunksize $chunksize
        #round start up to the next multiple of chunksize
        set smod [expr {$start % $chunksize}]
        if {$smod != 0} {
            set start [expr {$start + ($chunksize - $smod)}]
        }
        #checked whether or not start needed rounding - so lseq is never asked for a descending range
        if {$start > $end} {
            return [list is_span 0 boundaries {}]
        }
        set boundaries [lseq $start to $end by $chunksize]
        return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries]
    }

    #faster than lseq for small number of resultant boundaries (~< 75) (which is a common use case)
    #gets very slow (comparatively) with large resultsets
    #Returns a dict: is_span 0|1 boundaries <list of multiples of chunksize within start..end inclusive>
    proc _range_spans_chunk_boundaries_tcl {start end chunksize} {
        _validate_chunksize $chunksize
        #round start up to the next multiple of chunksize
        set smod [expr {$start % $chunksize}]
        if {$smod != 0} {
            set start [expr {$start + ($chunksize - $smod)}]
        }
        set boundaries [list]
        for {set b $start} {$b <= $end} {incr b $chunksize} {
            lappend boundaries $b
        }
        return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries]
    }

    #Development aid - print timings of the public entry point and each implementation for the same arguments.
    proc _range_spans_chunk_boundaries_TIMEIT {start end chunksize {repeat 1}} {
        puts "main : [time {punk::fileline::lib::range_spans_chunk_boundaries $start $end $chunksize} $repeat]"
        puts "tcl : [time {punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize} $repeat]"
        #test for the command rather than 'package require Tcl 8.7' - which is not satisfied by Tcl 9
        if {[info commands ::lseq] ne ""} {
            puts "lseq : [time {punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize} $repeat]"
        }
    }
}
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
## Ready |
||||
#Register the package. The namespace eval script sets the pkg and version variables in punk::fileline,
#and its result (the value of the final set command) is passed to package provide as the version.
package provide punk::fileline [namespace eval punk::fileline {
    variable pkg punk::fileline
    variable version
    set version 999999.0a1.0
}]
||||
return |
||||
|
||||
#*** !doctools |
||||
#[manpage_end] |
||||
|
@ -0,0 +1,3 @@
|
||||
0.1.0 |
||||
#First line must be a semantic version number |
||||
#all other lines are ignored. |
Loading…
Reference in new issue