tabwriter/
lib.rs

1//! This crate provides an implementation of
2//! [elastic tabstops](http://nickgravgaard.com/elastictabstops/index.html).
3//! It is a minimal port of Go's
4//! [tabwriter](http://golang.org/pkg/text/tabwriter/) package.
5//! Namely, its main mode of operation is to wrap a `Writer` and implement
6//! elastic tabstops for the text written to the wrapped `Writer`.
7//!
8//! This package is also bundled with a program, `tabwriter`,
9//! that exposes this functionality at the command line.
10//!
11//! Here's an example that shows basic alignment:
12//!
13//! ```rust
14//! use std::io::Write;
15//! use tabwriter::TabWriter;
16//!
17//! let mut tw = TabWriter::new(vec![]);
18//! write!(&mut tw, "
19//! Bruce Springsteen\tBorn to Run
20//! Bob Seger\tNight Moves
21//! Metallica\tBlack
22//! The Boss\tDarkness on the Edge of Town
23//! ").unwrap();
24//! tw.flush().unwrap();
25//!
26//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
27//! assert_eq!(&*written, "
28//! Bruce Springsteen  Born to Run
29//! Bob Seger          Night Moves
30//! Metallica          Black
31//! The Boss           Darkness on the Edge of Town
32//! ");
33//! ```
34//!
35//! Note that `flush` **must** be called or else `TabWriter` may never write
36//! anything. This is because elastic tabstops requires knowing about future
37//! lines in order to align output. More precisely, all text considered in a
38//! single alignment must fit into memory.
39//!
40//! Here's another example that demonstrates how *only* contiguous columns
41//! are aligned:
42//!
43//! ```rust
44//! use std::io::Write;
45//! use tabwriter::TabWriter;
46//!
47//! let mut tw = TabWriter::new(vec![]).padding(1);
48//! write!(&mut tw, "
49//!fn foobar() {{
50//!    let mut x = 1+1;\t// addition
51//!    x += 1;\t// increment in place
52//!    let y = x * x * x * x;\t// multiply!
53//!
54//!    y += 1;\t// this is another group
55//!    y += 2 * 2;\t// that is separately aligned
56//!}}
57//!").unwrap();
58//! tw.flush().unwrap();
59//!
60//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
61//! assert_eq!(&*written, "
62//!fn foobar() {
63//!    let mut x = 1+1;       // addition
64//!    x += 1;                // increment in place
65//!    let y = x * x * x * x; // multiply!
66//!
67//!    y += 1;     // this is another group
68//!    y += 2 * 2; // that is separately aligned
69//!}
70//!");
71//! ```
72
73#![deny(missing_docs)]
74
75use std::cmp;
76use std::error;
77use std::fmt;
78use std::io::{self, Write};
79use std::iter;
80use std::mem;
81use std::str;
82
83#[cfg(test)]
84mod test;
85
/// TabWriter wraps an arbitrary writer and aligns tabbed output.
///
/// Elastic tabstops work by aligning *contiguous* tabbed delimited fields
/// known as *column blocks*. When a line appears that breaks all contiguous
/// blocks, all buffered output will be flushed to the underlying writer.
/// Otherwise, output will stay buffered until `flush` is explicitly called.
#[derive(Debug)]
pub struct TabWriter<W> {
    // The wrapped writer that receives the aligned output.
    w: W,
    // Raw bytes of every buffered cell; each `Cell` refers to a span of it.
    buf: io::Cursor<Vec<u8>>,
    // Buffered lines, each a list of terminated cells. The last entry is
    // the line currently being written.
    lines: Vec<Vec<Cell>>,
    // The cell currently being accumulated (not yet ended by a tab/newline).
    curcell: Cell,
    // Minimum width of every column.
    minwidth: usize,
    // Number of spaces appended after every aligned column.
    padding: usize,
    // How text is positioned inside its cell.
    alignment: Alignment,
    // Whether ANSI escape codes are ignored when measuring cell widths.
    ansi: bool,
    // Whether leading empty cells are emitted as tabs instead of spaces.
    tab_indent: bool,
}
104
/// `Alignment` represents how a `TabWriter` should align text within its cell.
///
/// The type is a plain tag without payload, so it is `Copy` and can be
/// compared for equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Text should be aligned with the left edge of the cell
    Left,
    /// Text should be centered within the cell
    Center,
    /// Text should be aligned with the right edge of the cell
    Right,
}
115
/// A single tab- or newline-terminated field of a line.
///
/// A cell does not own its text; it describes a span of `TabWriter.buf`.
#[derive(Debug)]
struct Cell {
    start: usize, // offset into TabWriter.buf
    width: usize, // in characters
    size: usize,  // in bytes
}
122
123impl<W: io::Write> TabWriter<W> {
124    /// Create a new `TabWriter` from an existing `Writer`.
125    ///
126    /// All output written to `Writer` is passed through `TabWriter`.
127    /// Contiguous column blocks indicated by tabs are aligned.
128    ///
129    /// Note that `flush` must be called to guarantee that `TabWriter` will
130    /// write to the given writer.
131    pub fn new(w: W) -> TabWriter<W> {
132        TabWriter {
133            w,
134            buf: io::Cursor::new(Vec::with_capacity(1024)),
135            lines: vec![vec![]],
136            curcell: Cell::new(0),
137            minwidth: 2,
138            padding: 2,
139            alignment: Alignment::Left,
140            ansi: cfg!(feature = "ansi_formatting"),
141            tab_indent: false,
142        }
143    }
144
145    /// Set the minimum width of each column. That is, all columns will have
146    /// *at least* the size given here. If a column is smaller than `minwidth`,
147    /// then it is padded with spaces.
148    ///
149    /// The default minimum width is `2`.
150    pub fn minwidth(mut self, minwidth: usize) -> TabWriter<W> {
151        self.minwidth = minwidth;
152        self
153    }
154
155    /// Set the padding between columns. All columns will be separated by
156    /// *at least* the number of spaces indicated by `padding`. If `padding`
157    /// is zero, then columns may run up against each other without any
158    /// separation.
159    ///
160    /// The default padding is `2`.
161    pub fn padding(mut self, padding: usize) -> TabWriter<W> {
162        self.padding = padding;
163        self
164    }
165
166    /// Set the alignment of text within cells. This will effect future flushes.
167    ///
168    /// The default alignment is `Alignment::Left`.
169    pub fn alignment(mut self, alignment: Alignment) -> TabWriter<W> {
170        self.alignment = alignment;
171        self
172    }
173
174    /// Ignore ANSI escape codes when computing the number of display columns.
175    ///
176    /// This is disabled by default. (But is enabled by default when the
177    /// deprecated `ansi_formatting` crate feature is enabled.)
178    pub fn ansi(mut self, yes: bool) -> TabWriter<W> {
179        self.ansi = yes;
180        self
181    }
182
183    /// Always use tabs for indentation columns (i.e., padding of
184    /// leading empty cells on the left).
185    ///
186    /// This is disabled by default.
187    pub fn tab_indent(mut self, yes: bool) -> TabWriter<W> {
188        self.tab_indent = yes;
189        self
190    }
191
192    /// Unwraps this `TabWriter`, returning the underlying writer.
193    ///
194    /// This internal buffer is flushed before returning the writer. If the
195    /// flush fails, then an error is returned.
196    pub fn into_inner(mut self) -> Result<W, IntoInnerError<TabWriter<W>>> {
197        match self.flush() {
198            Ok(()) => Ok(self.w),
199            Err(err) => Err(IntoInnerError(self, err)),
200        }
201    }
202
203    /// Resets the state of the aligner. Once the aligner is reset, all future
204    /// writes will start producing a new alignment.
205    fn reset(&mut self) {
206        self.buf = io::Cursor::new(Vec::with_capacity(1024));
207        self.lines = vec![vec![]];
208        self.curcell = Cell::new(0);
209    }
210
211    /// Adds the bytes received into the buffer and updates the size of
212    /// the current cell.
213    fn add_bytes(&mut self, bytes: &[u8]) {
214        self.curcell.size += bytes.len();
215        let _ = self.buf.write_all(bytes); // cannot fail
216    }
217
218    /// Ends the current cell, updates the UTF8 width of the cell and starts
219    /// a fresh cell.
220    fn term_curcell(&mut self) {
221        let mut curcell = Cell::new(self.buf.position() as usize);
222        mem::swap(&mut self.curcell, &mut curcell);
223
224        if self.ansi {
225            curcell.update_width(&self.buf.get_ref(), count_columns_ansi);
226        } else {
227            curcell.update_width(&self.buf.get_ref(), count_columns_noansi);
228        }
229        self.curline_mut().push(curcell);
230    }
231
232    /// Return a view of the current line of cells.
233    fn curline(&mut self) -> &[Cell] {
234        let i = self.lines.len() - 1;
235        &*self.lines[i]
236    }
237
238    /// Return a mutable view of the current line of cells.
239    fn curline_mut(&mut self) -> &mut Vec<Cell> {
240        let i = self.lines.len() - 1;
241        &mut self.lines[i]
242    }
243}
244
245impl Cell {
246    fn new(start: usize) -> Cell {
247        Cell { start, width: 0, size: 0 }
248    }
249
250    fn update_width(
251        &mut self,
252        buf: &[u8],
253        count_columns: impl Fn(&[u8]) -> usize,
254    ) {
255        let end = self.start + self.size;
256        self.width = count_columns(&buf[self.start..end]);
257    }
258}
259
260impl<W: io::Write> io::Write for TabWriter<W> {
261    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
262        let mut lastterm = 0usize;
263        for (i, &c) in buf.iter().enumerate() {
264            match c {
265                b'\t' | b'\n' => {
266                    self.add_bytes(&buf[lastterm..i]);
267                    self.term_curcell();
268                    lastterm = i + 1;
269                    if c == b'\n' {
270                        let ncells = self.curline().len();
271                        self.lines.push(vec![]);
272                        // Having a single cell means that *all* previous
273                        // columns have been broken, so we should just flush.
274                        if ncells == 1 {
275                            self.flush()?;
276                        }
277                    }
278                }
279                _ => {}
280            }
281        }
282        self.add_bytes(&buf[lastterm..]);
283        Ok(buf.len())
284    }
285
286    fn flush(&mut self) -> io::Result<()> {
287        if self.curcell.size > 0 {
288            self.term_curcell();
289        }
290        let widths = cell_widths(&self.lines, self.minwidth);
291
292        // This is a trick to avoid allocating padding for every cell.
293        // Just allocate the most we'll ever need and borrow from it.
294        let biggest_width = widths
295            .iter()
296            .map(|ws| ws.iter().map(|&w| w).max().unwrap_or(0))
297            .max()
298            .unwrap_or(0);
299        let padding: String =
300            iter::repeat(' ').take(biggest_width + self.padding).collect();
301
302        let mut first = true;
303        for (line, widths) in self.lines.iter().zip(widths.iter()) {
304            if !first {
305                self.w.write_all(b"\n")?;
306            } else {
307                first = false
308            }
309
310            let mut use_tabs = self.tab_indent;
311            for (i, cell) in line.iter().enumerate() {
312                let bytes =
313                    &self.buf.get_ref()[cell.start..cell.start + cell.size];
314                if i >= widths.len() {
315                    // There is no width for the last column
316                    assert_eq!(i, line.len() - 1);
317                    self.w.write_all(bytes)?;
318                } else {
319                    if use_tabs && cell.size == 0 {
320                        write!(&mut self.w, "\t")?;
321                        continue;
322                    }
323                    use_tabs = false;
324
325                    assert!(widths[i] >= cell.width);
326                    let extra_space = widths[i] - cell.width;
327                    let (left_spaces, mut right_spaces) = match self.alignment
328                    {
329                        Alignment::Left => (0, extra_space),
330                        Alignment::Right => (extra_space, 0),
331                        Alignment::Center => {
332                            (extra_space / 2, extra_space - extra_space / 2)
333                        }
334                    };
335                    right_spaces += self.padding;
336                    write!(&mut self.w, "{}", &padding[0..left_spaces])?;
337                    self.w.write_all(bytes)?;
338                    write!(&mut self.w, "{}", &padding[0..right_spaces])?;
339                }
340            }
341        }
342
343        self.reset();
344        Ok(())
345    }
346}
347
/// An error returned by `into_inner`.
///
/// This combines the error that happened while flushing the buffer with the
/// `TabWriter` itself.
pub struct IntoInnerError<W>(W, io::Error);

impl<W> IntoInnerError<W> {
    /// Returns the error which caused the `into_inner()` call to fail.
    pub fn error(&self) -> &io::Error {
        &self.1
    }

    /// Returns the `TabWriter` instance which generated the error.
    pub fn into_inner(self) -> W {
        self.0
    }
}

// Both formatting impls delegate to the wrapped I/O error, so no bound on
// `W` is needed.
impl<W> fmt::Debug for IntoInnerError<W> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.error(), f)
    }
}

impl<W> fmt::Display for IntoInnerError<W> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.error(), f)
    }
}

impl<W: ::std::any::Any> error::Error for IntoInnerError<W> {
    // Kept for callers that still use the deprecated `description`.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        error::Error::description(self.error())
    }

    // Expose the flush error through `source`. The deprecated `cause`
    // defaults to `source`, so callers of either method see the I/O error.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        Some(self.error())
    }
}
388
389fn cell_widths(lines: &Vec<Vec<Cell>>, minwidth: usize) -> Vec<Vec<usize>> {
390    // Naively, this algorithm looks like it could be O(n^2m) where `n` is
391    // the number of lines and `m` is the number of contiguous columns.
392    //
393    // However, I claim that it is actually O(nm). That is, the width for
394    // every contiguous column is computed exactly once.
395    let mut ws: Vec<_> = (0..lines.len()).map(|_| vec![]).collect();
396    for (i, iline) in lines.iter().enumerate() {
397        if iline.is_empty() {
398            continue;
399        }
400        for col in ws[i].len()..(iline.len() - 1) {
401            let mut width = minwidth;
402            let mut contig_count = 0;
403            for line in lines[i..].iter() {
404                if col + 1 >= line.len() {
405                    // ignores last column
406                    break;
407                }
408                contig_count += 1;
409                width = cmp::max(width, line[col].width);
410            }
411            assert!(contig_count >= 1);
412            for j in i..(i + contig_count) {
413                ws[j].push(width);
414            }
415        }
416    }
417    ws
418}
419
420fn count_columns_noansi(bytes: &[u8]) -> usize {
421    use unicode_width::UnicodeWidthChar;
422
423    // If we have a Unicode string, then attempt to guess the number of
424    // *display* columns used.
425    match str::from_utf8(bytes) {
426        Err(_) => bytes.len(),
427        Ok(s) => s
428            .chars()
429            .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
430            .fold(0, |sum, width| sum + width),
431    }
432}
433
434fn count_columns_ansi(bytes: &[u8]) -> usize {
435    use unicode_width::UnicodeWidthChar;
436
437    // If we have a Unicode string, then attempt to guess the number of
438    // *display* columns used.
439    match str::from_utf8(bytes) {
440        Err(_) => bytes.len(),
441        Ok(s) => strip_formatting(s)
442            .chars()
443            .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
444            .fold(0, |sum, width| sum + width),
445    }
446}
447
448fn strip_formatting<'t>(input: &'t str) -> std::borrow::Cow<'t, str> {
449    let mut escapes = find_ansi_escapes(input).peekable();
450    if escapes.peek().is_none() {
451        return std::borrow::Cow::Borrowed(input);
452    }
453    let mut without_escapes = String::with_capacity(input.len());
454    let mut last_end = 0;
455    for mat in escapes {
456        without_escapes.push_str(&input[last_end..mat.start]);
457        last_end = mat.end;
458    }
459    without_escapes.push_str(&input[last_end..]);
460    std::borrow::Cow::Owned(without_escapes)
461}
462
/// Returns the byte ranges of all ANSI CSI escape sequences in `input`, in
/// order of appearance.
///
/// A sequence is `ESC [`, followed by any number of parameter bytes
/// (`0x30..=0x3F`), any number of intermediate bytes (`0x20..=0x2F`) and one
/// final byte (`0x40..=0x7E`). This covers colour/style sequences such as
/// `ESC[1;31m` as well as other CSI sequences such as `ESC[K`, so a
/// sequence that does not end in `m` no longer swallows the visible text
/// up to the next `m`.
///
/// An `ESC [` that is not followed by a complete sequence is not reported;
/// scanning resumes right after it. All reported offsets fall on ASCII
/// bytes and are therefore valid `str` slice boundaries.
fn find_ansi_escapes<'t>(
    input: &'t str,
) -> impl Iterator<Item = std::ops::Range<usize>> + 't {
    const ESCAPE_PREFIX: &str = "\x1B[";

    /// Advances `pos` past all consecutive bytes within `lo..=hi`.
    fn skip_bytes_in(bytes: &[u8], mut pos: usize, lo: u8, hi: u8) -> usize {
        while pos < bytes.len() && lo <= bytes[pos] && bytes[pos] <= hi {
            pos += 1;
        }
        pos
    }

    let bytes = input.as_bytes();
    let mut search_from = 0;
    std::iter::from_fn(move || loop {
        let start = search_from + input[search_from..].find(ESCAPE_PREFIX)?;
        let after_prefix = start + ESCAPE_PREFIX.len();
        let after_params = skip_bytes_in(bytes, after_prefix, 0x30, 0x3F);
        let final_pos = skip_bytes_in(bytes, after_params, 0x20, 0x2F);
        match bytes.get(final_pos) {
            Some(&b) if 0x40 <= b && b <= 0x7E => {
                let end = final_pos + 1;
                search_from = end;
                return Some(start..end);
            }
            // Malformed or truncated sequence: skip this prefix and keep
            // looking for later, well-formed sequences.
            _ => search_from = after_prefix,
        }
    })
}
478}