tabwriter/lib.rs
1//! This crate provides an implementation of
2//! [elastic tabstops](http://nickgravgaard.com/elastictabstops/index.html).
3//! It is a minimal port of Go's
4//! [tabwriter](http://golang.org/pkg/text/tabwriter/) package.
5//! Namely, its main mode of operation is to wrap a `Writer` and implement
6//! elastic tabstops for the text written to the wrapped `Writer`.
7//!
8//! This package is also bundled with a program, `tabwriter`,
9//! that exposes this functionality at the command line.
10//!
11//! Here's an example that shows basic alignment:
12//!
13//! ```rust
14//! use std::io::Write;
15//! use tabwriter::TabWriter;
16//!
17//! let mut tw = TabWriter::new(vec![]);
18//! write!(&mut tw, "
19//! Bruce Springsteen\tBorn to Run
20//! Bob Seger\tNight Moves
21//! Metallica\tBlack
22//! The Boss\tDarkness on the Edge of Town
23//! ").unwrap();
24//! tw.flush().unwrap();
25//!
26//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
27//! assert_eq!(&*written, "
28//! Bruce Springsteen Born to Run
29//! Bob Seger Night Moves
30//! Metallica Black
31//! The Boss Darkness on the Edge of Town
32//! ");
33//! ```
34//!
35//! Note that `flush` **must** be called or else `TabWriter` may never write
36//! anything. This is because elastic tabstops requires knowing about future
37//! lines in order to align output. More precisely, all text considered in a
38//! single alignment must fit into memory.
39//!
40//! Here's another example that demonstrates how *only* contiguous columns
41//! are aligned:
42//!
43//! ```rust
44//! use std::io::Write;
45//! use tabwriter::TabWriter;
46//!
47//! let mut tw = TabWriter::new(vec![]).padding(1);
48//! write!(&mut tw, "
49//!fn foobar() {{
50//! let mut x = 1+1;\t// addition
51//! x += 1;\t// increment in place
52//! let y = x * x * x * x;\t// multiply!
53//!
54//! y += 1;\t// this is another group
55//! y += 2 * 2;\t// that is separately aligned
56//!}}
57//!").unwrap();
58//! tw.flush().unwrap();
59//!
60//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
61//! assert_eq!(&*written, "
62//!fn foobar() {
63//! let mut x = 1+1; // addition
64//! x += 1; // increment in place
65//! let y = x * x * x * x; // multiply!
66//!
67//! y += 1; // this is another group
68//! y += 2 * 2; // that is separately aligned
69//!}
70//!");
71//! ```
72
73#![deny(missing_docs)]
74
75use std::cmp;
76use std::error;
77use std::fmt;
78use std::io::{self, Write};
79use std::iter;
80use std::mem;
81use std::str;
82
83#[cfg(test)]
84mod test;
85
/// TabWriter wraps an arbitrary writer and aligns tabbed output.
///
/// Elastic tabstops work by aligning *contiguous* tabbed delimited fields
/// known as *column blocks*. When a line appears that breaks all contiguous
/// blocks, all buffered output will be flushed to the underlying writer.
/// Otherwise, output will stay buffered until `flush` is explicitly called.
#[derive(Debug)]
pub struct TabWriter<W> {
    /// The wrapped writer that receives the aligned output.
    w: W,
    /// Raw bytes of every cell buffered since the last flush. Cells refer to
    /// their contents by offset into this buffer.
    buf: io::Cursor<Vec<u8>>,
    /// Terminated cells grouped by line. The last entry is the line that is
    /// currently being written.
    lines: Vec<Vec<Cell>>,
    /// The cell currently being accumulated; it has not yet been terminated
    /// by a tab, a newline or a flush.
    curcell: Cell,
    /// Minimum width of every column.
    minwidth: usize,
    /// Number of spaces written after every column.
    padding: usize,
    /// How text is positioned within its cell.
    alignment: Alignment,
    /// Whether ANSI escape codes are ignored when measuring cell widths.
    ansi: bool,
    /// Whether leading empty cells are written as tabs instead of spaces.
    tab_indent: bool,
}
104
/// `Alignment` represents how a `TabWriter` should align text within its cell.
///
/// This is a plain field-less enum, so values can be freely copied and
/// compared for equality.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Alignment {
    /// Text should be aligned with the left edge of the cell
    Left,
    /// Text should be centered within the cell
    Center,
    /// Text should be aligned with the right edge of the cell
    Right,
}
115
/// A single tab-delimited field of a line, described as a span of the
/// `TabWriter` buffer rather than as an owned string.
#[derive(Debug)]
struct Cell {
    /// Byte offset of the cell's first byte in `TabWriter.buf`.
    start: usize,
    /// Display width of the cell in columns; computed by `update_width`.
    width: usize,
    /// Length of the cell's contents in bytes.
    size: usize,
}
122
123impl<W: io::Write> TabWriter<W> {
124 /// Create a new `TabWriter` from an existing `Writer`.
125 ///
126 /// All output written to `Writer` is passed through `TabWriter`.
127 /// Contiguous column blocks indicated by tabs are aligned.
128 ///
129 /// Note that `flush` must be called to guarantee that `TabWriter` will
130 /// write to the given writer.
131 pub fn new(w: W) -> TabWriter<W> {
132 TabWriter {
133 w,
134 buf: io::Cursor::new(Vec::with_capacity(1024)),
135 lines: vec![vec![]],
136 curcell: Cell::new(0),
137 minwidth: 2,
138 padding: 2,
139 alignment: Alignment::Left,
140 ansi: cfg!(feature = "ansi_formatting"),
141 tab_indent: false,
142 }
143 }
144
145 /// Set the minimum width of each column. That is, all columns will have
146 /// *at least* the size given here. If a column is smaller than `minwidth`,
147 /// then it is padded with spaces.
148 ///
149 /// The default minimum width is `2`.
150 pub fn minwidth(mut self, minwidth: usize) -> TabWriter<W> {
151 self.minwidth = minwidth;
152 self
153 }
154
155 /// Set the padding between columns. All columns will be separated by
156 /// *at least* the number of spaces indicated by `padding`. If `padding`
157 /// is zero, then columns may run up against each other without any
158 /// separation.
159 ///
160 /// The default padding is `2`.
161 pub fn padding(mut self, padding: usize) -> TabWriter<W> {
162 self.padding = padding;
163 self
164 }
165
166 /// Set the alignment of text within cells. This will effect future flushes.
167 ///
168 /// The default alignment is `Alignment::Left`.
169 pub fn alignment(mut self, alignment: Alignment) -> TabWriter<W> {
170 self.alignment = alignment;
171 self
172 }
173
174 /// Ignore ANSI escape codes when computing the number of display columns.
175 ///
176 /// This is disabled by default. (But is enabled by default when the
177 /// deprecated `ansi_formatting` crate feature is enabled.)
178 pub fn ansi(mut self, yes: bool) -> TabWriter<W> {
179 self.ansi = yes;
180 self
181 }
182
183 /// Always use tabs for indentation columns (i.e., padding of
184 /// leading empty cells on the left).
185 ///
186 /// This is disabled by default.
187 pub fn tab_indent(mut self, yes: bool) -> TabWriter<W> {
188 self.tab_indent = yes;
189 self
190 }
191
192 /// Unwraps this `TabWriter`, returning the underlying writer.
193 ///
194 /// This internal buffer is flushed before returning the writer. If the
195 /// flush fails, then an error is returned.
196 pub fn into_inner(mut self) -> Result<W, IntoInnerError<TabWriter<W>>> {
197 match self.flush() {
198 Ok(()) => Ok(self.w),
199 Err(err) => Err(IntoInnerError(self, err)),
200 }
201 }
202
203 /// Resets the state of the aligner. Once the aligner is reset, all future
204 /// writes will start producing a new alignment.
205 fn reset(&mut self) {
206 self.buf = io::Cursor::new(Vec::with_capacity(1024));
207 self.lines = vec![vec![]];
208 self.curcell = Cell::new(0);
209 }
210
211 /// Adds the bytes received into the buffer and updates the size of
212 /// the current cell.
213 fn add_bytes(&mut self, bytes: &[u8]) {
214 self.curcell.size += bytes.len();
215 let _ = self.buf.write_all(bytes); // cannot fail
216 }
217
218 /// Ends the current cell, updates the UTF8 width of the cell and starts
219 /// a fresh cell.
220 fn term_curcell(&mut self) {
221 let mut curcell = Cell::new(self.buf.position() as usize);
222 mem::swap(&mut self.curcell, &mut curcell);
223
224 if self.ansi {
225 curcell.update_width(&self.buf.get_ref(), count_columns_ansi);
226 } else {
227 curcell.update_width(&self.buf.get_ref(), count_columns_noansi);
228 }
229 self.curline_mut().push(curcell);
230 }
231
232 /// Return a view of the current line of cells.
233 fn curline(&mut self) -> &[Cell] {
234 let i = self.lines.len() - 1;
235 &*self.lines[i]
236 }
237
238 /// Return a mutable view of the current line of cells.
239 fn curline_mut(&mut self) -> &mut Vec<Cell> {
240 let i = self.lines.len() - 1;
241 &mut self.lines[i]
242 }
243}
244
245impl Cell {
246 fn new(start: usize) -> Cell {
247 Cell { start, width: 0, size: 0 }
248 }
249
250 fn update_width(
251 &mut self,
252 buf: &[u8],
253 count_columns: impl Fn(&[u8]) -> usize,
254 ) {
255 let end = self.start + self.size;
256 self.width = count_columns(&buf[self.start..end]);
257 }
258}
259
260impl<W: io::Write> io::Write for TabWriter<W> {
261 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
262 let mut lastterm = 0usize;
263 for (i, &c) in buf.iter().enumerate() {
264 match c {
265 b'\t' | b'\n' => {
266 self.add_bytes(&buf[lastterm..i]);
267 self.term_curcell();
268 lastterm = i + 1;
269 if c == b'\n' {
270 let ncells = self.curline().len();
271 self.lines.push(vec![]);
272 // Having a single cell means that *all* previous
273 // columns have been broken, so we should just flush.
274 if ncells == 1 {
275 self.flush()?;
276 }
277 }
278 }
279 _ => {}
280 }
281 }
282 self.add_bytes(&buf[lastterm..]);
283 Ok(buf.len())
284 }
285
286 fn flush(&mut self) -> io::Result<()> {
287 if self.curcell.size > 0 {
288 self.term_curcell();
289 }
290 let widths = cell_widths(&self.lines, self.minwidth);
291
292 // This is a trick to avoid allocating padding for every cell.
293 // Just allocate the most we'll ever need and borrow from it.
294 let biggest_width = widths
295 .iter()
296 .map(|ws| ws.iter().map(|&w| w).max().unwrap_or(0))
297 .max()
298 .unwrap_or(0);
299 let padding: String =
300 iter::repeat(' ').take(biggest_width + self.padding).collect();
301
302 let mut first = true;
303 for (line, widths) in self.lines.iter().zip(widths.iter()) {
304 if !first {
305 self.w.write_all(b"\n")?;
306 } else {
307 first = false
308 }
309
310 let mut use_tabs = self.tab_indent;
311 for (i, cell) in line.iter().enumerate() {
312 let bytes =
313 &self.buf.get_ref()[cell.start..cell.start + cell.size];
314 if i >= widths.len() {
315 // There is no width for the last column
316 assert_eq!(i, line.len() - 1);
317 self.w.write_all(bytes)?;
318 } else {
319 if use_tabs && cell.size == 0 {
320 write!(&mut self.w, "\t")?;
321 continue;
322 }
323 use_tabs = false;
324
325 assert!(widths[i] >= cell.width);
326 let extra_space = widths[i] - cell.width;
327 let (left_spaces, mut right_spaces) = match self.alignment
328 {
329 Alignment::Left => (0, extra_space),
330 Alignment::Right => (extra_space, 0),
331 Alignment::Center => {
332 (extra_space / 2, extra_space - extra_space / 2)
333 }
334 };
335 right_spaces += self.padding;
336 write!(&mut self.w, "{}", &padding[0..left_spaces])?;
337 self.w.write_all(bytes)?;
338 write!(&mut self.w, "{}", &padding[0..right_spaces])?;
339 }
340 }
341 }
342
343 self.reset();
344 Ok(())
345 }
346}
347
/// An error returned by `into_inner`.
///
/// This combines the error that happened while flushing the buffer with the
/// `TabWriter` itself.
pub struct IntoInnerError<W>(W, io::Error);

impl<W> IntoInnerError<W> {
    /// Returns the error which caused the `into_inner()` call to fail.
    pub fn error(&self) -> &io::Error {
        &self.1
    }

    /// Returns the `TabWriter` instance which generated the error.
    pub fn into_inner(self) -> W {
        self.0
    }
}

/// Formats as the underlying I/O error; the wrapped writer is not shown, so
/// no `Debug` bound on `W` is required.
impl<W> fmt::Debug for IntoInnerError<W> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.error(), f)
    }
}

/// Displays the message of the underlying I/O error.
impl<W> fmt::Display for IntoInnerError<W> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.error(), f)
    }
}

impl<W: ::std::any::Any> error::Error for IntoInnerError<W> {
    // `description` is deprecated but still forwarded so that callers of the
    // old API keep getting the I/O error's text.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        self.error().description()
    }

    // Expose the I/O error through `source` so that error-chain walkers can
    // reach it. The deprecated `cause` method delegates to `source` by
    // default, so it keeps returning the same error.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        Some(self.error())
    }
}
388
389fn cell_widths(lines: &Vec<Vec<Cell>>, minwidth: usize) -> Vec<Vec<usize>> {
390 // Naively, this algorithm looks like it could be O(n^2m) where `n` is
391 // the number of lines and `m` is the number of contiguous columns.
392 //
393 // However, I claim that it is actually O(nm). That is, the width for
394 // every contiguous column is computed exactly once.
395 let mut ws: Vec<_> = (0..lines.len()).map(|_| vec![]).collect();
396 for (i, iline) in lines.iter().enumerate() {
397 if iline.is_empty() {
398 continue;
399 }
400 for col in ws[i].len()..(iline.len() - 1) {
401 let mut width = minwidth;
402 let mut contig_count = 0;
403 for line in lines[i..].iter() {
404 if col + 1 >= line.len() {
405 // ignores last column
406 break;
407 }
408 contig_count += 1;
409 width = cmp::max(width, line[col].width);
410 }
411 assert!(contig_count >= 1);
412 for j in i..(i + contig_count) {
413 ws[j].push(width);
414 }
415 }
416 }
417 ws
418}
419
420fn count_columns_noansi(bytes: &[u8]) -> usize {
421 use unicode_width::UnicodeWidthChar;
422
423 // If we have a Unicode string, then attempt to guess the number of
424 // *display* columns used.
425 match str::from_utf8(bytes) {
426 Err(_) => bytes.len(),
427 Ok(s) => s
428 .chars()
429 .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
430 .fold(0, |sum, width| sum + width),
431 }
432}
433
434fn count_columns_ansi(bytes: &[u8]) -> usize {
435 use unicode_width::UnicodeWidthChar;
436
437 // If we have a Unicode string, then attempt to guess the number of
438 // *display* columns used.
439 match str::from_utf8(bytes) {
440 Err(_) => bytes.len(),
441 Ok(s) => strip_formatting(s)
442 .chars()
443 .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
444 .fold(0, |sum, width| sum + width),
445 }
446}
447
448fn strip_formatting<'t>(input: &'t str) -> std::borrow::Cow<'t, str> {
449 let mut escapes = find_ansi_escapes(input).peekable();
450 if escapes.peek().is_none() {
451 return std::borrow::Cow::Borrowed(input);
452 }
453 let mut without_escapes = String::with_capacity(input.len());
454 let mut last_end = 0;
455 for mat in escapes {
456 without_escapes.push_str(&input[last_end..mat.start]);
457 last_end = mat.end;
458 }
459 without_escapes.push_str(&input[last_end..]);
460 std::borrow::Cow::Owned(without_escapes)
461}
462
/// Iterates over the byte ranges of the escape sequences in `input`.
///
/// A sequence starts with `ESC [` and runs up to and including the next
/// `m`. Iteration stops at the first prefix that has no `m` after it.
fn find_ansi_escapes<'t>(
    input: &'t str,
) -> impl Iterator<Item = std::ops::Range<usize>> + 't {
    const ESCAPE_PREFIX: &str = "\x1B[";
    // Byte offset just past the most recently reported escape.
    let mut cursor = 0;
    std::iter::from_fn(move || {
        let start = cursor + input[cursor..].find(ESCAPE_PREFIX)?;
        let params_start = start + ESCAPE_PREFIX.len();
        // The range is half-open, hence the `+ 1` to include the `m`.
        let end = params_start + input[params_start..].find('m')? + 1;
        cursor = end;
        Some(start..end)
    })
}