|
60 | 60 | //! by two zero-length breaks. The algorithm will try its best to fit it on a |
61 | 61 | //! line (which it can't) and so naturally place the content on its own line to |
62 | 62 | //! avoid combining it with other lines and making matters even worse. |
| 63 | +//! |
| 64 | +//! # Explanation |
| 65 | +//! |
| 66 | +//! In case you do not have the paper, here is an explanation of what's going |
| 67 | +//! on. |
| 68 | +//! |
| 69 | +//! There is a stream of input tokens flowing through this printer. |
| 70 | +//! |
| 71 | +//! The printer buffers up to 3N tokens inside itself, where N is linewidth. |
| 72 | +//! Yes, linewidth is chars and tokens are multi-char, but in the worst |
| 73 | +//! case every token worth buffering is 1 char long, so it's ok. |
| 74 | +//! |
| 75 | +//! Tokens are String, Break, and Begin/End to delimit blocks. |
| 76 | +//! |
| 77 | +//! Begin tokens can carry an offset, saying "how far to indent when you break |
| 78 | +//! inside here", as well as a flag indicating "consistent" or "inconsistent" |
| 79 | +//! breaking. Consistent breaking means that after the first break, no attempt |
| 80 | +//! will be made to flow subsequent breaks together onto lines. Inconsistent |
| 81 | +//! is the opposite. An example of inconsistent breaking would be, say:
| 82 | +//! |
| 83 | +//! ```text
| 84 | +//! foo(hello, there, good, friends);
| 85 | +//! ```
| 86 | +//! |
| 87 | +//! breaking inconsistently to become |
| 88 | +//! |
| 89 | +//! ```text
| 90 | +//! foo(hello, there,
| 91 | +//!     good, friends);
| 92 | +//! ```
| 93 | +//! |
| 94 | +//! whereas a consistent breaking would yield: |
| 95 | +//! |
| 96 | +//! ```text
| 97 | +//! foo(hello,
| 98 | +//!     there,
| 99 | +//!     good,
| 100 | +//!     friends);
| 101 | +//! ```
| 102 | +//! |
| 103 | +//! That is, in the consistent-break blocks we value vertical alignment |
| 104 | +//! more than the ability to cram stuff onto a line. But in all cases if it |
| 105 | +//! can make a block a one-liner, it'll do so. |
| 106 | +//! |
| 107 | +//! Carrying on with high-level logic: |
| 108 | +//! |
| 109 | +//! The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and |
| 110 | +//! 'right' indices denote the active portion of the ring buffer as well as |
| 111 | +//! describing hypothetical points-in-the-infinite-stream at most 3N tokens |
| 112 | +//! apart (i.e. "not wrapped to ring-buffer boundaries"). The paper will switch |
| 113 | +//! between using 'left' and 'right' terms to denote the wrapped-to-ring-buffer |
| 114 | +//! and point-in-infinite-stream senses freely. |
| 115 | +//! |
| 116 | +//! There is a parallel ring buffer, 'size', that holds the calculated size of |
| 117 | +//! each token. Why calculated? Because for Begin/End pairs, the "size" |
| 118 | +//! includes everything between the pair. That is, the "size" of Begin is |
| 119 | +//! actually the sum of the sizes of everything between Begin and the paired |
| 120 | +//! End that follows. Since that is arbitrarily far in the future, 'size' is |
| 121 | +//! being rewritten regularly while the printer runs; in fact most of the |
| 122 | +//! machinery is here to work out 'size' entries on the fly (and give up when |
| 123 | +//! they're so obviously over-long that "infinity" is a good enough |
| 124 | +//! approximation for purposes of line breaking). |
| 125 | +//! |
| 126 | +//! The "input side" of the printer is managed as an abstract process called |
| 127 | +//! SCAN, which uses 'scan_stack' to manage calculating 'size'. SCAN is, in
| 128 | +//! other words, the process of calculating 'size' entries. |
| 129 | +//! |
| 130 | +//! The "output side" of the printer is managed by an abstract process called |
| 131 | +//! PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to |
| 132 | +//! do with each token/size pair it consumes as it goes. It's trying to consume |
| 133 | +//! the entire buffered window, but can't output anything until the size is >= |
| 134 | +//! 0 (sizes are set to negative while they're pending calculation). |
| 135 | +//! |
| 136 | +//! So SCAN takes input and buffers tokens and pending calculations, while |
| 137 | +//! PRINT gobbles up completed calculations and tokens from the buffer. The |
| 138 | +//! theory is that the two can never get more than 3N tokens apart, because |
| 139 | +//! once there's "obviously" too much data to fit on a line, in a size |
| 140 | +//! calculation, SCAN will write "infinity" to the size and let PRINT consume |
| 141 | +//! it. |
| 142 | +//! |
| 143 | +//! In this implementation (following the paper, again) the SCAN process is |
| 144 | +//! the method called `Printer::pretty_print`, and the PRINT process is the
| 145 | +//! method called `Printer::print`.
63 | 146 |
|
64 | 147 | use std::collections::VecDeque; |
65 | 148 | use std::fmt; |
66 | 149 | use std::io; |
67 | 150 |
|
| 151 | +/// How to break. Described in more detail in the module docs. |
68 | 152 | #[derive(Clone, Copy, PartialEq)] |
69 | 153 | pub enum Breaks { |
70 | 154 | Consistent, |
@@ -177,81 +261,6 @@ pub fn mk_printer<'a>(out: Box<io::Write+'a>, linewidth: usize) -> Printer<'a> { |
177 | 261 | } |
178 | 262 | } |
179 | 263 |
|
180 | | - |
181 | | -/// In case you do not have the paper, here is an explanation of what's going |
182 | | -/// on. |
183 | | -/// |
184 | | -/// There is a stream of input tokens flowing through this printer. |
185 | | -/// |
186 | | -/// The printer buffers up to 3N tokens inside itself, where N is linewidth. |
187 | | -/// Yes, linewidth is chars and tokens are multi-char, but in the worst |
188 | | -/// case every token worth buffering is 1 char long, so it's ok. |
189 | | -/// |
190 | | -/// Tokens are String, Break, and Begin/End to delimit blocks. |
191 | | -/// |
192 | | -/// Begin tokens can carry an offset, saying "how far to indent when you break |
193 | | -/// inside here", as well as a flag indicating "consistent" or "inconsistent" |
194 | | -/// breaking. Consistent breaking means that after the first break, no attempt |
195 | | -/// will be made to flow subsequent breaks together onto lines. Inconsistent |
196 | | -/// is the opposite. Inconsistent breaking example would be, say: |
197 | | -/// |
198 | | -/// foo(hello, there, good, friends) |
199 | | -/// |
200 | | -/// breaking inconsistently to become |
201 | | -/// |
202 | | -/// foo(hello, there |
203 | | -/// good, friends); |
204 | | -/// |
205 | | -/// whereas a consistent breaking would yield: |
206 | | -/// |
207 | | -/// foo(hello, |
208 | | -/// there |
209 | | -/// good, |
210 | | -/// friends); |
211 | | -/// |
212 | | -/// That is, in the consistent-break blocks we value vertical alignment |
213 | | -/// more than the ability to cram stuff onto a line. But in all cases if it |
214 | | -/// can make a block a one-liner, it'll do so. |
215 | | -/// |
216 | | -/// Carrying on with high-level logic: |
217 | | -/// |
218 | | -/// The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and |
219 | | -/// 'right' indices denote the active portion of the ring buffer as well as |
220 | | -/// describing hypothetical points-in-the-infinite-stream at most 3N tokens |
221 | | -/// apart (i.e. "not wrapped to ring-buffer boundaries"). The paper will switch |
222 | | -/// between using 'left' and 'right' terms to denote the wrapped-to-ring-buffer |
223 | | -/// and point-in-infinite-stream senses freely. |
224 | | -/// |
225 | | -/// There is a parallel ring buffer, 'size', that holds the calculated size of |
226 | | -/// each token. Why calculated? Because for Begin/End pairs, the "size" |
227 | | -/// includes everything between the pair. That is, the "size" of Begin is |
228 | | -/// actually the sum of the sizes of everything between Begin and the paired |
229 | | -/// End that follows. Since that is arbitrarily far in the future, 'size' is |
230 | | -/// being rewritten regularly while the printer runs; in fact most of the |
231 | | -/// machinery is here to work out 'size' entries on the fly (and give up when |
232 | | -/// they're so obviously over-long that "infinity" is a good enough |
233 | | -/// approximation for purposes of line breaking). |
234 | | -/// |
235 | | -/// The "input side" of the printer is managed as an abstract process called |
236 | | -/// SCAN, which uses 'scan_stack', to manage calculating 'size'. SCAN is, in |
237 | | -/// other words, the process of calculating 'size' entries. |
238 | | -/// |
239 | | -/// The "output side" of the printer is managed by an abstract process called |
240 | | -/// PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to |
241 | | -/// do with each token/size pair it consumes as it goes. It's trying to consume |
242 | | -/// the entire buffered window, but can't output anything until the size is >= |
243 | | -/// 0 (sizes are set to negative while they're pending calculation). |
244 | | -/// |
245 | | -/// So SCAN takes input and buffers tokens and pending calculations, while |
246 | | -/// PRINT gobbles up completed calculations and tokens from the buffer. The |
247 | | -/// theory is that the two can never get more than 3N tokens apart, because |
248 | | -/// once there's "obviously" too much data to fit on a line, in a size |
249 | | -/// calculation, SCAN will write "infinity" to the size and let PRINT consume |
250 | | -/// it. |
251 | | -/// |
252 | | -/// In this implementation (following the paper, again) the SCAN process is |
253 | | -/// the method called 'pretty_print', and the 'PRINT' process is the method |
254 | | -/// called 'print'. |
255 | 264 | pub struct Printer<'a> { |
256 | 265 | pub out: Box<io::Write+'a>, |
257 | 266 | buf_len: usize, |
@@ -292,7 +301,7 @@ impl<'a> Printer<'a> { |
292 | 301 | pub fn last_token(&mut self) -> Token { |
293 | 302 | self.buf[self.right].token.clone() |
294 | 303 | } |
295 | | - // be very careful with this! |
| 304 | + /// Overwrites the token stored at `self.right` in the buffer. Be very careful with this!
296 | 305 | pub fn replace_last_token(&mut self, t: Token) { |
297 | 306 | self.buf[self.right].token = t; |
298 | 307 | } |
@@ -571,19 +580,21 @@ impl<'a> Printer<'a> { |
571 | 580 | } |
572 | 581 |
|
573 | 582 | // Convenience functions to talk to the printer. |
574 | | -// |
575 | | -// "raw box" |
| 583 | + |
| 584 | +/// "raw box": feeds the printer a `Begin` token with the given indent and breaking mode
576 | 585 | pub fn rbox(p: &mut Printer, indent: usize, b: Breaks) -> io::Result<()> { |
577 | 586 | p.pretty_print(Token::Begin(BeginToken { |
578 | 587 | offset: indent as isize, |
579 | 588 | breaks: b |
580 | 589 | })) |
581 | 590 | } |
582 | 591 |
|
| 592 | +/// Inconsistent breaking box |
583 | 593 | pub fn ibox(p: &mut Printer, indent: usize) -> io::Result<()> { |
584 | 594 | rbox(p, indent, Breaks::Inconsistent) |
585 | 595 | } |
586 | 596 |
|
| 597 | +/// Consistent breaking box |
587 | 598 | pub fn cbox(p: &mut Printer, indent: usize) -> io::Result<()> { |
588 | 599 | rbox(p, indent, Breaks::Consistent) |
589 | 600 | } |
|
0 commit comments