diff options
author | 2010-05-14 19:52:43 +0000 | |
---|---|---|
committer | 2010-05-14 19:52:43 +0000 | |
commit | bc49dbe17caf2a6bd9a7026019f6b8ca520f8c75 (patch) | |
tree | 0728aca4e989f1dc7955f9d116d392dca66f32e2 | |
parent | avoid builtins completely, so avoid possible issues with gcc4. (diff) | |
download | wireguard-openbsd-bc49dbe17caf2a6bd9a7026019f6b8ca520f8c75.tar.xz wireguard-openbsd-bc49dbe17caf2a6bd9a7026019f6b8ca520f8c75.zip |
Integrate kristaps@'s end-of-sentence (EOS) framework,
which is simpler and more powerful than mine, and remove mine.
* man(7) now has EOS handling, too
* put EOS detection into its own function in libmandoc
* use node and termp flags to communicate the EOS condition
* no more EOS pseudo-macro
* no more non-printable EOS marker character on the formatter level
This slightly breaks EOS detection after trailing punctuation
in mdoc(7) macros, but that will be restored soon.
-rw-r--r-- | usr.bin/mandoc/chars.h | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/libmandoc.h | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/man.7 | 8 | ||||
-rw-r--r-- | usr.bin/mandoc/man.c | 13 | ||||
-rw-r--r-- | usr.bin/mandoc/man.h | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/man_term.c | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/mandoc.c | 25 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc.7 | 8 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc.c | 46 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc.h | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_action.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_argv.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_html.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_macro.c | 3 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_term.c | 19 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc_validate.c | 6 | ||||
-rw-r--r-- | usr.bin/mandoc/term.c | 27 | ||||
-rw-r--r-- | usr.bin/mandoc/term.h | 3 |
18 files changed, 98 insertions, 88 deletions
diff --git a/usr.bin/mandoc/chars.h b/usr.bin/mandoc/chars.h index 88ece13ec5a..3685451762f 100644 --- a/usr.bin/mandoc/chars.h +++ b/usr.bin/mandoc/chars.h @@ -1,4 +1,4 @@ -/* $Id: chars.h,v 1.2 2010/03/02 00:38:59 schwarze Exp $ */ +/* $Id: chars.h,v 1.3 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -17,7 +17,6 @@ #ifndef CHARS_H #define CHARS_H -#define ASCII_EOS 30 /* end of sentence marker */ #define ASCII_NBRSP 31 /* non-breaking space */ __BEGIN_DECLS diff --git a/usr.bin/mandoc/libmandoc.h b/usr.bin/mandoc/libmandoc.h index 23588a422c0..7b43a25e329 100644 --- a/usr.bin/mandoc/libmandoc.h +++ b/usr.bin/mandoc/libmandoc.h @@ -1,4 +1,4 @@ -/* $Id: libmandoc.h,v 1.3 2009/12/23 22:30:17 schwarze Exp $ */ +/* $Id: libmandoc.h,v 1.4 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -29,6 +29,7 @@ time_t mandoc_a2time(int, const char *); #define MTIME_REDUCED (1 << 1) #define MTIME_MDOCDATE (1 << 2) #define MTIME_ISO_8601 (1 << 3) +int mandoc_eos(const char *, size_t); __END_DECLS diff --git a/usr.bin/mandoc/man.7 b/usr.bin/mandoc/man.7 index cd0f096f5e3..df94dbf704b 100644 --- a/usr.bin/mandoc/man.7 +++ b/usr.bin/mandoc/man.7 @@ -1,4 +1,4 @@ -.\" $Id: man.7,v 1.22 2010/05/14 14:47:44 schwarze Exp $ +.\" $Id: man.7,v 1.23 2010/05/14 19:52:43 schwarze Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> .\" @@ -212,6 +212,12 @@ this differs from .Xr mdoc 7 , which, if a unit is not provided, will instead interpret the string as literal text. +.Ss Sentence Spacing +When composing a manual, make sure that your sentences end at the end of +a line. +By doing so, front-ends will be able to apply the proper amount of +spacing after the end of sentence (unescaped) period, exclamation, or question +mark. 
.Sh MANUAL STRUCTURE Each .Nm diff --git a/usr.bin/mandoc/man.c b/usr.bin/mandoc/man.c index 3d0c5bf50a4..8ebac56ed9d 100644 --- a/usr.bin/mandoc/man.c +++ b/usr.bin/mandoc/man.c @@ -1,4 +1,4 @@ -/* $Id: man.c,v 1.27 2010/05/14 01:54:37 schwarze Exp $ */ +/* $Id: man.c,v 1.28 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -433,6 +433,17 @@ man_ptext(struct man *m, int line, char *buf) if ( ! man_word_alloc(m, line, 0, buf)) return(0); + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(i); + + if (mandoc_eos(buf, (size_t)i)) + m->last->flags |= MAN_EOS; + descope: /* * Co-ordinate what happens with having a next-line scope open: diff --git a/usr.bin/mandoc/man.h b/usr.bin/mandoc/man.h index ecf250e1dd4..fd4451bfce9 100644 --- a/usr.bin/mandoc/man.h +++ b/usr.bin/mandoc/man.h @@ -1,4 +1,4 @@ -/* $Id: man.h,v 1.16 2010/05/13 20:34:29 schwarze Exp $ */ +/* $Id: man.h,v 1.17 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -96,7 +96,8 @@ struct man_node { int flags; #define MAN_VALID (1 << 0) #define MAN_ACTED (1 << 1) -#define MAN_USE (1 << 2) +#define MAN_EOS (1 << 2) +#define MAN_USE (1 << 3) enum man_type type; char *string; struct man_node *head; diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c index 9ffd7f75e59..478daa5372b 100644 --- a/usr.bin/mandoc/man_term.c +++ b/usr.bin/mandoc/man_term.c @@ -1,4 +1,4 @@ -/* $Id: man_term.c,v 1.29 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: man_term.c,v 1.30 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -841,6 +841,9 @@ print_man_node(DECL_ARGS) if ( ! 
(MAN_NOTEXT & termacts[n->tok].flags)) term_fontrepl(p, TERMFONT_NONE); } + + if (MAN_EOS & n->flags) + p->flags |= TERMP_SENTENCE; } diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c index d03f7bf482d..34fa2029c48 100644 --- a/usr.bin/mandoc/mandoc.c +++ b/usr.bin/mandoc/mandoc.c @@ -1,4 +1,4 @@ -/* $Id: mandoc.c,v 1.8 2010/04/07 23:15:05 schwarze Exp $ */ +/* $Id: mandoc.c,v 1.9 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -296,3 +296,26 @@ mandoc_a2time(int flags, const char *p) return(0); } + +int +mandoc_eos(const char *p, size_t sz) +{ + + assert(sz); + + switch (p[(int)sz - 1]) { + case ('.'): + /* Escaped periods. */ + if (sz > 1 && '\\' == p[(int)sz - 2]) + return(0); + /* FALLTHROUGH */ + case ('!'): + /* FALLTHROUGH */ + case ('?'): + break; + default: + return(0); + } + + return(1); +} diff --git a/usr.bin/mandoc/mdoc.7 b/usr.bin/mandoc/mdoc.7 index b7084c5c6f3..aecae284574 100644 --- a/usr.bin/mandoc/mdoc.7 +++ b/usr.bin/mandoc/mdoc.7 @@ -1,4 +1,4 @@ -.\" $Id: mdoc.7,v 1.27 2010/05/14 14:47:44 schwarze Exp $ +.\" $Id: mdoc.7,v 1.28 2010/05/14 19:52:43 schwarze Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> .\" @@ -296,6 +296,12 @@ or is necessarily non-portable across output media. See .Sx COMPATIBILITY . +.Ss Sentence Spacing +When composing a manual, make sure that your sentences end at the end of +a line. +By doing so, front-ends will be able to apply the proper amount of +spacing after the end of sentence (unescaped) period, exclamation mark, +or question mark. 
.Sh MANUAL STRUCTURE A well-formed .Nm diff --git a/usr.bin/mandoc/mdoc.c b/usr.bin/mandoc/mdoc.c index a19720547b1..e745ec86774 100644 --- a/usr.bin/mandoc/mdoc.c +++ b/usr.bin/mandoc/mdoc.c @@ -1,4 +1,4 @@ -/* $Id: mdoc.c,v 1.47 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc.c,v 1.48 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -120,7 +120,7 @@ const char *const __mdoc_macronames[MDOC_MAX] = { /* LINTED */ "Dx", "%Q", "br", "sp", /* LINTED */ - "%U", "eos" + "%U" }; const char *const __mdoc_argnames[MDOC_ARG_MAX] = { @@ -688,20 +688,21 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) } /* Allocate the whole word. */ + if ( ! mdoc_word_alloc(m, line, 0, buf)) return(0); /* - * Mark the end of a sentence. Only works when you respect - * Jason's rule: "new sentence, new line". + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. */ - if ('.' == buf[i-1] || '!' == buf[i-1] || '?' == buf[i-1]) { - m->next = MDOC_NEXT_SIBLING; - if ( ! mdoc_elem_alloc(m, line, i, MDOC_eos, NULL)) - return(0); - } - m->next = MDOC_NEXT_SIBLING; + assert(i); + + if (mandoc_eos(buf, (size_t)i)) + m->last->flags |= MDOC_EOS; + return(1); } @@ -727,8 +728,6 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf) enum mdoct tok; int i, j, sv; char mac[5]; - struct mdoc_node *n; - char *t; /* Empty lines are ignored. */ @@ -799,29 +798,6 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf) if ( ! mdoc_macro(m, tok, ln, sv, &i, buf)) goto err; - /* - * Mark the end of a sentence, but be careful not to insert - * markers into reference blocks and after ellipses in - * function definitions. 
- */ - n = m->last; - if (n->child) - n = n->child; - while (n->next) - n = n->next; - if (MDOC_TEXT == n->type && - MDOC_Fn != n->parent->tok && - MDOC_Rs != m->last->parent->tok) { - t = n->string; - while (t[0] && t[1]) - t++; - if ('.' == *t || '!' == *t || '?' == *t) { - if ( ! mdoc_elem_alloc(m, ln, i, MDOC_eos, NULL)) - return(0); - m->next = MDOC_NEXT_SIBLING; - } - } - return(1); err: /* Error out. */ diff --git a/usr.bin/mandoc/mdoc.h b/usr.bin/mandoc/mdoc.h index 42fc45bf333..bcd137429e0 100644 --- a/usr.bin/mandoc/mdoc.h +++ b/usr.bin/mandoc/mdoc.h @@ -1,4 +1,4 @@ -/* $Id: mdoc.h,v 1.21 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc.h,v 1.22 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -149,7 +149,6 @@ enum mdoct { MDOC_br, MDOC_sp, MDOC__U, - MDOC_eos, MDOC_MAX }; diff --git a/usr.bin/mandoc/mdoc_action.c b/usr.bin/mandoc/mdoc_action.c index 0af3d507b49..856b38d56c1 100644 --- a/usr.bin/mandoc/mdoc_action.c +++ b/usr.bin/mandoc/mdoc_action.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_action.c,v 1.32 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc_action.c,v 1.33 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -188,7 +188,6 @@ static const struct actions mdoc_actions[MDOC_MAX] = { { NULL, NULL }, /* br */ { NULL, NULL }, /* sp */ { NULL, NULL }, /* %U */ - { NULL, NULL }, /* eos */ }; #define RSORD_MAX 14 diff --git a/usr.bin/mandoc/mdoc_argv.c b/usr.bin/mandoc/mdoc_argv.c index 632717cc1e0..c97929729ad 100644 --- a/usr.bin/mandoc/mdoc_argv.c +++ b/usr.bin/mandoc/mdoc_argv.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_argv.c,v 1.25 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc_argv.c,v 1.26 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -205,7 +205,6 @@ static int mdoc_argflags[MDOC_MAX] = { 0, /* br */ 0, /* sp */ 0, /* %U */ - 0, /* eos */ }; diff --git 
a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c index f44fcaa7162..60f1416c8f7 100644 --- a/usr.bin/mandoc/mdoc_html.c +++ b/usr.bin/mandoc/mdoc_html.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_html.c,v 1.13 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc_html.c,v 1.14 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -251,7 +251,6 @@ static const struct htmlmdoc mdocs[MDOC_MAX] = { {mdoc_sp_pre, NULL}, /* br */ {mdoc_sp_pre, NULL}, /* sp */ {mdoc__x_pre, mdoc__x_post}, /* %U */ - {NULL, NULL}, /* eos */ }; diff --git a/usr.bin/mandoc/mdoc_macro.c b/usr.bin/mandoc/mdoc_macro.c index 71de1d92f7a..6ea3cbf4ec6 100644 --- a/usr.bin/mandoc/mdoc_macro.c +++ b/usr.bin/mandoc/mdoc_macro.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_macro.c,v 1.37 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc_macro.c,v 1.38 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -180,7 +180,6 @@ const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { { in_line_eoln, 0 }, /* br */ { in_line_eoln, 0 }, /* sp */ { in_line_eoln, 0 }, /* %U */ - { NULL, 0 }, /* eos */ }; const struct mdoc_macro * const mdoc_macros = __mdoc_macros; diff --git a/usr.bin/mandoc/mdoc_term.c b/usr.bin/mandoc/mdoc_term.c index 13d603f5f9f..f44374761c4 100644 --- a/usr.bin/mandoc/mdoc_term.c +++ b/usr.bin/mandoc/mdoc_term.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_term.c,v 1.77 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc_term.c,v 1.78 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -133,7 +133,6 @@ static int termp_ud_pre(DECL_ARGS); static int termp_vt_pre(DECL_ARGS); static int termp_xr_pre(DECL_ARGS); static int termp_xx_pre(DECL_ARGS); -static int termp_eos_pre(DECL_ARGS); static const struct termact termacts[MDOC_MAX] = { { termp_ap_pre, NULL }, /* Ap */ @@ -257,7 +256,6 @@ static const struct termact termacts[MDOC_MAX] = { { termp_sp_pre, NULL }, /* br */ 
{ termp_sp_pre, NULL }, /* sp */ { termp_under_pre, termp____post }, /* %U */ - { termp_eos_pre, NULL }, /* eos */ }; @@ -336,6 +334,9 @@ print_mdoc_node(DECL_ARGS) if (termacts[n->tok].post) (*termacts[n->tok].post)(p, &npair, m, n); + if (MDOC_EOS & n->flags) + p->flags |= TERMP_SENTENCE; + p->offset = offset; p->rmargin = rmargin; } @@ -2166,15 +2167,3 @@ termp_under_pre(DECL_ARGS) term_fontpush(p, TERMFONT_UNDER); return(1); } - - -/* ARGSUSED */ -static int -termp_eos_pre(DECL_ARGS) -{ - const char ascii_eos[2] = { ASCII_EOS, 0 }; - - term_word(p, ascii_eos); - p->flags |= TERMP_NOSPACE; - return(1); -} diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c index 4426ffb3839..55416be2caf 100644 --- a/usr.bin/mandoc/mdoc_validate.c +++ b/usr.bin/mandoc/mdoc_validate.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.51 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.52 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -266,7 +266,6 @@ const struct valids mdoc_valids[MDOC_MAX] = { { NULL, posts_notext }, /* br */ { NULL, posts_sp }, /* sp */ { NULL, posts_text1 }, /* %U */ - { NULL, NULL }, /* eos */ }; @@ -888,8 +887,7 @@ post_vt(POST_ARGS) return(1); for (n = mdoc->last->child; n; n = n->next) - if (MDOC_TEXT != n->type && - (MDOC_ELEM != n->type || MDOC_eos != n->tok)) + if (MDOC_TEXT != n->type) if ( ! mdoc_nwarn(mdoc, n, EBADCHILD)) return(0); diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c index 45191b5e3da..55b7593942b 100644 --- a/usr.bin/mandoc/term.c +++ b/usr.bin/mandoc/term.c @@ -1,4 +1,4 @@ -/* $Id: term.c,v 1.30 2010/04/23 00:23:47 schwarze Exp $ */ +/* $Id: term.c,v 1.31 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -163,8 +163,7 @@ term_flushln(struct termp *p) * beginning of a line, one between words -- but do not * actually write them yet. 
*/ - vbl = (size_t)(ASCII_EOS == p->buf[i] ? 2 : - (0 == vis ? 0 : 1)); + vbl = (size_t)(0 == vis ? 0 : 1); vis += vbl; vend = vis; @@ -195,7 +194,7 @@ term_flushln(struct termp *p) break; else if (8 == p->buf[j]) vend--; - else if (ASCII_EOS != p->buf[j]) { + else { if (vend > vis && vend < bp && '-' == p->buf[j]) jhy = j; @@ -204,13 +203,6 @@ term_flushln(struct termp *p) } /* - * Skip empty words. This happens due to the ASCII_EOS - * after the end of the final sentence of a paragraph. - */ - if (vend == vis && j == (int)p->col) - break; - - /* * Usually, indent the first line of each paragraph. */ if (0 == i && ! (p->flags & TERMP_NOLPAD)) { @@ -268,7 +260,7 @@ term_flushln(struct termp *p) } if (ASCII_NBRSP == p->buf[i]) putchar(' '); - else if (ASCII_EOS != p->buf[i]) + else putchar(p->buf[i]); } p->viscol += vend - vis; @@ -487,12 +479,17 @@ term_word(struct termp *p, const char *word) break; } - if ( ! (TERMP_NOSPACE & p->flags)) + if ( ! (TERMP_NOSPACE & p->flags)) { bufferc(p, ' '); + if (TERMP_SENTENCE & p->flags) + bufferc(p, ' '); + } if ( ! (p->flags & TERMP_NONOSPACE)) p->flags &= ~TERMP_NOSPACE; + p->flags &= ~TERMP_SENTENCE; + /* FIXME: use strcspn. */ while (*word) { @@ -533,6 +530,10 @@ term_word(struct termp *p, const char *word) p->flags |= TERMP_NOSPACE; } + /* + * Note that we don't process the pipe: the parser sees it as + * punctuation, but we don't in terms of typography. + */ if (sv[0] && 0 == sv[1]) switch (sv[0]) { case('('): diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h index f9eff2c0b6b..e497fba1512 100644 --- a/usr.bin/mandoc/term.h +++ b/usr.bin/mandoc/term.h @@ -1,4 +1,4 @@ -/* $Id: term.h,v 1.17 2010/05/14 14:47:44 schwarze Exp $ */ +/* $Id: term.h,v 1.18 2010/05/14 19:52:43 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -41,6 +41,7 @@ struct termp { size_t viscol; /* Chars on current line. */ int overstep; /* See termp_flushln(). 
*/ int flags; +#define TERMP_SENTENCE (1 << 1) /* Space before a sentence. */ #define TERMP_NOSPACE (1 << 2) /* No space before words. */ #define TERMP_NOLPAD (1 << 3) /* See term_flushln(). */ #define TERMP_NOBREAK (1 << 4) /* See term_flushln(). */ |