diff options
author | 2010-05-20 00:58:02 +0000 | |
---|---|---|
committer | 2010-05-20 00:58:02 +0000 | |
commit | 62d9ccdb5cd4508f39f09940ef04620cc2fa0244 (patch) | |
tree | 6fc1c7ef02fdf31da880a34ea3faf200754cf38f | |
parent | New scsi code seems to be stable. Pluck previously identified (diff) | |
download | wireguard-openbsd-62d9ccdb5cd4508f39f09940ef04620cc2fa0244.tar.xz wireguard-openbsd-62d9ccdb5cd4508f39f09940ef04620cc2fa0244.zip |
Support nested roff instructions:
* allow roff_parseln() to be re-run
* allow roff_parseln() to manipulate the line buffer offset
* support the offset in the man and mdoc libraries
* adapt .if, .ie, .el, .ig, .am* and .de* support
* interpret some instructions even in conditional-negative context
Coded by kristaps during the last day of the mandoc hackathon.
To avoid regressions in the OpenBSD tree, commit this together
with some small local additions:
* detect roff block end "\}" even on macro lines
* actually implement the ".if n" conditional
* ignore .ds, .rm and .tr in libroff
Also back my old .if/.ie/.el-handling out of libman, reverting:
man.h 1.15 man.c 1.25 man_macro.c 1.15 man_validate.c 1.19
man_action.c 1.15 man_term.c 1.28 man_html.c 1.9.
-rw-r--r-- | usr.bin/mandoc/main.c | 43 | ||||
-rw-r--r-- | usr.bin/mandoc/man.c | 72 | ||||
-rw-r--r-- | usr.bin/mandoc/man.h | 8 | ||||
-rw-r--r-- | usr.bin/mandoc/man_action.c | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/man_html.c | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/man_macro.c | 94 | ||||
-rw-r--r-- | usr.bin/mandoc/man_term.c | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/man_validate.c | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/mandoc.h | 5 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc.c | 57 | ||||
-rw-r--r-- | usr.bin/mandoc/mdoc.h | 4 | ||||
-rw-r--r-- | usr.bin/mandoc/roff.7 | 265 | ||||
-rw-r--r-- | usr.bin/mandoc/roff.c | 551 | ||||
-rw-r--r-- | usr.bin/mandoc/roff.h | 12 |
14 files changed, 833 insertions, 298 deletions
diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c index 63bd938dfdc..96ba8838d7d 100644 --- a/usr.bin/mandoc/main.c +++ b/usr.bin/mandoc/main.c @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.31 2010/05/16 01:16:25 schwarze Exp $ */ +/* $Id: main.c,v 1.32 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -375,7 +375,7 @@ static void fdesc(struct curparse *curp) { struct buf ln, blk; - int i, pos, lnn, lnn_start, with_mmap; + int i, pos, lnn, lnn_start, with_mmap, of; enum rofferr re; struct man *man; struct mdoc *mdoc; @@ -456,22 +456,42 @@ fdesc(struct curparse *curp) goto bailout; ln.buf[pos] = '\0'; - re = roff_parseln(roff, lnn_start, &ln.buf, &ln.sz); + /* + * A significant amount of complexity is contained by + * the roff preprocessor. It's line-oriented but can be + * expressed on one line, so we need at times to + * readjust our starting point and re-run it. The roff + * preprocessor can also readjust the buffers with new + * data, so we pass them in wholesale. + */ + + of = 0; + do { + re = roff_parseln(roff, lnn_start, + &ln.buf, &ln.sz, of, &of); + } while (ROFF_RERUN == re); + if (ROFF_IGN == re) continue; else if (ROFF_ERR == re) goto bailout; - /* If unset, assign parser in pset(). */ + /* + * If input parsers have not been allocated, do so now. + * We keep these instanced betwen parsers, but set them + * locally per parse routine since we can use different + * parsers with each one. + */ - if ( ! (man || mdoc) && ! pset(ln.buf, pos, curp, &man, &mdoc)) - goto bailout; + if ( ! (man || mdoc)) + if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc)) + goto bailout; - /* Pass down into parsers. */ + /* Lastly, push down into the parsers themselves. */ - if (man && ! man_parseln(man, lnn_start, ln.buf)) + if (man && ! man_parseln(man, lnn_start, ln.buf, of)) goto bailout; - if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf)) + if (mdoc && ! 
mdoc_parseln(mdoc, lnn_start, ln.buf, of)) goto bailout; } @@ -482,6 +502,8 @@ fdesc(struct curparse *curp) goto bailout; } + /* Clean up the parse routine ASTs. */ + if (mdoc && ! mdoc_endparse(mdoc)) goto bailout; if (man && ! man_endparse(man)) @@ -765,7 +787,8 @@ mwarn(void *arg, int line, int col, const char *msg) static const char * const mandocerrs[MANDOCERR_MAX] = { "ok", "multi-line scope open on exit", - "request for scope closure when no matching scope is open", + "request for scope closure when no matching scope is open: ignored", + "macro requires line argument(s): ignored", "line arguments will be lost", "memory exhausted" }; diff --git a/usr.bin/mandoc/man.c b/usr.bin/mandoc/man.c index 6e8e720e8f5..377fb69e1d8 100644 --- a/usr.bin/mandoc/man.c +++ b/usr.bin/mandoc/man.c @@ -1,4 +1,4 @@ -/* $Id: man.c,v 1.30 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: man.c,v 1.31 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -59,7 +59,6 @@ const char *const __man_macronames[MAN_MAX] = { "nf", "fi", "r", "RE", "RS", "DT", "UC", "PD", "Sp", "Vb", "Ve", - "if", "ie", "el", }; const char * const *man_macronames = __man_macronames; @@ -71,11 +70,11 @@ static int man_node_append(struct man *, static void man_node_free(struct man_node *); static void man_node_unlink(struct man *, struct man_node *); -static int man_ptext(struct man *, int, char *); -static int man_pmacro(struct man *, int, char *); +static int man_ptext(struct man *, int, char *, int); +static int man_pmacro(struct man *, int, char *, int); static void man_free1(struct man *); static void man_alloc1(struct man *); -static int macrowarn(struct man *, int, const char *); +static int macrowarn(struct man *, int, const char *, int); const struct man_node * @@ -145,29 +144,15 @@ man_endparse(struct man *m) int -man_parseln(struct man *m, int ln, char *buf) +man_parseln(struct man *m, int ln, char *buf, int offs) { - char *p; - size_t len; - int 
brace_close = 0; - - if ((len = strlen(buf)) > 1) { - p = buf + (len - 2); - if (p[0] == '\\' && p[1] == '}') { - brace_close = 1; - *p = '\0'; - } - } - if ('.' == *buf || '\'' == *buf) { - if ( ! man_pmacro(m, ln, buf)) - return(0); - } else { - if ( ! man_ptext(m, ln, buf)) - return(0); - } + if (MAN_HALT & m->flags) + return(0); - return(brace_close ? man_brace_close(m, ln, len-2) : 1); + return(('.' == buf[offs] || '\'' == buf[offs]) ? + man_pmacro(m, ln, buf, offs) : + man_ptext(m, ln, buf, offs)); } @@ -377,31 +362,33 @@ man_node_delete(struct man *m, struct man_node *p) static int -man_ptext(struct man *m, int line, char *buf) +man_ptext(struct man *m, int line, char *buf, int offs) { int i; /* Ignore bogus comments. */ - if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2]) - return(man_pwarn(m, line, 0, WBADCOMMENT)); + if ('\\' == buf[offs] && + '.' == buf[offs + 1] && + '"' == buf[offs + 2]) + return(man_pwarn(m, line, offs, WBADCOMMENT)); /* Literal free-form text whitespace is preserved. */ if (MAN_LITERAL & m->flags) { - if ( ! man_word_alloc(m, line, 0, buf)) + if ( ! man_word_alloc(m, line, offs, buf + offs)) return(0); goto descope; } /* Pump blank lines directly into the backend. */ - for (i = 0; ' ' == buf[i]; i++) + for (i = offs; ' ' == buf[i]; i++) /* Skip leading whitespace. */ ; if ('\0' == buf[i]) { /* Allocate a blank entry. */ - if ( ! man_word_alloc(m, line, 0, "")) + if ( ! man_word_alloc(m, line, offs, "")) return(0); goto descope; } @@ -428,7 +415,7 @@ man_ptext(struct man *m, int line, char *buf) buf[i] = '\0'; } - if ( ! man_word_alloc(m, line, 0, buf)) + if ( ! man_word_alloc(m, line, offs, buf + offs)) return(0); /* @@ -438,7 +425,6 @@ man_ptext(struct man *m, int line, char *buf) */ assert(i); - if (mandoc_eos(buf, (size_t)i)) m->last->flags |= MAN_EOS; @@ -461,23 +447,23 @@ descope: if ( ! 
man_unscope(m, m->last->parent, WERRMAX)) return(0); - return(man_body_alloc(m, line, 0, m->last->tok)); + return(man_body_alloc(m, line, offs, m->last->tok)); } static int -macrowarn(struct man *m, int ln, const char *buf) +macrowarn(struct man *m, int ln, const char *buf, int offs) { if ( ! (MAN_IGN_MACRO & m->pflags)) - return(man_verr(m, ln, 0, "unknown macro: %s%s", + return(man_verr(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." : "")); - return(man_vwarn(m, ln, 0, "unknown macro: %s%s", + return(man_vwarn(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." : "")); } int -man_pmacro(struct man *m, int ln, char *buf) +man_pmacro(struct man *m, int ln, char *buf, int offs) { int i, j, ppos; enum mant tok; @@ -486,10 +472,12 @@ man_pmacro(struct man *m, int ln, char *buf) /* Comments and empties are quickly ignored. */ - if ('\0' == buf[1]) + offs++; + + if ('\0' == buf[offs]) return(1); - i = 1; + i = offs; /* * Skip whitespace between the control character and initial @@ -534,7 +522,7 @@ man_pmacro(struct man *m, int ln, char *buf) } if (MAN_MAX == (tok = man_hash_find(mac))) { - if ( ! macrowarn(m, ln, mac)) + if ( ! macrowarn(m, ln, mac, ppos)) goto err; return(1); } @@ -640,7 +628,7 @@ out: if ( ! man_unscope(m, m->last->parent, WERRMAX)) return(0); - return(man_body_alloc(m, ln, 0, m->last->tok)); + return(man_body_alloc(m, ln, offs, m->last->tok)); err: /* Error out. 
*/ diff --git a/usr.bin/mandoc/man.h b/usr.bin/mandoc/man.h index caf863f163e..3d9d629bbed 100644 --- a/usr.bin/mandoc/man.h +++ b/usr.bin/mandoc/man.h @@ -1,4 +1,4 @@ -/* $Id: man.h,v 1.19 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: man.h,v 1.20 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -55,9 +55,6 @@ enum mant { MAN_Sp, MAN_Vb, MAN_Ve, - MAN_if, - MAN_ie, - MAN_el, MAN_MAX, }; @@ -91,7 +88,6 @@ struct man_node { #define MAN_VALID (1 << 0) #define MAN_ACTED (1 << 1) #define MAN_EOS (1 << 2) -#define MAN_USE (1 << 3) enum man_type type; char *string; struct man_node *head; @@ -115,7 +111,7 @@ struct man; void man_free(struct man *); struct man *man_alloc(void *, int, const struct man_cb *); void man_reset(struct man *); -int man_parseln(struct man *, int, char *buf); +int man_parseln(struct man *, int, char *, int); int man_endparse(struct man *); const struct man_node *man_node(const struct man *); diff --git a/usr.bin/mandoc/man_action.c b/usr.bin/mandoc/man_action.c index c4cb1a71026..bf30fe6378a 100644 --- a/usr.bin/mandoc/man_action.c +++ b/usr.bin/mandoc/man_action.c @@ -1,4 +1,4 @@ -/* $Id: man_action.c,v 1.17 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: man_action.c,v 1.18 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -66,9 +66,6 @@ const struct actions man_actions[MAN_MAX] = { { NULL }, /* Sp */ { post_nf }, /* Vb */ { post_fi }, /* Ve */ - { NULL }, /* if */ - { NULL }, /* ie */ - { NULL }, /* el */ }; diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c index 74ee0935395..2ca3471f540 100644 --- a/usr.bin/mandoc/man_html.c +++ b/usr.bin/mandoc/man_html.c @@ -1,4 +1,4 @@ -/* $Id: man_html.c,v 1.11 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: man_html.c,v 1.12 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -102,9 +102,6 @@ static const struct htmlman 
mans[MAN_MAX] = { { man_br_pre, NULL }, /* Sp */ { man_ign_pre, NULL }, /* Vb */ { NULL, NULL }, /* Ve */ - { NULL, NULL }, /* if */ - { NULL, NULL }, /* ie */ - { NULL, NULL }, /* el */ }; diff --git a/usr.bin/mandoc/man_macro.c b/usr.bin/mandoc/man_macro.c index bcf51c42910..d1c8e7d8197 100644 --- a/usr.bin/mandoc/man_macro.c +++ b/usr.bin/mandoc/man_macro.c @@ -1,4 +1,4 @@ -/* $Id: man_macro.c,v 1.16 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: man_macro.c,v 1.17 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -30,7 +30,6 @@ enum rew { static int blk_close(MACRO_PROT_ARGS); static int blk_exp(MACRO_PROT_ARGS); static int blk_imp(MACRO_PROT_ARGS); -static int blk_cond(MACRO_PROT_ARGS); static int in_line_eoln(MACRO_PROT_ARGS); static int rew_scope(enum man_type, @@ -78,9 +77,6 @@ const struct man_macro __man_macros[MAN_MAX] = { { in_line_eoln, MAN_NSCOPED }, /* Sp */ { in_line_eoln, 0 }, /* Vb */ { in_line_eoln, 0 }, /* Ve */ - { blk_cond, 0 }, /* if */ - { blk_cond, 0 }, /* ie */ - { blk_cond, 0 }, /* el */ }; const struct man_macro * const man_macros = __man_macros; @@ -252,50 +248,6 @@ rew_scope(enum man_type type, struct man *m, enum mant tok) /* - * Closure for brace blocks (if, ie, el). 
- */ -int -man_brace_close(struct man *m, int line, int ppos) -{ - struct man_node *nif; - - nif = m->last->parent; - while (nif && - MAN_if != nif->tok && - MAN_ie != nif->tok && - MAN_el != nif->tok) - nif = nif->parent; - - if (NULL == nif) - return(man_pwarn(m, line, ppos, WNOSCOPE)); - - if (MAN_ie != nif->tok || MAN_USE & nif->flags) - m->flags &= ~MAN_EL_USE; - else - m->flags |= MAN_EL_USE; - - if (MAN_USE & nif->flags) { - if (nif->prev) { - nif->prev->next = nif->child; - nif->child->prev = nif->prev; - nif->prev = NULL; - } else { - nif->parent->child = nif->child; - } - nif->parent->nchild += nif->nchild - 1; - while (nif->child) { - nif->child->parent = nif->parent; - nif->child = nif->child->next; - } - nif->nchild = 0; - nif->parent = NULL; - } - man_node_delete(m, nif); - return(1); -} - - -/* * Close out a generic explicit macro. */ /* ARGSUSED */ @@ -439,50 +391,6 @@ blk_imp(MACRO_PROT_ARGS) } -/* - * Parse a conditional roff instruction. - */ -int -blk_cond(MACRO_PROT_ARGS) -{ - char *p = buf + *pos; - int use; - - if (MAN_el == tok) - use = m->flags & MAN_EL_USE; - else { - use = 'n' == *p++; - /* XXX skip the rest of the condition for now */ - while (*p && !isblank(*p)) - p++; - } - m->flags &= ~MAN_EL_USE; - - /* advance to the code controlled by the condition */ - while (*p && isblank(*p)) - p++; - if ('\0' == *p) - return(1); - - /* single-line body */ - if (strncmp("\\{", p, 2)) { - if (use && ! man_parseln(m, line, p)) - return(0); - if (MAN_ie == tok && !use) - m->flags |= MAN_EL_USE; - return(1); - } - - /* multi-line body */ - if ( ! man_block_alloc(m, line, ppos, tok)) - return(0); - if (use) - m->last->flags |= MAN_USE; - p += 2; - return(*p ? 
man_parseln(m, line, p) : 1); -} - - int in_line_eoln(MACRO_PROT_ARGS) { diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c index 2e89ae9d795..8711851ea31 100644 --- a/usr.bin/mandoc/man_term.c +++ b/usr.bin/mandoc/man_term.c @@ -1,4 +1,4 @@ -/* $Id: man_term.c,v 1.34 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: man_term.c,v 1.35 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -138,9 +138,6 @@ static const struct termact termacts[MAN_MAX] = { { pre_sp, NULL, MAN_NOTEXT }, /* Sp */ { pre_nf, NULL, 0 }, /* Vb */ { pre_fi, NULL, 0 }, /* Ve */ - { NULL, NULL, 0 }, /* if */ - { NULL, NULL, 0 }, /* ie */ - { NULL, NULL, 0 }, /* el */ }; diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c index b879304ff85..f2bd0c9309a 100644 --- a/usr.bin/mandoc/man_validate.c +++ b/usr.bin/mandoc/man_validate.c @@ -1,4 +1,4 @@ -/* $Id: man_validate.c,v 1.22 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: man_validate.c,v 1.23 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -91,9 +91,6 @@ static const struct man_valid man_valids[MAN_MAX] = { { NULL, posts_le1 }, /* Sp */ { pres_bline, posts_le1 }, /* Vb */ { pres_bline, posts_eq0 }, /* Ve */ - { NULL, NULL }, /* if */ - { NULL, NULL }, /* ie */ - { NULL, NULL }, /* el */ }; diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h index 6fe33b0966b..d7e47950feb 100644 --- a/usr.bin/mandoc/mandoc.h +++ b/usr.bin/mandoc/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.1 2010/05/16 01:46:39 schwarze Exp $ */ +/* $Id: mandoc.h,v 1.2 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -22,9 +22,10 @@ __BEGIN_DECLS enum mandocerr { MANDOCERR_OK, MANDOCERR_SCOPEEXIT, /* scope open on exit */ - MANDOCERR_NOSCOPE, /* request scope close w/none open */ #define MANDOCERR_WARNING MANDOCERR_SCOPEEXIT + MANDOCERR_NOSCOPE, /* request scope 
close w/none open */ + MANDOCERR_NOARGS, /* macro requires argument(s) */ MANDOCERR_ARGSLOST, /* line arguments will be lost */ #define MANDOCERR_ERROR MANDOCERR_ARGSLOST diff --git a/usr.bin/mandoc/mdoc.c b/usr.bin/mandoc/mdoc.c index e5c8121963b..261edfc35a6 100644 --- a/usr.bin/mandoc/mdoc.c +++ b/usr.bin/mandoc/mdoc.c @@ -1,4 +1,4 @@ -/* $Id: mdoc.c,v 1.52 2010/05/16 20:46:15 schwarze Exp $ */ +/* $Id: mdoc.c,v 1.53 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -146,9 +146,10 @@ static struct mdoc_node *node_alloc(struct mdoc *, int, int, enum mdoct, enum mdoc_type); static int node_append(struct mdoc *, struct mdoc_node *); -static int mdoc_ptext(struct mdoc *, int, char *); -static int mdoc_pmacro(struct mdoc *, int, char *); -static int macrowarn(struct mdoc *, int, const char *); +static int mdoc_ptext(struct mdoc *, int, char *, int); +static int mdoc_pmacro(struct mdoc *, int, char *, int); +static int macrowarn(struct mdoc *, int, + const char *, int); const struct mdoc_node * @@ -280,16 +281,16 @@ mdoc_endparse(struct mdoc *m) * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). */ int -mdoc_parseln(struct mdoc *m, int ln, char *buf) +mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) { if (MDOC_HALT & m->flags) return(0); m->flags |= MDOC_NEWLINE; - return(('.' == *buf || '\'' == *buf) ? - mdoc_pmacro(m, ln, buf) : - mdoc_ptext(m, ln, buf)); + return(('.' == buf[offs] || '\'' == buf[offs]) ? + mdoc_pmacro(m, ln, buf, offs) : + mdoc_ptext(m, ln, buf, offs)); } @@ -626,26 +627,28 @@ mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) * control character. */ static int -mdoc_ptext(struct mdoc *m, int line, char *buf) +mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) { char *c, *ws, *end; /* Ignore bogus comments. */ - if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2]) - return(mdoc_pwarn(m, line, 0, EBADCOMMENT)); + if ('\\' == buf[offs] && + '.' 
== buf[offs + 1] && + '"' == buf[offs + 2]) + return(mdoc_pwarn(m, line, offs, EBADCOMMENT)); /* No text before an initial macro. */ if (SEC_NONE == m->lastnamed) - return(mdoc_perr(m, line, 0, ETEXTPROL)); + return(mdoc_perr(m, line, offs, ETEXTPROL)); /* * Search for the beginning of unescaped trailing whitespace (ws) * and for the first character not to be output (end). */ ws = NULL; - for (c = end = buf; *c; c++) { + for (c = end = buf + offs; *c; c++) { switch (*c) { case ' ': if (NULL == ws) @@ -683,7 +686,7 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) if ( ! mdoc_pwarn(m, line, (int)(ws-buf), ETAILWS)) return(0); - if ('\0' == *buf && ! (MDOC_LITERAL & m->flags)) { + if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { if ( ! mdoc_pwarn(m, line, (int)(c-buf), ENOBLANK)) return(0); @@ -692,14 +695,14 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) * blank lines aren't allowed, but enough manuals assume this * behaviour that we want to work around it. */ - if ( ! mdoc_elem_alloc(m, line, 0, MDOC_Pp, NULL)) + if ( ! mdoc_elem_alloc(m, line, offs, MDOC_Pp, NULL)) return(0); m->next = MDOC_NEXT_SIBLING; return(1); } - if ( ! mdoc_word_alloc(m, line, 0, buf)) + if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) return(0); if (MDOC_LITERAL & m->flags) @@ -713,7 +716,7 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) assert(buf < end); - if (mandoc_eos(buf, (size_t)(end-buf))) + if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) m->last->flags |= MDOC_EOS; return(1); @@ -721,12 +724,12 @@ mdoc_ptext(struct mdoc *m, int line, char *buf) static int -macrowarn(struct mdoc *m, int ln, const char *buf) +macrowarn(struct mdoc *m, int ln, const char *buf, int offs) { if ( ! (MDOC_IGN_MACRO & m->pflags)) - return(mdoc_verr(m, ln, 0, "unknown macro: %s%s", + return(mdoc_verr(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." 
: "")); - return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", + return(mdoc_vwarn(m, ln, offs, "unknown macro: %s%s", buf, strlen(buf) > 3 ? "..." : "")); } @@ -736,7 +739,7 @@ macrowarn(struct mdoc *m, int ln, const char *buf) * character. */ int -mdoc_pmacro(struct mdoc *m, int ln, char *buf) +mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) { enum mdoct tok; int i, j, sv; @@ -744,10 +747,12 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf) /* Empty lines are ignored. */ - if ('\0' == buf[1]) + offs++; + + if ('\0' == buf[offs]) return(1); - i = 1; + i = offs; /* Accept whitespace after the initial control char. */ @@ -776,16 +781,16 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf) return(mdoc_perr(m, ln, i, EPRINT)); } - mac[j] = 0; + mac[j] = '\0'; if (j == 4 || j < 2) { - if ( ! macrowarn(m, ln, mac)) + if ( ! macrowarn(m, ln, mac, sv)) goto err; return(1); } if (MDOC_MAX == (tok = mdoc_hash_find(mac))) { - if ( ! macrowarn(m, ln, mac)) + if ( ! macrowarn(m, ln, mac, sv)) goto err; return(1); } diff --git a/usr.bin/mandoc/mdoc.h b/usr.bin/mandoc/mdoc.h index 31dc068a41f..730c8111296 100644 --- a/usr.bin/mandoc/mdoc.h +++ b/usr.bin/mandoc/mdoc.h @@ -1,4 +1,4 @@ -/* $Id: mdoc.h,v 1.23 2010/05/15 18:25:51 schwarze Exp $ */ +/* $Id: mdoc.h,v 1.24 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> * @@ -297,7 +297,7 @@ struct mdoc; void mdoc_free(struct mdoc *); struct mdoc *mdoc_alloc(void *, int, const struct mdoc_cb *); void mdoc_reset(struct mdoc *); -int mdoc_parseln(struct mdoc *, int, char *buf); +int mdoc_parseln(struct mdoc *, int, char *, int); const struct mdoc_node *mdoc_node(const struct mdoc *); const struct mdoc_meta *mdoc_meta(const struct mdoc *); int mdoc_endparse(struct mdoc *); diff --git a/usr.bin/mandoc/roff.7 b/usr.bin/mandoc/roff.7 new file mode 100644 index 00000000000..6f6ef450876 --- /dev/null +++ b/usr.bin/mandoc/roff.7 @@ -0,0 +1,265 @@ +.\" $Id: roff.7,v 1.1 2010/05/20 
00:58:02 schwarze Exp $ +.\" +.\" Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 20 2010 $ +.Dt ROFF 7 +.Os +.Sh NAME +.Nm roff +.Nd roff language reference +.Sh DESCRIPTION +The +.Nm roff +language is a general-purpose text-formatting language. The purpose of +this document is to consistently describe those language constructs +accepted by the +.Xr mandoc 1 +utility. It is a work in progress. +.Pp +An +.Nm +document follows simple rules: lines beginning with the control +characters +.Sq \. +or +.Sq \(aq +are parsed for macros. Other lines are interpreted within the scope of +prior macros: +.Bd -literal -offset indent +\&.xx Macro lines change control state. +Other lines are interpreted within the current state. +.Ed +.Sh LANGUAGE SYNTAX +.Nm +documents may contain only graphable 7-bit ASCII characters, the space +character, and, in certain circumstances, the tab character. All +manuals must have +.Ux +line terminators. +.Sh MACRO SYNTAX +Macros are arbitrary in length and begin with a control character , +.Sq \. +or +.Sq \(aq , +at the beginning of the line. +An arbitrary amount of whitespace may sit between the control character +and the macro name. 
+Thus, the following are equivalent: +.Bd -literal -offset indent +\&.if +\&.\ \ \ \&if +.Ed +.Sh REFERENCE +This section is a canonical reference of all macros, arranged +alphabetically. +.Ss \&am +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&ami +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&am1 +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&de +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&dei +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&de1 +The syntax of this macro is the same as that of +.Sx \&ig , +except that a leading argument must be specified. +It is ignored, as are its children. +.Ss \&el +The +.Qq else +half of an if/else conditional. +Pops a result off the stack of conditional evaluations pushed by +.Sx \&ie +and uses it as its conditional. +If no stack entries are present (e.g., due to no prior +.Sx \&ie +calls) +then false is assumed. +The syntax of this macro is similar to +.Sx \&if +except that the conditional is missing. +.Ss \&ie +The +.Qq if +half of an if/else conditional. +The result of the conditional is pushed into a stack used by subsequent +invocations of +.Sx \&el , +which may be separated by any intervening input (or not exist at all). +Its syntax is equivalent to +.Sx \&if . +.Ss \&if +Begins a conditional that always evaluates to false. +If a conditional is false, its children are not processed, but are +syntactically interpreted to preserve the integrity of the input +document. 
+Thus, +.Pp +.D1 \&.if t \e .ig +.Pp +will discard the +.Sq \&.ig , +which may lead to interesting results, but +.Pp +.D1 \&.if t \e .if t \e{\e +.Pp +will continue to syntactically interpret to the block close of the final +conditional. +Sub-conditionals, in this case, obviously inherit the truth value of +the parent. +This macro has the following syntax: +.Pp +.Bd -literal -offset indent -compact +\&.if COND \e{\e +BODY... +\&.\e} +.Ed +.Bd -literal -offset indent -compact +\&.if COND \e{ BODY +BODY... \e} +.Ed +.Bd -literal -offset indent -compact +\&.if COND \e{ BODY +BODY... +\&.\e} +.Ed +.Bd -literal -offset indent -compact +\&.if COND \e +BODY +.Ed +.Pp +COND is a conditional (for the time being, this always evaluates to +false). +.Pp +If the BODY section is begun by an escaped brace +.Sq \e{ , +scope continues until a closing-brace macro +.Sq \.\e} . +If the BODY is not enclosed in braces, scope continues until the next +macro or word. +If the COND is followed by a BODY on the same line, whether after a +brace or not, then macros +.Em must +begin with a control character. +It is generally more intuitive, in this case, to write +.Bd -literal -offset indent +\&.if COND \e{\e +\&.foo +bar +\&.\e} +.Ed +.Pp +than having the macro follow as +.Pp +.D1 \&.if COND \e{ .foo +.Pp +The scope of a conditional is always parsed, but only executed if the +conditional evaluates to true. +.Pp +Note that text subsequent a +.Sq \&.\e} +macro is discarded. +Furthermore, if an explicit closing sequence +.Sq \e} +is specified in a free-form line, the entire line is accepted within the +scope of the prior macro, not only the text preceding the close. +.Ss \&ig +Ignore input. +Accepts the following syntax: +.Pp +.Bd -literal -offset indent -compact +\&.ig +BODY... +\&.. +.Ed +.Bd -literal -offset indent -compact +\&.ig END +BODY... +\&.END +.Ed +.Pp +In the first case, input is ignored until a +.Sq \&.. +macro is encountered on its own line. 
+In the second case, input is ignored until a +.Sq \&.END +is encountered. +Text subsequent the +.Sq \&.END +or +.Sq \&.. +is discarded. +.Pp +Do not use the escape +.Sq \e +anywhere in the definition of END. +It causes very strange behaviour. +Furthermore, if you redefine a +.Nm +macro, such as +.Pp +.D1 \&.ig if +.Pp +the subsequent invocation of +.Sx \&if +will first signify the end of comment, then be invoked as a macro. +This behaviour really shouldn't be counted upon. +.Sh COMPATIBILITY +This section documents compatibility between mandoc and other +troff implementations, at this time limited to GNU troff +.Pq Qq groff . +The term +.Qq historic groff +refers to groff versions before the +.Pa doc.tmac +file re-write +.Pq somewhere between 1.15 and 1.19 . +.Pp +.Bl -dash -compact +.It +Historic groff did not accept white-space buffering the custom END tag +for the +.Sx \&ig +macro. +.It +The +.Sx \&if +and family would print funny white-spaces with historic groff when +depending on next-line syntax. +.El +.Sh AUTHORS +The +.Nm +reference was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c index d3919193a77..e81112eef7d 100644 --- a/usr.bin/mandoc/roff.c +++ b/usr.bin/mandoc/roff.c @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.1 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: roff.c,v 1.2 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -21,32 +21,56 @@ #include <assert.h> #include <stdlib.h> #include <string.h> +#include <stdio.h> #include "mandoc.h" #include "roff.h" +#define RSTACK_MAX 128 + +#define ROFF_CTL(c) \ + ('.' 
== (c) || '\'' == (c)) + enum rofft { - ROFF_de, - ROFF_dei, ROFF_am, ROFF_ami, + ROFF_am1, + ROFF_de, + ROFF_dei, + ROFF_de1, + ROFF_ds, + ROFF_el, + ROFF_ie, + ROFF_if, ROFF_ig, - ROFF_close, + ROFF_rm, + ROFF_tr, + ROFF_cblock, + ROFF_ccond, ROFF_MAX }; +enum roffrule { + ROFFRULE_ALLOW, + ROFFRULE_DENY +}; + struct roff { struct roffnode *last; /* leaf of stack */ mandocmsg msg; /* err/warn/fatal messages */ void *data; /* privdata for messages */ + enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ + int rstackpos; /* position in rstack */ }; struct roffnode { enum rofft tok; /* type of node */ struct roffnode *parent; /* up one in stack */ - char *end; /* custom end-token */ int line; /* parse line */ int col; /* parse col */ + char *end; /* end-rules: custom token */ + int endspan; /* end-rules: next-line or infty */ + enum roffrule rule; /* current evaluation rule */ }; #define ROFF_ARGS struct roff *r, /* parse ctx */ \ @@ -54,31 +78,52 @@ struct roffnode { char **bufp, /* input buffer */ \ size_t *szp, /* size of input buffer */ \ int ln, /* parse line */ \ - int ppos /* current pos in buffer */ + int ppos, /* original pos in buffer */ \ + int pos, /* current pos in buffer */ \ + int *offs /* reset offset of buffer data */ typedef enum rofferr (*roffproc)(ROFF_ARGS); struct roffmac { const char *name; /* macro name */ - roffproc sub; /* child of control black */ - roffproc new; /* root of stack (type = ROFF_MAX) */ + roffproc proc; /* process new macro */ + roffproc text; /* process as child text of macro */ + roffproc sub; /* process as child of macro */ + int flags; +#define ROFFMAC_STRUCT (1 << 0) /* always interpret */ }; -static enum rofferr roff_new_close(ROFF_ARGS); -static enum rofferr roff_new_ig(ROFF_ARGS); -static enum rofferr roff_sub_ig(ROFF_ARGS); +static enum rofferr roff_block(ROFF_ARGS); +static enum rofferr roff_block_text(ROFF_ARGS); +static enum rofferr roff_block_sub(ROFF_ARGS); +static enum rofferr roff_cblock(ROFF_ARGS); 
+static enum rofferr roff_ccond(ROFF_ARGS); +static enum rofferr roff_cond(ROFF_ARGS); +static enum rofferr roff_cond_text(ROFF_ARGS); +static enum rofferr roff_cond_sub(ROFF_ARGS); +static enum rofferr roff_line(ROFF_ARGS); const struct roffmac roffs[ROFF_MAX] = { - { "de", roff_sub_ig, roff_new_ig }, - { "dei", roff_sub_ig, roff_new_ig }, - { "am", roff_sub_ig, roff_new_ig }, - { "ami", roff_sub_ig, roff_new_ig }, - { "ig", roff_sub_ig, roff_new_ig }, - { ".", NULL, roff_new_close }, + { "am", roff_block, roff_block_text, roff_block_sub, 0 }, + { "ami", roff_block, roff_block_text, roff_block_sub, 0 }, + { "am1", roff_block, roff_block_text, roff_block_sub, 0 }, + { "de", roff_block, roff_block_text, roff_block_sub, 0 }, + { "dei", roff_block, roff_block_text, roff_block_sub, 0 }, + { "de1", roff_block, roff_block_text, roff_block_sub, 0 }, + { "ds", roff_line, NULL, NULL, 0 }, + { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, + { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, + { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, + { "ig", roff_block, roff_block_text, roff_block_sub, 0 }, + { "rm", roff_line, NULL, NULL, 0 }, + { "tr", roff_line, NULL, NULL, 0 }, + { ".", roff_cblock, NULL, NULL, 0 }, + { "\\}", roff_ccond, NULL, NULL, 0 }, }; static void roff_free1(struct roff *); static enum rofft roff_hash_find(const char *); +static void roffnode_cleanscope(struct roff *); static int roffnode_push(struct roff *, enum rofft, int, int); static void roffnode_pop(struct roff *); @@ -113,9 +158,16 @@ roffnode_pop(struct roff *r) { struct roffnode *p; - if (NULL == (p = r->last)) - return; - r->last = p->parent; + assert(r->last); + p = r->last; + + if (ROFF_el == p->tok) + if (r->rstackpos > -1) + r->rstackpos--; + + r->last = r->last->parent; + if (p->end) + free(p->end); free(p); } @@ -138,6 +190,7 @@ roffnode_push(struct roff *r, enum rofft tok, int line, int col) p->parent = r->last; p->line = line; p->col 
= col; + p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY; r->last = p; return(1); @@ -182,35 +235,68 @@ roff_alloc(const mandocmsg msg, void *data) r->msg = msg; r->data = data; + r->rstackpos = -1; return(r); } enum rofferr -roff_parseln(struct roff *r, int ln, char **bufp, size_t *szp) +roff_parseln(struct roff *r, int ln, + char **bufp, size_t *szp, int pos, int *offs) { enum rofft t; int ppos; - if (NULL != r->last) { - /* - * If there's a node on the stack, then jump directly - * into its processing function. - */ + /* + * First, if a scope is open and we're not a macro, pass the + * text through the macro's filter. If a scope isn't open and + * we're not a macro, just let it through. + */ + + if (r->last && ! ROFF_CTL((*bufp)[pos])) { t = r->last->tok; - assert(roffs[t].sub); - return((*roffs[t].sub)(r, t, bufp, szp, ln, 0)); - } else if ('.' != (*bufp)[0] && NULL == r->last) - /* Return when in free text without a context. */ + assert(roffs[t].text); + return((*roffs[t].text) + (r, t, bufp, szp, ln, pos, pos, offs)); + } else if ( ! ROFF_CTL((*bufp)[pos])) return(ROFF_CONT); - /* There's nothing on the stack: make us anew. */ + /* + * If a scope is open, go to the child handler for that macro, + * as it may want to preprocess before doing anything with it. + */ + + if (r->last) { + t = r->last->tok; + assert(roffs[t].sub); + return((*roffs[t].sub) + (r, t, bufp, szp, ln, pos, pos, offs)); + } + + /* + * Lastly, as we've no scope open, try to look up and execute + * the new macro. If no macro is found, simply return and let + * the compilers handle it. 
+ */ - if (ROFF_MAX == (t = roff_parse(*bufp, &ppos))) + ppos = pos; + if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) return(ROFF_CONT); - assert(roffs[t].new); - return((*roffs[t].new)(r, t, bufp, szp, ln, ppos)); + assert(roffs[t].proc); + return((*roffs[t].proc) + (r, t, bufp, szp, ln, ppos, pos, offs)); +} + + +int +roff_endparse(struct roff *r) +{ + + if (NULL == r->last) + return(1); + return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line, + r->last->col, NULL)); } @@ -225,8 +311,8 @@ roff_parse(const char *buf, int *pos) char mac[5]; enum rofft t; - assert('.' == buf[0]); - *pos = 1; + assert(ROFF_CTL(buf[*pos])); + (*pos)++; while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos])) (*pos)++; @@ -237,7 +323,7 @@ roff_parse(const char *buf, int *pos) for (j = 0; j < 4; j++, (*pos)++) if ('\0' == (mac[j] = buf[*pos])) break; - else if (' ' == buf[*pos]) + else if (' ' == buf[*pos] || (j && '\\' == buf[*pos])) break; if (j == 4 || j < 1) @@ -257,54 +343,158 @@ roff_parse(const char *buf, int *pos) /* ARGSUSED */ static enum rofferr -roff_sub_ig(ROFF_ARGS) +roff_cblock(ROFF_ARGS) { - int i, j; - /* Ignore free-text lines. */ + /* + * A block-close `..' should only be invoked as a child of an + * ignore macro, otherwise raise a warning and just ignore it. + */ - if ('.' != (*bufp)[ppos]) + if (NULL == r->last) { + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); return(ROFF_IGN); + } - if (r->last->end) { - i = ppos + 1; + switch (r->last->tok) { + case (ROFF_am): + /* FALLTHROUGH */ + case (ROFF_ami): + /* FALLTHROUGH */ + case (ROFF_am1): + /* FALLTHROUGH */ + case (ROFF_de): + /* FALLTHROUGH */ + case (ROFF_dei): + /* FALLTHROUGH */ + case (ROFF_de1): + /* FALLTHROUGH */ + case (ROFF_ig): + break; + default: + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } - while ((*bufp)[i] && ' ' == (*bufp)[i]) - i++; + if ((*bufp)[pos]) + if ( ! 
(*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL)) + return(ROFF_ERR); - for (j = 0; r->last->end[j]; i++, j++) - if ((*bufp)[i] != r->last->end[j]) - return(ROFF_IGN); + roffnode_pop(r); + roffnode_cleanscope(r); + return(ROFF_IGN); - if (r->last->end[j]) - return(ROFF_IGN); - if ((*bufp)[i] && ' ' != (*bufp)[i]) - return(ROFF_IGN); +} + + +static void +roffnode_cleanscope(struct roff *r) +{ + + while (r->last) { + if (--r->last->endspan < 0) + break; + roffnode_pop(r); + } +} - while (' ' == (*bufp)[i]) - i++; - } else if (ROFF_close != roff_parse(*bufp, &i)) +/* ARGSUSED */ +static enum rofferr +roff_ccond(ROFF_ARGS) +{ + + if (NULL == r->last) { + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); return(ROFF_IGN); + } - roffnode_pop(r); + switch (r->last->tok) { + case (ROFF_el): + /* FALLTHROUGH */ + case (ROFF_ie): + /* FALLTHROUGH */ + case (ROFF_if): + break; + default: + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } - if ('\0' == (*bufp)[i]) + if (r->last->endspan > -1) { + if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + return(ROFF_ERR); return(ROFF_IGN); - if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL)) - return(ROFF_ERR); + } + + if ((*bufp)[pos]) + if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL)) + return(ROFF_ERR); + roffnode_pop(r); + roffnode_cleanscope(r); return(ROFF_IGN); } /* ARGSUSED */ static enum rofferr -roff_new_close(ROFF_ARGS) +roff_block(ROFF_ARGS) { + int sv; + size_t sz; + + if (ROFF_ig != tok && '\0' == (*bufp)[pos]) { + if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } else if (ROFF_ig != tok) { + while ((*bufp)[pos] && ' ' != (*bufp)[pos]) + pos++; + while (' ' == (*bufp)[pos]) + pos++; + } + + if ( ! 
roffnode_push(r, tok, ln, ppos)) + return(ROFF_ERR); + + if ('\0' == (*bufp)[pos]) + return(ROFF_IGN); + + sv = pos; + while ((*bufp)[pos] && ' ' != (*bufp)[pos] && + '\t' != (*bufp)[pos]) + pos++; + + /* + * Note: groff does NOT like escape characters in the input. + * Instead of detecting this, we're just going to let it fly and + * to hell with it. + */ + + assert(pos > sv); + sz = (size_t)(pos - sv); - if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) + if (1 == sz && '.' == (*bufp)[sv]) + return(ROFF_IGN); + + r->last->end = malloc(sz + 1); + + if (NULL == r->last->end) { + (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL); return(ROFF_ERR); + } + + memcpy(r->last->end, *bufp + sv, sz); + r->last->end[(int)sz] = '\0'; + + if ((*bufp)[pos]) + if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL)) + return(ROFF_ERR); return(ROFF_IGN); } @@ -312,62 +502,231 @@ roff_new_close(ROFF_ARGS) /* ARGSUSED */ static enum rofferr -roff_new_ig(ROFF_ARGS) +roff_block_sub(ROFF_ARGS) { - int i; + enum rofft t; + int i, j; - if ( ! roffnode_push(r, tok, ln, ppos)) - return(ROFF_ERR); + /* + * First check whether a custom macro exists at this level. If + * it does, then check against it. This is some of groff's + * stranger behaviours. If we encountered a custom end-scope + * tag and that tag also happens to be a "real" macro, then we + * need to try interpreting it again as a real macro. If it's + * not, then return ignore. Else continue. 
+ */ + + if (r->last->end) { + i = pos + 1; + while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) + i++; + + for (j = 0; r->last->end[j]; j++, i++) + if ((*bufp)[i] != r->last->end[j]) + break; - if (ROFF_ig != tok) { - while ((*bufp)[ppos] && ' ' != (*bufp)[ppos]) - ppos++; - while (' ' == (*bufp)[ppos]) - ppos++; + if ('\0' == r->last->end[j] && + ('\0' == (*bufp)[i] || + ' ' == (*bufp)[i] || + '\t' == (*bufp)[i])) { + roffnode_pop(r); + roffnode_cleanscope(r); + + if (ROFF_MAX != roff_parse(*bufp, &pos)) + return(ROFF_RERUN); + return(ROFF_IGN); + } } - i = (int)ppos; + /* + * If we have no custom end-query or lookup failed, then try + * pulling it out of the hashtable. + */ - while ((*bufp)[i] && ' ' != (*bufp)[i]) - i++; + ppos = pos; + t = roff_parse(*bufp, &pos); - if (i == (int)ppos) + /* If we're not a comment-end, then throw it away. */ + if (ROFF_cblock != t) return(ROFF_IGN); - if ((*bufp)[i]) - if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL)) - return(ROFF_ERR); + assert(roffs[t].proc); + return((*roffs[t].proc)(r, t, bufp, + szp, ln, ppos, pos, offs)); +} + + +/* ARGSUSED */ +static enum rofferr +roff_block_text(ROFF_ARGS) +{ + + return(ROFF_IGN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cond_sub(ROFF_ARGS) +{ + enum rofft t; + enum roffrule rr; + + ppos = pos; + rr = r->last->rule; + + roff_cond_text(r, tok, bufp, szp, ln, ppos, pos, offs); + + if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + + /* + * A denied conditional must evaluate its children if and only + * if they're either structurally required (such as loops and + * conditionals) or a closing macro. + */ + if (ROFFRULE_DENY == rr) + if ( ! 
(ROFFMAC_STRUCT & roffs[t].flags)) + if (ROFF_ccond != t) + return(ROFF_IGN); + + assert(roffs[t].proc); + return((*roffs[t].proc) + (r, t, bufp, szp, ln, ppos, pos, offs)); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cond_text(ROFF_ARGS) +{ + char *ep, *st; + enum roffrule rr; + + rr = r->last->rule; /* - * If the macro has arguments, the first argument (up to the - * next whitespace) is interpreted as an argument marking the - * macro close. Thus, `.ig foo' will close at `.foo'. - * - * NOTE: the closing macro `.foo' in the above case is not - * allowed to have leading spaces with old groff! Thus `.foo' - * != `. foo'. Oh yeah, everything after the `.foo' is lost. - * Merry fucking Christmas. + * We display the value of the text if out current evaluation + * scope permits us to do so. */ - r->last->end = malloc((size_t)(i - ppos) + 1); - if (NULL == r->last->end) { + st = &(*bufp)[pos]; + if (NULL == (ep = strstr(st, "\\}"))) { + roffnode_cleanscope(r); + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + } + + if (ep > st && '\\' != *(ep - 1)) { + ep = '\0'; + roffnode_pop(r); + } + + roffnode_cleanscope(r); + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cond(ROFF_ARGS) +{ + int cpos; /* position of the condition */ + int sv; + + /* Stack overflow! */ + + if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) { (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL); return(ROFF_ERR); } - memcpy(r->last->end, &(*bufp)[ppos], (size_t)(i - ppos)); - r->last->end[i - ppos] = '\0'; + cpos = pos; - return(ROFF_IGN); + if (ROFF_if == tok || ROFF_ie == tok) { + /* + * Read ahead past the conditional. FIXME: this does + * not work, as conditionals don't end on whitespace, + * but are parsed according to a formal grammar. It's + * good enough for now, however. + */ + while ((*bufp)[pos] && ' ' != (*bufp)[pos]) + pos++; + } + + sv = pos; + while (' ' == (*bufp)[pos]) + pos++; + + /* + * Roff is weird. 
If we have just white-space after the + * conditional, it's considered the BODY and we exit without + * really doing anything. Warn about this. It's probably + * wrong. + */ + if ('\0' == (*bufp)[pos] && sv != pos) { + if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL)) + return(ROFF_ERR); + return(ROFF_IGN); + } + + if ( ! roffnode_push(r, tok, ln, ppos)) + return(ROFF_ERR); + + /* XXX: Implement more conditionals. */ + + if (ROFF_if == tok || ROFF_ie == tok) + r->last->rule = 'n' == (*bufp)[cpos] ? + ROFFRULE_ALLOW : ROFFRULE_DENY; + else if (ROFF_el == tok) { + /* + * An `.el' will get the value of the current rstack + * entry set in prior `ie' calls or defaults to DENY. + */ + if (r->rstackpos < 0) + r->last->rule = ROFFRULE_DENY; + else + r->last->rule = r->rstack[r->rstackpos]; + } + if (ROFF_ie == tok) { + /* + * An if-else will put the NEGATION of the current + * evaluated conditional into the stack. + */ + r->rstackpos++; + if (ROFFRULE_DENY == r->last->rule) + r->rstack[r->rstackpos] = ROFFRULE_ALLOW; + else + r->rstack[r->rstackpos] = ROFFRULE_DENY; + } + if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) + r->last->rule = ROFFRULE_DENY; + + r->last->endspan = 1; + + if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { + r->last->endspan = -1; + pos += 2; + } + + /* + * If there are no arguments on the line, the next-line scope is + * assumed. + */ + + if ('\0' == (*bufp)[pos]) + return(ROFF_IGN); + + /* Otherwise re-run the roff parser after recalculating. 
*/ + + *offs = pos; + return(ROFF_RERUN); } -int -roff_endparse(struct roff *r) +/* ARGSUSED */ +static enum rofferr +roff_line(ROFF_ARGS) { - if (NULL == r->last) - return(1); - return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, - r->last->line, r->last->col, NULL)); + return(ROFF_IGN); } diff --git a/usr.bin/mandoc/roff.h b/usr.bin/mandoc/roff.h index b0235eb2ee7..a89c5c85722 100644 --- a/usr.bin/mandoc/roff.h +++ b/usr.bin/mandoc/roff.h @@ -1,4 +1,4 @@ -/* $Id: roff.h,v 1.1 2010/05/16 00:54:03 schwarze Exp $ */ +/* $Id: roff.h,v 1.2 2010/05/20 00:58:02 schwarze Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -18,9 +18,10 @@ #define ROFF_H enum rofferr { - ROFF_CONT, /* re-process line with libmdoc or libman */ - ROFF_IGN, /* ignore line */ - ROFF_ERR, /* badness */ + ROFF_CONT, /* continue processing line */ + ROFF_RERUN, /* re-run roff interpreter with offset */ + ROFF_IGN, /* ignore current line */ + ROFF_ERR /* badness: puke and stop */ }; __BEGIN_DECLS @@ -30,7 +31,8 @@ struct roff; void roff_free(struct roff *); struct roff *roff_alloc(mandocmsg, void *); void roff_reset(struct roff *); -enum rofferr roff_parseln(struct roff *, int, char **, size_t *); +enum rofferr roff_parseln(struct roff *, int, + char **, size_t *, int, int *); int roff_endparse(struct roff *); __END_DECLS |