summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorschwarze <schwarze@openbsd.org>2010-05-20 00:58:02 +0000
committerschwarze <schwarze@openbsd.org>2010-05-20 00:58:02 +0000
commit62d9ccdb5cd4508f39f09940ef04620cc2fa0244 (patch)
tree6fc1c7ef02fdf31da880a34ea3faf200754cf38f
parentNew scsi code seems to be stable. Pluck previously identified (diff)
downloadwireguard-openbsd-62d9ccdb5cd4508f39f09940ef04620cc2fa0244.tar.xz
wireguard-openbsd-62d9ccdb5cd4508f39f09940ef04620cc2fa0244.zip
Support nested roff instructions:
* allow roff_parseln() to be re-run * allow roff_parseln() to manipulate the line buffer offset * support the offset in the man and mdoc libraries * adapt .if, .ie, .el, .ig, .am* and .de* support * interpret some instructions even in conditional-negative context Coded by kristaps during the last day of the mandoc hackathon. To avoid regressions in the OpenBSD tree, commit this together with some small local additions: * detect roff block end "\}" even on macro lines * actually implement the ".if n" conditional * ignore .ds, .rm and .tr in libroff Also back my old .if/.ie/.el-handling out of libman, reverting: man.h 1.15 man.c 1.25 man_macro.c 1.15 man_validate.c 1.19 man_action.c 1.15 man_term.c 1.28 man_html.c 1.9.
-rw-r--r--usr.bin/mandoc/main.c43
-rw-r--r--usr.bin/mandoc/man.c72
-rw-r--r--usr.bin/mandoc/man.h8
-rw-r--r--usr.bin/mandoc/man_action.c5
-rw-r--r--usr.bin/mandoc/man_html.c5
-rw-r--r--usr.bin/mandoc/man_macro.c94
-rw-r--r--usr.bin/mandoc/man_term.c5
-rw-r--r--usr.bin/mandoc/man_validate.c5
-rw-r--r--usr.bin/mandoc/mandoc.h5
-rw-r--r--usr.bin/mandoc/mdoc.c57
-rw-r--r--usr.bin/mandoc/mdoc.h4
-rw-r--r--usr.bin/mandoc/roff.7265
-rw-r--r--usr.bin/mandoc/roff.c551
-rw-r--r--usr.bin/mandoc/roff.h12
14 files changed, 833 insertions, 298 deletions
diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c
index 63bd938dfdc..96ba8838d7d 100644
--- a/usr.bin/mandoc/main.c
+++ b/usr.bin/mandoc/main.c
@@ -1,4 +1,4 @@
-/* $Id: main.c,v 1.31 2010/05/16 01:16:25 schwarze Exp $ */
+/* $Id: main.c,v 1.32 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -375,7 +375,7 @@ static void
fdesc(struct curparse *curp)
{
struct buf ln, blk;
- int i, pos, lnn, lnn_start, with_mmap;
+ int i, pos, lnn, lnn_start, with_mmap, of;
enum rofferr re;
struct man *man;
struct mdoc *mdoc;
@@ -456,22 +456,42 @@ fdesc(struct curparse *curp)
goto bailout;
ln.buf[pos] = '\0';
- re = roff_parseln(roff, lnn_start, &ln.buf, &ln.sz);
+ /*
+ * A significant amount of complexity is contained by
+ * the roff preprocessor. It's line-oriented but can be
+ * expressed on one line, so we need at times to
+ * readjust our starting point and re-run it. The roff
+ * preprocessor can also readjust the buffers with new
+ * data, so we pass them in wholesale.
+ */
+
+ of = 0;
+ do {
+ re = roff_parseln(roff, lnn_start,
+ &ln.buf, &ln.sz, of, &of);
+ } while (ROFF_RERUN == re);
+
if (ROFF_IGN == re)
continue;
else if (ROFF_ERR == re)
goto bailout;
- /* If unset, assign parser in pset(). */
+ /*
+ * If input parsers have not been allocated, do so now.
+ * We keep these instanced between parsers, but set them
+ * locally per parse routine since we can use different
+ * parsers with each one.
+ */
- if ( ! (man || mdoc) && ! pset(ln.buf, pos, curp, &man, &mdoc))
- goto bailout;
+ if ( ! (man || mdoc))
+ if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc))
+ goto bailout;
- /* Pass down into parsers. */
+ /* Lastly, push down into the parsers themselves. */
- if (man && ! man_parseln(man, lnn_start, ln.buf))
+ if (man && ! man_parseln(man, lnn_start, ln.buf, of))
goto bailout;
- if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf))
+ if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of))
goto bailout;
}
@@ -482,6 +502,8 @@ fdesc(struct curparse *curp)
goto bailout;
}
+ /* Clean up the parse routine ASTs. */
+
if (mdoc && ! mdoc_endparse(mdoc))
goto bailout;
if (man && ! man_endparse(man))
@@ -765,7 +787,8 @@ mwarn(void *arg, int line, int col, const char *msg)
static const char * const mandocerrs[MANDOCERR_MAX] = {
"ok",
"multi-line scope open on exit",
- "request for scope closure when no matching scope is open",
+ "request for scope closure when no matching scope is open: ignored",
+ "macro requires line argument(s): ignored",
"line arguments will be lost",
"memory exhausted"
};
diff --git a/usr.bin/mandoc/man.c b/usr.bin/mandoc/man.c
index 6e8e720e8f5..377fb69e1d8 100644
--- a/usr.bin/mandoc/man.c
+++ b/usr.bin/mandoc/man.c
@@ -1,4 +1,4 @@
-/* $Id: man.c,v 1.30 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: man.c,v 1.31 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -59,7 +59,6 @@ const char *const __man_macronames[MAN_MAX] = {
"nf", "fi", "r", "RE",
"RS", "DT", "UC", "PD",
"Sp", "Vb", "Ve",
- "if", "ie", "el",
};
const char * const *man_macronames = __man_macronames;
@@ -71,11 +70,11 @@ static int man_node_append(struct man *,
static void man_node_free(struct man_node *);
static void man_node_unlink(struct man *,
struct man_node *);
-static int man_ptext(struct man *, int, char *);
-static int man_pmacro(struct man *, int, char *);
+static int man_ptext(struct man *, int, char *, int);
+static int man_pmacro(struct man *, int, char *, int);
static void man_free1(struct man *);
static void man_alloc1(struct man *);
-static int macrowarn(struct man *, int, const char *);
+static int macrowarn(struct man *, int, const char *, int);
const struct man_node *
@@ -145,29 +144,15 @@ man_endparse(struct man *m)
int
-man_parseln(struct man *m, int ln, char *buf)
+man_parseln(struct man *m, int ln, char *buf, int offs)
{
- char *p;
- size_t len;
- int brace_close = 0;
-
- if ((len = strlen(buf)) > 1) {
- p = buf + (len - 2);
- if (p[0] == '\\' && p[1] == '}') {
- brace_close = 1;
- *p = '\0';
- }
- }
- if ('.' == *buf || '\'' == *buf) {
- if ( ! man_pmacro(m, ln, buf))
- return(0);
- } else {
- if ( ! man_ptext(m, ln, buf))
- return(0);
- }
+ if (MAN_HALT & m->flags)
+ return(0);
- return(brace_close ? man_brace_close(m, ln, len-2) : 1);
+ return(('.' == buf[offs] || '\'' == buf[offs]) ?
+ man_pmacro(m, ln, buf, offs) :
+ man_ptext(m, ln, buf, offs));
}
@@ -377,31 +362,33 @@ man_node_delete(struct man *m, struct man_node *p)
static int
-man_ptext(struct man *m, int line, char *buf)
+man_ptext(struct man *m, int line, char *buf, int offs)
{
int i;
/* Ignore bogus comments. */
- if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2])
- return(man_pwarn(m, line, 0, WBADCOMMENT));
+ if ('\\' == buf[offs] &&
+ '.' == buf[offs + 1] &&
+ '"' == buf[offs + 2])
+ return(man_pwarn(m, line, offs, WBADCOMMENT));
/* Literal free-form text whitespace is preserved. */
if (MAN_LITERAL & m->flags) {
- if ( ! man_word_alloc(m, line, 0, buf))
+ if ( ! man_word_alloc(m, line, offs, buf + offs))
return(0);
goto descope;
}
/* Pump blank lines directly into the backend. */
- for (i = 0; ' ' == buf[i]; i++)
+ for (i = offs; ' ' == buf[i]; i++)
/* Skip leading whitespace. */ ;
if ('\0' == buf[i]) {
/* Allocate a blank entry. */
- if ( ! man_word_alloc(m, line, 0, ""))
+ if ( ! man_word_alloc(m, line, offs, ""))
return(0);
goto descope;
}
@@ -428,7 +415,7 @@ man_ptext(struct man *m, int line, char *buf)
buf[i] = '\0';
}
- if ( ! man_word_alloc(m, line, 0, buf))
+ if ( ! man_word_alloc(m, line, offs, buf + offs))
return(0);
/*
@@ -438,7 +425,6 @@ man_ptext(struct man *m, int line, char *buf)
*/
assert(i);
-
if (mandoc_eos(buf, (size_t)i))
m->last->flags |= MAN_EOS;
@@ -461,23 +447,23 @@ descope:
if ( ! man_unscope(m, m->last->parent, WERRMAX))
return(0);
- return(man_body_alloc(m, line, 0, m->last->tok));
+ return(man_body_alloc(m, line, offs, m->last->tok));
}
static int
-macrowarn(struct man *m, int ln, const char *buf)
+macrowarn(struct man *m, int ln, const char *buf, int offs)
{
if ( ! (MAN_IGN_MACRO & m->pflags))
- return(man_verr(m, ln, 0, "unknown macro: %s%s",
+ return(man_verr(m, ln, offs, "unknown macro: %s%s",
buf, strlen(buf) > 3 ? "..." : ""));
- return(man_vwarn(m, ln, 0, "unknown macro: %s%s",
+ return(man_vwarn(m, ln, offs, "unknown macro: %s%s",
buf, strlen(buf) > 3 ? "..." : ""));
}
int
-man_pmacro(struct man *m, int ln, char *buf)
+man_pmacro(struct man *m, int ln, char *buf, int offs)
{
int i, j, ppos;
enum mant tok;
@@ -486,10 +472,12 @@ man_pmacro(struct man *m, int ln, char *buf)
/* Comments and empties are quickly ignored. */
- if ('\0' == buf[1])
+ offs++;
+
+ if ('\0' == buf[offs])
return(1);
- i = 1;
+ i = offs;
/*
* Skip whitespace between the control character and initial
@@ -534,7 +522,7 @@ man_pmacro(struct man *m, int ln, char *buf)
}
if (MAN_MAX == (tok = man_hash_find(mac))) {
- if ( ! macrowarn(m, ln, mac))
+ if ( ! macrowarn(m, ln, mac, ppos))
goto err;
return(1);
}
@@ -640,7 +628,7 @@ out:
if ( ! man_unscope(m, m->last->parent, WERRMAX))
return(0);
- return(man_body_alloc(m, ln, 0, m->last->tok));
+ return(man_body_alloc(m, ln, offs, m->last->tok));
err: /* Error out. */
diff --git a/usr.bin/mandoc/man.h b/usr.bin/mandoc/man.h
index caf863f163e..3d9d629bbed 100644
--- a/usr.bin/mandoc/man.h
+++ b/usr.bin/mandoc/man.h
@@ -1,4 +1,4 @@
-/* $Id: man.h,v 1.19 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: man.h,v 1.20 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -55,9 +55,6 @@ enum mant {
MAN_Sp,
MAN_Vb,
MAN_Ve,
- MAN_if,
- MAN_ie,
- MAN_el,
MAN_MAX,
};
@@ -91,7 +88,6 @@ struct man_node {
#define MAN_VALID (1 << 0)
#define MAN_ACTED (1 << 1)
#define MAN_EOS (1 << 2)
-#define MAN_USE (1 << 3)
enum man_type type;
char *string;
struct man_node *head;
@@ -115,7 +111,7 @@ struct man;
void man_free(struct man *);
struct man *man_alloc(void *, int, const struct man_cb *);
void man_reset(struct man *);
-int man_parseln(struct man *, int, char *buf);
+int man_parseln(struct man *, int, char *, int);
int man_endparse(struct man *);
const struct man_node *man_node(const struct man *);
diff --git a/usr.bin/mandoc/man_action.c b/usr.bin/mandoc/man_action.c
index c4cb1a71026..bf30fe6378a 100644
--- a/usr.bin/mandoc/man_action.c
+++ b/usr.bin/mandoc/man_action.c
@@ -1,4 +1,4 @@
-/* $Id: man_action.c,v 1.17 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: man_action.c,v 1.18 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -66,9 +66,6 @@ const struct actions man_actions[MAN_MAX] = {
{ NULL }, /* Sp */
{ post_nf }, /* Vb */
{ post_fi }, /* Ve */
- { NULL }, /* if */
- { NULL }, /* ie */
- { NULL }, /* el */
};
diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c
index 74ee0935395..2ca3471f540 100644
--- a/usr.bin/mandoc/man_html.c
+++ b/usr.bin/mandoc/man_html.c
@@ -1,4 +1,4 @@
-/* $Id: man_html.c,v 1.11 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: man_html.c,v 1.12 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -102,9 +102,6 @@ static const struct htmlman mans[MAN_MAX] = {
{ man_br_pre, NULL }, /* Sp */
{ man_ign_pre, NULL }, /* Vb */
{ NULL, NULL }, /* Ve */
- { NULL, NULL }, /* if */
- { NULL, NULL }, /* ie */
- { NULL, NULL }, /* el */
};
diff --git a/usr.bin/mandoc/man_macro.c b/usr.bin/mandoc/man_macro.c
index bcf51c42910..d1c8e7d8197 100644
--- a/usr.bin/mandoc/man_macro.c
+++ b/usr.bin/mandoc/man_macro.c
@@ -1,4 +1,4 @@
-/* $Id: man_macro.c,v 1.16 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: man_macro.c,v 1.17 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -30,7 +30,6 @@ enum rew {
static int blk_close(MACRO_PROT_ARGS);
static int blk_exp(MACRO_PROT_ARGS);
static int blk_imp(MACRO_PROT_ARGS);
-static int blk_cond(MACRO_PROT_ARGS);
static int in_line_eoln(MACRO_PROT_ARGS);
static int rew_scope(enum man_type,
@@ -78,9 +77,6 @@ const struct man_macro __man_macros[MAN_MAX] = {
{ in_line_eoln, MAN_NSCOPED }, /* Sp */
{ in_line_eoln, 0 }, /* Vb */
{ in_line_eoln, 0 }, /* Ve */
- { blk_cond, 0 }, /* if */
- { blk_cond, 0 }, /* ie */
- { blk_cond, 0 }, /* el */
};
const struct man_macro * const man_macros = __man_macros;
@@ -252,50 +248,6 @@ rew_scope(enum man_type type, struct man *m, enum mant tok)
/*
- * Closure for brace blocks (if, ie, el).
- */
-int
-man_brace_close(struct man *m, int line, int ppos)
-{
- struct man_node *nif;
-
- nif = m->last->parent;
- while (nif &&
- MAN_if != nif->tok &&
- MAN_ie != nif->tok &&
- MAN_el != nif->tok)
- nif = nif->parent;
-
- if (NULL == nif)
- return(man_pwarn(m, line, ppos, WNOSCOPE));
-
- if (MAN_ie != nif->tok || MAN_USE & nif->flags)
- m->flags &= ~MAN_EL_USE;
- else
- m->flags |= MAN_EL_USE;
-
- if (MAN_USE & nif->flags) {
- if (nif->prev) {
- nif->prev->next = nif->child;
- nif->child->prev = nif->prev;
- nif->prev = NULL;
- } else {
- nif->parent->child = nif->child;
- }
- nif->parent->nchild += nif->nchild - 1;
- while (nif->child) {
- nif->child->parent = nif->parent;
- nif->child = nif->child->next;
- }
- nif->nchild = 0;
- nif->parent = NULL;
- }
- man_node_delete(m, nif);
- return(1);
-}
-
-
-/*
* Close out a generic explicit macro.
*/
/* ARGSUSED */
@@ -439,50 +391,6 @@ blk_imp(MACRO_PROT_ARGS)
}
-/*
- * Parse a conditional roff instruction.
- */
-int
-blk_cond(MACRO_PROT_ARGS)
-{
- char *p = buf + *pos;
- int use;
-
- if (MAN_el == tok)
- use = m->flags & MAN_EL_USE;
- else {
- use = 'n' == *p++;
- /* XXX skip the rest of the condition for now */
- while (*p && !isblank(*p))
- p++;
- }
- m->flags &= ~MAN_EL_USE;
-
- /* advance to the code controlled by the condition */
- while (*p && isblank(*p))
- p++;
- if ('\0' == *p)
- return(1);
-
- /* single-line body */
- if (strncmp("\\{", p, 2)) {
- if (use && ! man_parseln(m, line, p))
- return(0);
- if (MAN_ie == tok && !use)
- m->flags |= MAN_EL_USE;
- return(1);
- }
-
- /* multi-line body */
- if ( ! man_block_alloc(m, line, ppos, tok))
- return(0);
- if (use)
- m->last->flags |= MAN_USE;
- p += 2;
- return(*p ? man_parseln(m, line, p) : 1);
-}
-
-
int
in_line_eoln(MACRO_PROT_ARGS)
{
diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c
index 2e89ae9d795..8711851ea31 100644
--- a/usr.bin/mandoc/man_term.c
+++ b/usr.bin/mandoc/man_term.c
@@ -1,4 +1,4 @@
-/* $Id: man_term.c,v 1.34 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: man_term.c,v 1.35 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -138,9 +138,6 @@ static const struct termact termacts[MAN_MAX] = {
{ pre_sp, NULL, MAN_NOTEXT }, /* Sp */
{ pre_nf, NULL, 0 }, /* Vb */
{ pre_fi, NULL, 0 }, /* Ve */
- { NULL, NULL, 0 }, /* if */
- { NULL, NULL, 0 }, /* ie */
- { NULL, NULL, 0 }, /* el */
};
diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c
index b879304ff85..f2bd0c9309a 100644
--- a/usr.bin/mandoc/man_validate.c
+++ b/usr.bin/mandoc/man_validate.c
@@ -1,4 +1,4 @@
-/* $Id: man_validate.c,v 1.22 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: man_validate.c,v 1.23 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -91,9 +91,6 @@ static const struct man_valid man_valids[MAN_MAX] = {
{ NULL, posts_le1 }, /* Sp */
{ pres_bline, posts_le1 }, /* Vb */
{ pres_bline, posts_eq0 }, /* Ve */
- { NULL, NULL }, /* if */
- { NULL, NULL }, /* ie */
- { NULL, NULL }, /* el */
};
diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h
index 6fe33b0966b..d7e47950feb 100644
--- a/usr.bin/mandoc/mandoc.h
+++ b/usr.bin/mandoc/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.1 2010/05/16 01:46:39 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.2 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -22,9 +22,10 @@ __BEGIN_DECLS
enum mandocerr {
MANDOCERR_OK,
MANDOCERR_SCOPEEXIT, /* scope open on exit */
- MANDOCERR_NOSCOPE, /* request scope close w/none open */
#define MANDOCERR_WARNING MANDOCERR_SCOPEEXIT
+ MANDOCERR_NOSCOPE, /* request scope close w/none open */
+ MANDOCERR_NOARGS, /* macro requires argument(s) */
MANDOCERR_ARGSLOST, /* line arguments will be lost */
#define MANDOCERR_ERROR MANDOCERR_ARGSLOST
diff --git a/usr.bin/mandoc/mdoc.c b/usr.bin/mandoc/mdoc.c
index e5c8121963b..261edfc35a6 100644
--- a/usr.bin/mandoc/mdoc.c
+++ b/usr.bin/mandoc/mdoc.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc.c,v 1.52 2010/05/16 20:46:15 schwarze Exp $ */
+/* $Id: mdoc.c,v 1.53 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -146,9 +146,10 @@ static struct mdoc_node *node_alloc(struct mdoc *, int, int,
enum mdoct, enum mdoc_type);
static int node_append(struct mdoc *,
struct mdoc_node *);
-static int mdoc_ptext(struct mdoc *, int, char *);
-static int mdoc_pmacro(struct mdoc *, int, char *);
-static int macrowarn(struct mdoc *, int, const char *);
+static int mdoc_ptext(struct mdoc *, int, char *, int);
+static int mdoc_pmacro(struct mdoc *, int, char *, int);
+static int macrowarn(struct mdoc *, int,
+ const char *, int);
const struct mdoc_node *
@@ -280,16 +281,16 @@ mdoc_endparse(struct mdoc *m)
* the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
*/
int
-mdoc_parseln(struct mdoc *m, int ln, char *buf)
+mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs)
{
if (MDOC_HALT & m->flags)
return(0);
m->flags |= MDOC_NEWLINE;
- return(('.' == *buf || '\'' == *buf) ?
- mdoc_pmacro(m, ln, buf) :
- mdoc_ptext(m, ln, buf));
+ return(('.' == buf[offs] || '\'' == buf[offs]) ?
+ mdoc_pmacro(m, ln, buf, offs) :
+ mdoc_ptext(m, ln, buf, offs));
}
@@ -626,26 +627,28 @@ mdoc_node_delete(struct mdoc *m, struct mdoc_node *p)
* control character.
*/
static int
-mdoc_ptext(struct mdoc *m, int line, char *buf)
+mdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
{
char *c, *ws, *end;
/* Ignore bogus comments. */
- if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2])
- return(mdoc_pwarn(m, line, 0, EBADCOMMENT));
+ if ('\\' == buf[offs] &&
+ '.' == buf[offs + 1] &&
+ '"' == buf[offs + 2])
+ return(mdoc_pwarn(m, line, offs, EBADCOMMENT));
/* No text before an initial macro. */
if (SEC_NONE == m->lastnamed)
- return(mdoc_perr(m, line, 0, ETEXTPROL));
+ return(mdoc_perr(m, line, offs, ETEXTPROL));
/*
* Search for the beginning of unescaped trailing whitespace (ws)
* and for the first character not to be output (end).
*/
ws = NULL;
- for (c = end = buf; *c; c++) {
+ for (c = end = buf + offs; *c; c++) {
switch (*c) {
case ' ':
if (NULL == ws)
@@ -683,7 +686,7 @@ mdoc_ptext(struct mdoc *m, int line, char *buf)
if ( ! mdoc_pwarn(m, line, (int)(ws-buf), ETAILWS))
return(0);
- if ('\0' == *buf && ! (MDOC_LITERAL & m->flags)) {
+ if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) {
if ( ! mdoc_pwarn(m, line, (int)(c-buf), ENOBLANK))
return(0);
@@ -692,14 +695,14 @@ mdoc_ptext(struct mdoc *m, int line, char *buf)
* blank lines aren't allowed, but enough manuals assume this
* behaviour that we want to work around it.
*/
- if ( ! mdoc_elem_alloc(m, line, 0, MDOC_Pp, NULL))
+ if ( ! mdoc_elem_alloc(m, line, offs, MDOC_Pp, NULL))
return(0);
m->next = MDOC_NEXT_SIBLING;
return(1);
}
- if ( ! mdoc_word_alloc(m, line, 0, buf))
+ if ( ! mdoc_word_alloc(m, line, offs, buf+offs))
return(0);
if (MDOC_LITERAL & m->flags)
@@ -713,7 +716,7 @@ mdoc_ptext(struct mdoc *m, int line, char *buf)
assert(buf < end);
- if (mandoc_eos(buf, (size_t)(end-buf)))
+ if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
m->last->flags |= MDOC_EOS;
return(1);
@@ -721,12 +724,12 @@ mdoc_ptext(struct mdoc *m, int line, char *buf)
static int
-macrowarn(struct mdoc *m, int ln, const char *buf)
+macrowarn(struct mdoc *m, int ln, const char *buf, int offs)
{
if ( ! (MDOC_IGN_MACRO & m->pflags))
- return(mdoc_verr(m, ln, 0, "unknown macro: %s%s",
+ return(mdoc_verr(m, ln, offs, "unknown macro: %s%s",
buf, strlen(buf) > 3 ? "..." : ""));
- return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
+ return(mdoc_vwarn(m, ln, offs, "unknown macro: %s%s",
buf, strlen(buf) > 3 ? "..." : ""));
}
@@ -736,7 +739,7 @@ macrowarn(struct mdoc *m, int ln, const char *buf)
* character.
*/
int
-mdoc_pmacro(struct mdoc *m, int ln, char *buf)
+mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs)
{
enum mdoct tok;
int i, j, sv;
@@ -744,10 +747,12 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf)
/* Empty lines are ignored. */
- if ('\0' == buf[1])
+ offs++;
+
+ if ('\0' == buf[offs])
return(1);
- i = 1;
+ i = offs;
/* Accept whitespace after the initial control char. */
@@ -776,16 +781,16 @@ mdoc_pmacro(struct mdoc *m, int ln, char *buf)
return(mdoc_perr(m, ln, i, EPRINT));
}
- mac[j] = 0;
+ mac[j] = '\0';
if (j == 4 || j < 2) {
- if ( ! macrowarn(m, ln, mac))
+ if ( ! macrowarn(m, ln, mac, sv))
goto err;
return(1);
}
if (MDOC_MAX == (tok = mdoc_hash_find(mac))) {
- if ( ! macrowarn(m, ln, mac))
+ if ( ! macrowarn(m, ln, mac, sv))
goto err;
return(1);
}
diff --git a/usr.bin/mandoc/mdoc.h b/usr.bin/mandoc/mdoc.h
index 31dc068a41f..730c8111296 100644
--- a/usr.bin/mandoc/mdoc.h
+++ b/usr.bin/mandoc/mdoc.h
@@ -1,4 +1,4 @@
-/* $Id: mdoc.h,v 1.23 2010/05/15 18:25:51 schwarze Exp $ */
+/* $Id: mdoc.h,v 1.24 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
*
@@ -297,7 +297,7 @@ struct mdoc;
void mdoc_free(struct mdoc *);
struct mdoc *mdoc_alloc(void *, int, const struct mdoc_cb *);
void mdoc_reset(struct mdoc *);
-int mdoc_parseln(struct mdoc *, int, char *buf);
+int mdoc_parseln(struct mdoc *, int, char *, int);
const struct mdoc_node *mdoc_node(const struct mdoc *);
const struct mdoc_meta *mdoc_meta(const struct mdoc *);
int mdoc_endparse(struct mdoc *);
diff --git a/usr.bin/mandoc/roff.7 b/usr.bin/mandoc/roff.7
new file mode 100644
index 00000000000..6f6ef450876
--- /dev/null
+++ b/usr.bin/mandoc/roff.7
@@ -0,0 +1,265 @@
+.\" $Id: roff.7,v 1.1 2010/05/20 00:58:02 schwarze Exp $
+.\"
+.\" Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: May 20 2010 $
+.Dt ROFF 7
+.Os
+.Sh NAME
+.Nm roff
+.Nd roff language reference
+.Sh DESCRIPTION
+The
+.Nm roff
+language is a general-purpose text-formatting language. The purpose of
+this document is to consistently describe those language constructs
+accepted by the
+.Xr mandoc 1
+utility. It is a work in progress.
+.Pp
+An
+.Nm
+document follows simple rules: lines beginning with the control
+characters
+.Sq \.
+or
+.Sq \(aq
+are parsed for macros. Other lines are interpreted within the scope of
+prior macros:
+.Bd -literal -offset indent
+\&.xx Macro lines change control state.
+Other lines are interpreted within the current state.
+.Ed
+.Sh LANGUAGE SYNTAX
+.Nm
+documents may contain only graphable 7-bit ASCII characters, the space
+character, and, in certain circumstances, the tab character. All
+manuals must have
+.Ux
+line terminators.
+.Sh MACRO SYNTAX
+Macros are arbitrary in length and begin with a control character,
+.Sq \.
+or
+.Sq \(aq ,
+at the beginning of the line.
+An arbitrary amount of whitespace may sit between the control character
+and the macro name.
+Thus, the following are equivalent:
+.Bd -literal -offset indent
+\&.if
+\&.\ \ \ \&if
+.Ed
+.Sh REFERENCE
+This section is a canonical reference of all macros, arranged
+alphabetically.
+.Ss \&am
+The syntax of this macro is the same as that of
+.Sx \&ig ,
+except that a leading argument must be specified.
+It is ignored, as are its children.
+.Ss \&ami
+The syntax of this macro is the same as that of
+.Sx \&ig ,
+except that a leading argument must be specified.
+It is ignored, as are its children.
+.Ss \&am1
+The syntax of this macro is the same as that of
+.Sx \&ig ,
+except that a leading argument must be specified.
+It is ignored, as are its children.
+.Ss \&de
+The syntax of this macro is the same as that of
+.Sx \&ig ,
+except that a leading argument must be specified.
+It is ignored, as are its children.
+.Ss \&dei
+The syntax of this macro is the same as that of
+.Sx \&ig ,
+except that a leading argument must be specified.
+It is ignored, as are its children.
+.Ss \&de1
+The syntax of this macro is the same as that of
+.Sx \&ig ,
+except that a leading argument must be specified.
+It is ignored, as are its children.
+.Ss \&el
+The
+.Qq else
+half of an if/else conditional.
+Pops a result off the stack of conditional evaluations pushed by
+.Sx \&ie
+and uses it as its conditional.
+If no stack entries are present (e.g., due to no prior
+.Sx \&ie
+calls)
+then false is assumed.
+The syntax of this macro is similar to
+.Sx \&if
+except that the conditional is missing.
+.Ss \&ie
+The
+.Qq if
+half of an if/else conditional.
+The result of the conditional is pushed into a stack used by subsequent
+invocations of
+.Sx \&el ,
+which may be separated by any intervening input (or not exist at all).
+Its syntax is equivalent to
+.Sx \&if .
+.Ss \&if
+Begins a conditional that always evaluates to false.
+If a conditional is false, its children are not processed, but are
+syntactically interpreted to preserve the integrity of the input
+document.
+Thus,
+.Pp
+.D1 \&.if t \e .ig
+.Pp
+will discard the
+.Sq \&.ig ,
+which may lead to interesting results, but
+.Pp
+.D1 \&.if t \e .if t \e{\e
+.Pp
+will continue to syntactically interpret to the block close of the final
+conditional.
+Sub-conditionals, in this case, obviously inherit the truth value of
+the parent.
+This macro has the following syntax:
+.Pp
+.Bd -literal -offset indent -compact
+\&.if COND \e{\e
+BODY...
+\&.\e}
+.Ed
+.Bd -literal -offset indent -compact
+\&.if COND \e{ BODY
+BODY... \e}
+.Ed
+.Bd -literal -offset indent -compact
+\&.if COND \e{ BODY
+BODY...
+\&.\e}
+.Ed
+.Bd -literal -offset indent -compact
+\&.if COND \e
+BODY
+.Ed
+.Pp
+COND is a conditional (for the time being, this always evaluates to
+false).
+.Pp
+If the BODY section is begun by an escaped brace
+.Sq \e{ ,
+scope continues until a closing-brace macro
+.Sq \.\e} .
+If the BODY is not enclosed in braces, scope continues until the next
+macro or word.
+If the COND is followed by a BODY on the same line, whether after a
+brace or not, then macros
+.Em must
+begin with a control character.
+It is generally more intuitive, in this case, to write
+.Bd -literal -offset indent
+\&.if COND \e{\e
+\&.foo
+bar
+\&.\e}
+.Ed
+.Pp
+than having the macro follow as
+.Pp
+.D1 \&.if COND \e{ .foo
+.Pp
+The scope of a conditional is always parsed, but only executed if the
+conditional evaluates to true.
+.Pp
+Note that text subsequent to a
+.Sq \&.\e}
+macro is discarded.
+Furthermore, if an explicit closing sequence
+.Sq \e}
+is specified in a free-form line, the entire line is accepted within the
+scope of the prior macro, not only the text preceding the close.
+.Ss \&ig
+Ignore input.
+Accepts the following syntax:
+.Pp
+.Bd -literal -offset indent -compact
+\&.ig
+BODY...
+\&..
+.Ed
+.Bd -literal -offset indent -compact
+\&.ig END
+BODY...
+\&.END
+.Ed
+.Pp
+In the first case, input is ignored until a
+.Sq \&..
+macro is encountered on its own line.
+In the second case, input is ignored until a
+.Sq \&.END
+is encountered.
+Text subsequent to the
+.Sq \&.END
+or
+.Sq \&..
+is discarded.
+.Pp
+Do not use the escape
+.Sq \e
+anywhere in the definition of END.
+It causes very strange behaviour.
+Furthermore, if you redefine a
+.Nm
+macro, such as
+.Pp
+.D1 \&.ig if
+.Pp
+the subsequent invocation of
+.Sx \&if
+will first signify the end of comment, then be invoked as a macro.
+This behaviour really shouldn't be counted upon.
+.Sh COMPATIBILITY
+This section documents compatibility between mandoc and other
+troff implementations, at this time limited to GNU troff
+.Pq Qq groff .
+The term
+.Qq historic groff
+refers to groff versions before the
+.Pa doc.tmac
+file re-write
+.Pq somewhere between 1.15 and 1.19 .
+.Pp
+.Bl -dash -compact
+.It
+Historic groff did not accept white-space buffering the custom END tag
+for the
+.Sx \&ig
+macro.
+.It
+The
+.Sx \&if
+and family would print funny white-spaces with historic groff when
+depending on next-line syntax.
+.El
+.Sh AUTHORS
+The
+.Nm
+reference was written by
+.An Kristaps Dzonsons Aq kristaps@bsd.lv .
diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c
index d3919193a77..e81112eef7d 100644
--- a/usr.bin/mandoc/roff.c
+++ b/usr.bin/mandoc/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.1 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: roff.c,v 1.2 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -21,32 +21,56 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
+#include <stdio.h>
#include "mandoc.h"
#include "roff.h"
+#define RSTACK_MAX 128
+
+#define ROFF_CTL(c) \
+ ('.' == (c) || '\'' == (c))
+
enum rofft {
- ROFF_de,
- ROFF_dei,
ROFF_am,
ROFF_ami,
+ ROFF_am1,
+ ROFF_de,
+ ROFF_dei,
+ ROFF_de1,
+ ROFF_ds,
+ ROFF_el,
+ ROFF_ie,
+ ROFF_if,
ROFF_ig,
- ROFF_close,
+ ROFF_rm,
+ ROFF_tr,
+ ROFF_cblock,
+ ROFF_ccond,
ROFF_MAX
};
+enum roffrule {
+ ROFFRULE_ALLOW,
+ ROFFRULE_DENY
+};
+
struct roff {
struct roffnode *last; /* leaf of stack */
mandocmsg msg; /* err/warn/fatal messages */
void *data; /* privdata for messages */
+ enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
+ int rstackpos; /* position in rstack */
};
struct roffnode {
enum rofft tok; /* type of node */
struct roffnode *parent; /* up one in stack */
- char *end; /* custom end-token */
int line; /* parse line */
int col; /* parse col */
+ char *end; /* end-rules: custom token */
+ int endspan; /* end-rules: next-line or infty */
+ enum roffrule rule; /* current evaluation rule */
};
#define ROFF_ARGS struct roff *r, /* parse ctx */ \
@@ -54,31 +78,52 @@ struct roffnode {
char **bufp, /* input buffer */ \
size_t *szp, /* size of input buffer */ \
int ln, /* parse line */ \
- int ppos /* current pos in buffer */
+ int ppos, /* original pos in buffer */ \
+ int pos, /* current pos in buffer */ \
+ int *offs /* reset offset of buffer data */
typedef enum rofferr (*roffproc)(ROFF_ARGS);
struct roffmac {
const char *name; /* macro name */
- roffproc sub; /* child of control black */
- roffproc new; /* root of stack (type = ROFF_MAX) */
+ roffproc proc; /* process new macro */
+ roffproc text; /* process as child text of macro */
+ roffproc sub; /* process as child of macro */
+ int flags;
+#define ROFFMAC_STRUCT (1 << 0) /* always interpret */
};
-static enum rofferr roff_new_close(ROFF_ARGS);
-static enum rofferr roff_new_ig(ROFF_ARGS);
-static enum rofferr roff_sub_ig(ROFF_ARGS);
+static enum rofferr roff_block(ROFF_ARGS);
+static enum rofferr roff_block_text(ROFF_ARGS);
+static enum rofferr roff_block_sub(ROFF_ARGS);
+static enum rofferr roff_cblock(ROFF_ARGS);
+static enum rofferr roff_ccond(ROFF_ARGS);
+static enum rofferr roff_cond(ROFF_ARGS);
+static enum rofferr roff_cond_text(ROFF_ARGS);
+static enum rofferr roff_cond_sub(ROFF_ARGS);
+static enum rofferr roff_line(ROFF_ARGS);
const struct roffmac roffs[ROFF_MAX] = {
- { "de", roff_sub_ig, roff_new_ig },
- { "dei", roff_sub_ig, roff_new_ig },
- { "am", roff_sub_ig, roff_new_ig },
- { "ami", roff_sub_ig, roff_new_ig },
- { "ig", roff_sub_ig, roff_new_ig },
- { ".", NULL, roff_new_close },
+ { "am", roff_block, roff_block_text, roff_block_sub, 0 },
+ { "ami", roff_block, roff_block_text, roff_block_sub, 0 },
+ { "am1", roff_block, roff_block_text, roff_block_sub, 0 },
+ { "de", roff_block, roff_block_text, roff_block_sub, 0 },
+ { "dei", roff_block, roff_block_text, roff_block_sub, 0 },
+ { "de1", roff_block, roff_block_text, roff_block_sub, 0 },
+ { "ds", roff_line, NULL, NULL, 0 },
+ { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
+ { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
+ { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
+ { "ig", roff_block, roff_block_text, roff_block_sub, 0 },
+ { "rm", roff_line, NULL, NULL, 0 },
+ { "tr", roff_line, NULL, NULL, 0 },
+ { ".", roff_cblock, NULL, NULL, 0 },
+ { "\\}", roff_ccond, NULL, NULL, 0 },
};
static void roff_free1(struct roff *);
static enum rofft roff_hash_find(const char *);
+static void roffnode_cleanscope(struct roff *);
static int roffnode_push(struct roff *,
enum rofft, int, int);
static void roffnode_pop(struct roff *);
@@ -113,9 +158,16 @@ roffnode_pop(struct roff *r)
{
struct roffnode *p;
- if (NULL == (p = r->last))
- return;
- r->last = p->parent;
+ assert(r->last);
+ p = r->last;
+
+ if (ROFF_el == p->tok)
+ if (r->rstackpos > -1)
+ r->rstackpos--;
+
+ r->last = r->last->parent;
+ if (p->end)
+ free(p->end);
free(p);
}
@@ -138,6 +190,7 @@ roffnode_push(struct roff *r, enum rofft tok, int line, int col)
p->parent = r->last;
p->line = line;
p->col = col;
+ p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
r->last = p;
return(1);
@@ -182,35 +235,68 @@ roff_alloc(const mandocmsg msg, void *data)
r->msg = msg;
r->data = data;
+ r->rstackpos = -1;
return(r);
}
enum rofferr
-roff_parseln(struct roff *r, int ln, char **bufp, size_t *szp)
+roff_parseln(struct roff *r, int ln,
+ char **bufp, size_t *szp, int pos, int *offs)
{
enum rofft t;
int ppos;
- if (NULL != r->last) {
- /*
- * If there's a node on the stack, then jump directly
- * into its processing function.
- */
+ /*
+ * First, if a scope is open and we're not a macro, pass the
+ * text through the macro's filter. If a scope isn't open and
+ * we're not a macro, just let it through.
+ */
+
+ if (r->last && ! ROFF_CTL((*bufp)[pos])) {
t = r->last->tok;
- assert(roffs[t].sub);
- return((*roffs[t].sub)(r, t, bufp, szp, ln, 0));
- } else if ('.' != (*bufp)[0] && NULL == r->last)
- /* Return when in free text without a context. */
+ assert(roffs[t].text);
+ return((*roffs[t].text)
+ (r, t, bufp, szp, ln, pos, pos, offs));
+ } else if ( ! ROFF_CTL((*bufp)[pos]))
return(ROFF_CONT);
- /* There's nothing on the stack: make us anew. */
+ /*
+ * If a scope is open, go to the child handler for that macro,
+ * as it may want to preprocess before doing anything with it.
+ */
+
+ if (r->last) {
+ t = r->last->tok;
+ assert(roffs[t].sub);
+ return((*roffs[t].sub)
+ (r, t, bufp, szp, ln, pos, pos, offs));
+ }
+
+ /*
+ * Lastly, as we've no scope open, try to look up and execute
+ * the new macro. If no macro is found, simply return and let
+ * the compilers handle it.
+ */
- if (ROFF_MAX == (t = roff_parse(*bufp, &ppos)))
+ ppos = pos;
+ if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
return(ROFF_CONT);
- assert(roffs[t].new);
- return((*roffs[t].new)(r, t, bufp, szp, ln, ppos));
+ assert(roffs[t].proc);
+ return((*roffs[t].proc)
+ (r, t, bufp, szp, ln, ppos, pos, offs));
+}
+
+
+int
+roff_endparse(struct roff *r)
+{
+
+ if (NULL == r->last)
+ return(1);
+ return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
+ r->last->col, NULL));
}
@@ -225,8 +311,8 @@ roff_parse(const char *buf, int *pos)
char mac[5];
enum rofft t;
- assert('.' == buf[0]);
- *pos = 1;
+ assert(ROFF_CTL(buf[*pos]));
+ (*pos)++;
while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
(*pos)++;
@@ -237,7 +323,7 @@ roff_parse(const char *buf, int *pos)
for (j = 0; j < 4; j++, (*pos)++)
if ('\0' == (mac[j] = buf[*pos]))
break;
- else if (' ' == buf[*pos])
+ else if (' ' == buf[*pos] || (j && '\\' == buf[*pos]))
break;
if (j == 4 || j < 1)
@@ -257,54 +343,158 @@ roff_parse(const char *buf, int *pos)
/* ARGSUSED */
static enum rofferr
-roff_sub_ig(ROFF_ARGS)
+roff_cblock(ROFF_ARGS)
{
- int i, j;
- /* Ignore free-text lines. */
+ /*
+ * A block-close `..' should only be invoked as a child of an
+ * open block macro (am, de, ig or a variant); otherwise warn and ignore it.
+ */
- if ('.' != (*bufp)[ppos])
+ if (NULL == r->last) {
+ if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
+ return(ROFF_ERR);
return(ROFF_IGN);
+ }
- if (r->last->end) {
- i = ppos + 1;
+ switch (r->last->tok) {
+ case (ROFF_am):
+ /* FALLTHROUGH */
+ case (ROFF_ami):
+ /* FALLTHROUGH */
+ case (ROFF_am1):
+ /* FALLTHROUGH */
+ case (ROFF_de):
+ /* FALLTHROUGH */
+ case (ROFF_dei):
+ /* FALLTHROUGH */
+ case (ROFF_de1):
+ /* FALLTHROUGH */
+ case (ROFF_ig):
+ break;
+ default:
+ if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
+ return(ROFF_ERR);
+ return(ROFF_IGN);
+ }
- while ((*bufp)[i] && ' ' == (*bufp)[i])
- i++;
+ if ((*bufp)[pos])
+ if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
+ return(ROFF_ERR);
- for (j = 0; r->last->end[j]; i++, j++)
- if ((*bufp)[i] != r->last->end[j])
- return(ROFF_IGN);
+ roffnode_pop(r);
+ roffnode_cleanscope(r);
+ return(ROFF_IGN);
- if (r->last->end[j])
- return(ROFF_IGN);
- if ((*bufp)[i] && ' ' != (*bufp)[i])
- return(ROFF_IGN);
+}
+
+
+static void
+roffnode_cleanscope(struct roff *r)
+{
+
+ while (r->last) {
+ if (--r->last->endspan < 0)
+ break;
+ roffnode_pop(r);
+ }
+}
- while (' ' == (*bufp)[i])
- i++;
- } else if (ROFF_close != roff_parse(*bufp, &i))
+/* ARGSUSED */
+static enum rofferr
+roff_ccond(ROFF_ARGS)
+{
+
+ if (NULL == r->last) {
+ if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
+ return(ROFF_ERR);
return(ROFF_IGN);
+ }
- roffnode_pop(r);
+ switch (r->last->tok) {
+ case (ROFF_el):
+ /* FALLTHROUGH */
+ case (ROFF_ie):
+ /* FALLTHROUGH */
+ case (ROFF_if):
+ break;
+ default:
+ if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
+ return(ROFF_ERR);
+ return(ROFF_IGN);
+ }
- if ('\0' == (*bufp)[i])
+ if (r->last->endspan > -1) {
+ if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
+ return(ROFF_ERR);
return(ROFF_IGN);
- if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL))
- return(ROFF_ERR);
+ }
+
+ if ((*bufp)[pos])
+ if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
+ return(ROFF_ERR);
+ roffnode_pop(r);
+ roffnode_cleanscope(r);
return(ROFF_IGN);
}
/* ARGSUSED */
static enum rofferr
-roff_new_close(ROFF_ARGS)
+roff_block(ROFF_ARGS)
{
+ int sv;
+ size_t sz;
+
+ if (ROFF_ig != tok && '\0' == (*bufp)[pos]) {
+ if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
+ return(ROFF_ERR);
+ return(ROFF_IGN);
+ } else if (ROFF_ig != tok) {
+ while ((*bufp)[pos] && ' ' != (*bufp)[pos])
+ pos++;
+ while (' ' == (*bufp)[pos])
+ pos++;
+ }
+
+ if ( ! roffnode_push(r, tok, ln, ppos))
+ return(ROFF_ERR);
+
+ if ('\0' == (*bufp)[pos])
+ return(ROFF_IGN);
+
+ sv = pos;
+ while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
+ '\t' != (*bufp)[pos])
+ pos++;
+
+ /*
+ * Note: groff does NOT like escape characters in the input.
+ * Instead of detecting this, we're just going to let it fly and
+ * to hell with it.
+ */
+
+ assert(pos > sv);
+ sz = (size_t)(pos - sv);
- if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
+ if (1 == sz && '.' == (*bufp)[sv])
+ return(ROFF_IGN);
+
+ r->last->end = malloc(sz + 1);
+
+ if (NULL == r->last->end) {
+ (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
return(ROFF_ERR);
+ }
+
+ memcpy(r->last->end, *bufp + sv, sz);
+ r->last->end[(int)sz] = '\0';
+
+ if ((*bufp)[pos])
+ if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
+ return(ROFF_ERR);
return(ROFF_IGN);
}
@@ -312,62 +502,231 @@ roff_new_close(ROFF_ARGS)
/* ARGSUSED */
static enum rofferr
-roff_new_ig(ROFF_ARGS)
+roff_block_sub(ROFF_ARGS)
{
- int i;
+ enum rofft t;
+ int i, j;
- if ( ! roffnode_push(r, tok, ln, ppos))
- return(ROFF_ERR);
+ /*
+ * First check whether a custom macro exists at this level. If
+ * it does, then check against it. This is one of groff's
+ * stranger behaviours. If we encountered a custom end-scope
+ * tag and that tag also happens to be a "real" macro, then we
+ * need to try interpreting it again as a real macro. If it's
+ * not, then return ignore. Else continue.
+ */
+
+ if (r->last->end) {
+ i = pos + 1;
+ while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
+ i++;
+
+ for (j = 0; r->last->end[j]; j++, i++)
+ if ((*bufp)[i] != r->last->end[j])
+ break;
- if (ROFF_ig != tok) {
- while ((*bufp)[ppos] && ' ' != (*bufp)[ppos])
- ppos++;
- while (' ' == (*bufp)[ppos])
- ppos++;
+ if ('\0' == r->last->end[j] &&
+ ('\0' == (*bufp)[i] ||
+ ' ' == (*bufp)[i] ||
+ '\t' == (*bufp)[i])) {
+ roffnode_pop(r);
+ roffnode_cleanscope(r);
+
+ if (ROFF_MAX != roff_parse(*bufp, &pos))
+ return(ROFF_RERUN);
+ return(ROFF_IGN);
+ }
}
- i = (int)ppos;
+ /*
+ * If we have no custom end-query or lookup failed, then try
+ * pulling it out of the hashtable.
+ */
- while ((*bufp)[i] && ' ' != (*bufp)[i])
- i++;
+ ppos = pos;
+ t = roff_parse(*bufp, &pos);
- if (i == (int)ppos)
+ /* If we're not a comment-end, then throw it away. */
+ if (ROFF_cblock != t)
return(ROFF_IGN);
- if ((*bufp)[i])
- if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL))
- return(ROFF_ERR);
+ assert(roffs[t].proc);
+ return((*roffs[t].proc)(r, t, bufp,
+ szp, ln, ppos, pos, offs));
+}
+
+
+/* ARGSUSED */
+static enum rofferr
+roff_block_text(ROFF_ARGS)
+{
+
+ return(ROFF_IGN);
+}
+
+
+/* ARGSUSED */
+static enum rofferr
+roff_cond_sub(ROFF_ARGS)
+{
+ enum rofft t;
+ enum roffrule rr;
+
+ ppos = pos;
+ rr = r->last->rule;
+
+ roff_cond_text(r, tok, bufp, szp, ln, ppos, pos, offs);
+
+ if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
+ return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
+
+ /*
+ * A denied conditional must evaluate its children if and only
+ * if they're either structurally required (such as loops and
+ * conditionals) or a closing macro.
+ */
+ if (ROFFRULE_DENY == rr)
+ if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
+ if (ROFF_ccond != t)
+ return(ROFF_IGN);
+
+ assert(roffs[t].proc);
+ return((*roffs[t].proc)
+ (r, t, bufp, szp, ln, ppos, pos, offs));
+}
+
+
+/* ARGSUSED */
+static enum rofferr
+roff_cond_text(ROFF_ARGS)
+{
+ char *ep, *st;
+ enum roffrule rr;
+
+ rr = r->last->rule;
/*
- * If the macro has arguments, the first argument (up to the
- * next whitespace) is interpreted as an argument marking the
- * macro close. Thus, `.ig foo' will close at `.foo'.
- *
- * NOTE: the closing macro `.foo' in the above case is not
- * allowed to have leading spaces with old groff! Thus `.foo'
- * != `. foo'. Oh yeah, everything after the `.foo' is lost.
- * Merry fucking Christmas.
+ * We display the value of the text if our current evaluation
+ * scope permits us to do so.
*/
- r->last->end = malloc((size_t)(i - ppos) + 1);
- if (NULL == r->last->end) {
+ st = &(*bufp)[pos];
+ if (NULL == (ep = strstr(st, "\\}"))) {
+ roffnode_cleanscope(r);
+ return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
+ }
+
+ if (ep > st && '\\' != *(ep - 1)) {
+ ep = '\0';
+ roffnode_pop(r);
+ }
+
+ roffnode_cleanscope(r);
+ return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
+}
+
+
+/* ARGSUSED */
+static enum rofferr
+roff_cond(ROFF_ARGS)
+{
+ int cpos; /* position of the condition */
+ int sv;
+
+ /* Stack overflow! */
+
+ if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
(*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
return(ROFF_ERR);
}
- memcpy(r->last->end, &(*bufp)[ppos], (size_t)(i - ppos));
- r->last->end[i - ppos] = '\0';
+ cpos = pos;
- return(ROFF_IGN);
+ if (ROFF_if == tok || ROFF_ie == tok) {
+ /*
+ * Read ahead past the conditional. FIXME: this does
+ * not work, as conditionals don't end on whitespace,
+ * but are parsed according to a formal grammar. It's
+ * good enough for now, however.
+ */
+ while ((*bufp)[pos] && ' ' != (*bufp)[pos])
+ pos++;
+ }
+
+ sv = pos;
+ while (' ' == (*bufp)[pos])
+ pos++;
+
+ /*
+ * Roff is weird. If we have just white-space after the
+ * conditional, it's considered the BODY and we exit without
+ * really doing anything. Warn about this. It's probably
+ * wrong.
+ */
+ if ('\0' == (*bufp)[pos] && sv != pos) {
+ if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
+ return(ROFF_ERR);
+ return(ROFF_IGN);
+ }
+
+ if ( ! roffnode_push(r, tok, ln, ppos))
+ return(ROFF_ERR);
+
+ /* XXX: Implement more conditionals. */
+
+ if (ROFF_if == tok || ROFF_ie == tok)
+ r->last->rule = 'n' == (*bufp)[cpos] ?
+ ROFFRULE_ALLOW : ROFFRULE_DENY;
+ else if (ROFF_el == tok) {
+ /*
+ * An `.el' will get the value of the current rstack
+ * entry set in prior `ie' calls or defaults to DENY.
+ */
+ if (r->rstackpos < 0)
+ r->last->rule = ROFFRULE_DENY;
+ else
+ r->last->rule = r->rstack[r->rstackpos];
+ }
+ if (ROFF_ie == tok) {
+ /*
+ * An if-else will put the NEGATION of the current
+ * evaluated conditional into the stack.
+ */
+ r->rstackpos++;
+ if (ROFFRULE_DENY == r->last->rule)
+ r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
+ else
+ r->rstack[r->rstackpos] = ROFFRULE_DENY;
+ }
+ if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
+ r->last->rule = ROFFRULE_DENY;
+
+ r->last->endspan = 1;
+
+ if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
+ r->last->endspan = -1;
+ pos += 2;
+ }
+
+ /*
+ * If there are no arguments on the line, the next-line scope is
+ * assumed.
+ */
+
+ if ('\0' == (*bufp)[pos])
+ return(ROFF_IGN);
+
+ /* Otherwise re-run the roff parser after recalculating. */
+
+ *offs = pos;
+ return(ROFF_RERUN);
}
-int
-roff_endparse(struct roff *r)
+/* ARGSUSED */
+static enum rofferr
+roff_line(ROFF_ARGS)
{
- if (NULL == r->last)
- return(1);
- return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
- r->last->line, r->last->col, NULL));
+ return(ROFF_IGN);
}
diff --git a/usr.bin/mandoc/roff.h b/usr.bin/mandoc/roff.h
index b0235eb2ee7..a89c5c85722 100644
--- a/usr.bin/mandoc/roff.h
+++ b/usr.bin/mandoc/roff.h
@@ -1,4 +1,4 @@
-/* $Id: roff.h,v 1.1 2010/05/16 00:54:03 schwarze Exp $ */
+/* $Id: roff.h,v 1.2 2010/05/20 00:58:02 schwarze Exp $ */
/*
* Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -18,9 +18,10 @@
#define ROFF_H
enum rofferr {
- ROFF_CONT, /* re-process line with libmdoc or libman */
- ROFF_IGN, /* ignore line */
- ROFF_ERR, /* badness */
+ ROFF_CONT, /* continue processing line */
+ ROFF_RERUN, /* re-run roff interpreter with offset */
+ ROFF_IGN, /* ignore current line */
+ ROFF_ERR /* badness: puke and stop */
};
__BEGIN_DECLS
@@ -30,7 +31,8 @@ struct roff;
void roff_free(struct roff *);
struct roff *roff_alloc(mandocmsg, void *);
void roff_reset(struct roff *);
-enum rofferr roff_parseln(struct roff *, int, char **, size_t *);
+enum rofferr roff_parseln(struct roff *, int,
+ char **, size_t *, int, int *);
int roff_endparse(struct roff *);
__END_DECLS