nihdoc

WIP markup parser (txt -> html)
git clone git://git.alexkarle.com/blag
Log | Files | Refs | README | LICENSE

commit a18a54a3ae97e1e6ff5aae196449587a8492b4ff (patch)
parent 19d6df987ac2d4edc582b048818ba5872c460b4b
Author: Alex Karle <alex@alexkarle.com>
Date:   Sat, 18 Dec 2021 00:20:45 -0500

Refactor parse vars into state struct

This'll allow closeblock to mutate the state, which will be necessary
with nested lists!

Diffstat:
M blag.c | 237 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
1 file changed, 125 insertions(+), 112 deletions(-)

diff --git a/blag.c b/blag.c @@ -76,39 +76,50 @@ void putesc(int c) { } } -int closeblock(int in, int hlvl) { - if (in == HEADER) { - in = NONE; - printf("</h%d>", hlvl); - } else if (in == PARAGRAPH) { - in = PARAGRAPH_BREAK; - } else if (in == PARAGRAPH_BREAK) { - in = NONE; +typedef struct State { + enum Block in; + enum Block in_link; + int hlvl; + int in_code; + int in_ital; + int in_bold; + int escape; + char lnkbuf[2048]; + int lnkidx; +} state; + +void closeblock(state *s) { + if (s->in == HEADER) { + s->in = NONE; + printf("</h%d>", s->hlvl); + } else if (s->in == PARAGRAPH) { + s->in = PARAGRAPH_BREAK; + } else if (s->in == PARAGRAPH_BREAK) { + s->in = NONE; printf("</p>\n"); - } else if (in == CODE) { - in = CODE_BREAK; - } else if (in == CODE_BREAK) { - in = NONE; + } else if (s->in == CODE) { + s->in = CODE_BREAK; + } else if (s->in == CODE_BREAK) { + s->in = NONE; printf("</pre></code>\n"); - } else if (in == QUOTE) { - in = QUOTE_BREAK; - } else if (in == QUOTE_BREAK) { - in = NONE; + } else if (s->in == QUOTE) { + s->in = QUOTE_BREAK; + } else if (s->in == QUOTE_BREAK) { + s->in = NONE; printf("</blockquote>\n"); - } else if (in == ULIST) { - in = ULIST_BREAK; - } else if (in == ULIST_BREAK) { - in = NONE; + } else if (s->in == ULIST) { + s->in = ULIST_BREAK; + } else if (s->in == ULIST_BREAK) { + s->in = NONE; printf("\n</li>\n</ul>\n"); - } else if (in == OLIST) { - in = OLIST_BREAK; - } else if (in == OLIST_BREAK) { - in = NONE; + } else if (s->in == OLIST) { + s->in = OLIST_BREAK; + } else if (s->in == OLIST_BREAK) { + s->in = NONE; printf("\n</li>\n</ol>\n"); } else { /* keep in as is */ } - return in; } int parse() { @@ -123,172 +134,174 @@ int parse() { * action is taken until the parsing is done (usually on ' ') */ int c; - enum Block in = NONE; - enum Block in_link = NONE; - int hlvl = 0; - int in_code = 0; - int in_ital = 0; - int in_bold = 0; - int escape = 0; - char lnkbuf[2048] = {0}; - int lnkidx = 0; + state s = { + .in = NONE, + 
.in_link = NONE, + .hlvl = 0, + .in_code = 0, + .in_ital = 0, + .in_bold = 0, + .escape = 0, + .lnkbuf = {0}, + .lnkidx = 0 + }; while ((c = getchar()) != EOF) { /* Handle Escapes before all else */ - if (escape) { - if (in == NONE) { - in = PARAGRAPH; + if (s.escape) { + if (s.in == NONE) { + s.in = PARAGRAPH; printf("<p>\n"); } putesc(c); - escape = 0; + s.escape = 0; continue; } /* Store links as we go */ - if (in_link == LINK_URL_PARSE && c != ']') { - lnkbuf[lnkidx++] = c; + if (s.in_link == LINK_URL_PARSE && c != ']') { + s.lnkbuf[s.lnkidx++] = c; } /* Handle unique state changes by char */ switch (c) { case '\\': - escape = 1; + s.escape = 1; break; case '#': - if (in == NONE) { - in = HEADER_PARSE; - hlvl = 1; - } else if (in == HEADER_PARSE) { - hlvl++; + if (s.in == NONE) { + s.in = HEADER_PARSE; + s.hlvl = 1; + } else if (s.in == HEADER_PARSE) { + s.hlvl++; } else { /* not a special # */ putesc(c); } break; case ' ': - if (in == HEADER_PARSE) { - printf("<h%d>", hlvl); - in = HEADER; - } else if (in == ULIST_START) { + if (s.in == HEADER_PARSE) { + printf("<h%d>", s.hlvl); + s.in = HEADER; + } else if (s.in == ULIST_START) { printf("<ul>\n<li>\n"); - in = ULIST; - } else if (in == ULIST_PARSE) { + s.in = ULIST; + } else if (s.in == ULIST_PARSE) { printf("\n</li>\n<li>\n"); - in = ULIST; - } else if (in == OLIST_START) { + s.in = ULIST; + } else if (s.in == OLIST_START) { printf("<ol>\n<li>\n"); - in = OLIST; - } else if (in == OLIST_PARSE) { + s.in = OLIST; + } else if (s.in == OLIST_PARSE) { printf("\n</li>\n<li>\n"); - in = OLIST; - } else if (in == NONE) { + s.in = OLIST; + } else if (s.in == NONE) { /* no op */ - } else if (in == ULIST_BREAK) { + } else if (s.in == ULIST_BREAK) { /* assume it's a continuation! */ - in = ULIST; - } else if (in == OLIST_BREAK) { + s.in = ULIST; + } else if (s.in == OLIST_BREAK) { /* assume it's a continuation! 
*/ - in = OLIST; - } else if (in_link == LINK_URL_PARSE) { - in_link = LINK_DESC_PARSE; + s.in = OLIST; + } else if (s.in_link == LINK_URL_PARSE) { + s.in_link = LINK_DESC_PARSE; printf("\">"); } else { putesc(c); } break; case '`': - if (in_link == NONE) { - if (in_code) { + if (s.in_link == NONE) { + if (s.in_code) { printf("</code>"); - in_code = 0; + s.in_code = 0; } else { printf("<code>"); - in_code = 1; + s.in_code = 1; } } break; case '*': - if (!in_code && in_link == NONE) { - if (in_bold) { + if (!s.in_code && s.in_link == NONE) { + if (s.in_bold) { printf("</strong>"); - in_bold = 0; + s.in_bold = 0; } else { printf("<strong>"); - in_bold = 1; + s.in_bold = 1; } } else { putesc(c); } break; case '_': - if (!in_code && in_link == NONE) { - if (in_ital) { + if (!s.in_code && s.in_link == NONE) { + if (s.in_ital) { printf("</em>"); - in_ital = 0; + s.in_ital = 0; } else { printf("<em>"); - in_ital = 1; + s.in_ital = 1; } } else { putesc(c); } break; case '\t': - if (in == NONE) { - in = CODE; + if (s.in == NONE) { + s.in = CODE; printf("<code><pre>\n"); - } else if (in == CODE_BREAK) { - in = CODE; + } else if (s.in == CODE_BREAK) { + s.in = CODE; } else { putesc(c); } break; case '>': - if (in == NONE) { - in = QUOTE; + if (s.in == NONE) { + s.in = QUOTE; printf("<blockquote>\n"); - } else if (in == QUOTE_BREAK) { - in = QUOTE; + } else if (s.in == QUOTE_BREAK) { + s.in = QUOTE; } else { putesc(c); } break; case '[': - if (in_link == NONE && !in_code) { - if (in == NONE) { + if (s.in_link == NONE && !s.in_code) { + if (s.in == NONE) { /* Assume this is a new paragraph */ - in = PARAGRAPH; + s.in = PARAGRAPH; printf("<p>\n"); } - in_link = LINK_URL_PARSE; - lnkidx = 0; + s.in_link = LINK_URL_PARSE; + s.lnkidx = 0; printf("<a href=\""); } else { putesc(c); } break; case ']': - if (in_link == LINK_URL_PARSE) { + if (s.in_link == LINK_URL_PARSE) { /* no description */ - in_link = NONE; - lnkbuf[lnkidx] = '\0'; - printf("\">%s</a>", lnkbuf); - } else if 
(in_link == LINK_DESC_PARSE) { - in_link = NONE; + s.in_link = NONE; + s.lnkbuf[s.lnkidx] = '\0'; + printf("\">%s</a>", s.lnkbuf); + } else if (s.in_link == LINK_DESC_PARSE) { + s.in_link = NONE; printf("</a>"); } else { putesc(c); } break; case '-': - if (in == NONE) { - in = ULIST_START; - } else if (in == ULIST_START || in == ULIST_PARSE) { + if (s.in == NONE) { + s.in = ULIST_START; + } else if (s.in == ULIST_START || s.in == ULIST_PARSE) { /* no op */ - } else if (in == ULIST_BREAK) { + } else if (s.in == ULIST_BREAK) { /* next list item */ - in = ULIST_PARSE; + s.in = ULIST_PARSE; } else { putesc(c); } @@ -303,37 +316,37 @@ int parse() { case '8': case '9': case '.': - if (in == NONE) { - in = OLIST_START; - } else if (in == OLIST_START || in == OLIST_PARSE) { + if (s.in == NONE) { + s.in = OLIST_START; + } else if (s.in == OLIST_START || s.in == OLIST_PARSE) { /* no op */ - } else if (in == OLIST_BREAK) { + } else if (s.in == OLIST_BREAK) { /* next list item */ - in = OLIST_PARSE; + s.in = OLIST_PARSE; } else { putesc(c); } break; case '\n': - in = closeblock(in, hlvl); - if (in != ULIST_BREAK && in != OLIST_BREAK) { + closeblock(&s); + if (s.in != ULIST_BREAK && s.in != OLIST_BREAK) { putesc(c); } break; default: - if (in == NONE) { + if (s.in == NONE) { /* nothing else was matched -> assume new <p> */ - in = PARAGRAPH; + s.in = PARAGRAPH; printf("<p>\n"); - } else if (in == PARAGRAPH_BREAK) { + } else if (s.in == PARAGRAPH_BREAK) { /* We thought it might be the end, but it aint! */ - in = PARAGRAPH; + s.in = PARAGRAPH; } putesc(c); break; } } - closeblock(in, hlvl); + closeblock(&s); return 0; }