nihdoc

WIP markup parser (txt -> html)
git clone git://git.alexkarle.com.com/blag
Log | Files | Refs | README | LICENSE

commit f9acd865af9fc42d7586760adbbce658e95c9b43 (patch)
parent e7aceb8df91e4225a1e4d690ca212bf6e4a64b2a
Author: Alex Karle <alex@alexkarle.com>
Date:   Wed, 29 Dec 2021 11:16:48 -0500

refactor: Replace mini-link state machine with playback mechanism

The first implementation of the link description formatting had a mini-
version of the larger state machine in the pushbuf() function, which
is a bit of a maintenance burden for future changes that need duplication.

This patch removes the need for that mini-state machine clone by
refactoring the main loop into its own function that allows for
"playback". Certain things are still global of course, but the character
being parsed is parameterized so that we can feed lnkdes[] one char
at a time to the buffer!

Diffstat:
M nihdoc.c | 318 +++++++++++++++++++++++++++++++++++++++----------------------------------------
1 file changed, 156 insertions(+), 162 deletions(-)

diff --git a/nihdoc.c b/nihdoc.c @@ -7,7 +7,6 @@ #include <stdio.h> #include <unistd.h> #include <stdbool.h> -#include <string.h> #define LNK_DES_MAX 2048 @@ -38,13 +37,13 @@ enum Link { /* Start Global State */ -int c = '0'; enum Block in = NONE; enum Link in_link = NOL; int hlvl = 0; bool fmts[256] = {false}; /* indexed by _ * ` */ bool escape = false; char lnkdes[LNK_DES_MAX] = {0}; +int lnkidx = 0; int indent = 0; int previndent = 0; int listdepth = 0; @@ -117,7 +116,7 @@ void handle_lf() { } } -bool fmt_disabled() { +bool fmt_disabled(int c) { /* `` blocks disable all but the next `, likewise CODE makes all disabled */ if (in == CODE || in_link == URL_PARSE) { return true; @@ -126,8 +125,8 @@ bool fmt_disabled() { } } -void toggle_format() { - if (!fmt_disabled()) { +void toggle_format(int c) { + if (!fmt_disabled(c)) { maybe_startp(); printf("<%s%s>", fmts[c] ? "/" : "", FMT_STRS[c]); fmts[c] ^= true; @@ -136,182 +135,177 @@ void toggle_format() { } } -void pushbuf() { - /* push description onto lnkdes, with formatting! */ - char buf[12] = {0}; +void handlec(int c) { + /* the main state machine, abstracted into a function to allow + * playback support for buffered types (i.e. link descriptions) */ + + /* Handle Escapes before any other bit of the main switch */ if (escape) { - buf[0] = c; + maybe_startp(); + putesc(c); escape = false; - } else if (c == '`' || c == '_' || c == '*') { - snprintf(buf, 12, "<%s%s>", fmts[c] ? 
"/" : "", FMT_STRS[c]); - fmts[c] ^= true; - } else if (c == '\\') { - escape = true; - } else { - buf[0] = c; + return; } - escape || strlcat(lnkdes, buf, LNK_DES_MAX); + + /* Store the indentation */ + if (!linestarted && c == ' ') { + indent++; + return; /* don't print leading indents */ + } + + switch (c) { + case '\\': + escape = true; + break; + case '#': + if (in == NONE) { + in = HEADER_PARSE; + hlvl = 1; + } else if (in == HEADER_PARSE) { + hlvl++; + } else { + /* not a special # */ + putesc(c); + } + break; + case ' ': + if (in == HEADER_PARSE) { + printf("<h%d>", hlvl); + in = HEADER; + } else if (in == LIST_PARSE) { + if (!listdepth) { + newlist(); + } else { + if (previndent < indent) { + newlist(); + } else if (previndent > indent) { + endlist(); + printf("</li>\n<li>\n"); + } else { + in = LIST; + printf("</li>\n<li>\n"); + } + } + } else { + putesc(c); + } + break; + case '*': + case '`': + case '_': + toggle_format(c); + break; + case '\t': + if (in == NONE) { + in = CODE; + printf("<pre><code>"); + } else if (lastc == '\n' && c == '\t') { + /* no op */ + } else { + putesc(c); + } + break; + case '>': + if (in == NONE) { + in = PARAGRAPH; + /* only start a new blockquote block is not already in one */ + printf("%s", blockquote ? 
"<p>\n" : "<blockquote>\n<p>\n"); + blockquote = true; + } else if (lastc == '\n' && c == '>') { + /* no op */ + } else { + putesc(c); + } + break; + case '[': + if (in_link == NOL && !fmt_disabled(c)) { + maybe_startp(); + in_link = DESC_PARSE; + lnkidx = 0; + } else { + putesc(c); + } + break; + case ']': + if (in_link == DESC_PARSE) { + in_link = OPT_URL; + } else { + putesc(c); + } + break; + case '(': + if (in_link == OPT_URL) { + /* don't print it, start parsing */ + in_link = URL_PARSE; + printf("<a href=\""); + } else { + putesc(c); + } + break; + case ')': + if (in_link == URL_PARSE) { + in_link = NOL; + printf("\">"); + for (int i = 0; i < lnkidx; i++) { + handlec(lnkdes[i]); + } + printf("</a>"); + } else { + putesc(c); + } + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + case '-': + if (in == NONE || !linestarted) { + ol = c != '-'; + in = LIST_PARSE; + } else if (in != LIST_PARSE) { + putesc(c); + } + break; + case '\n': + handle_lf(); + if (in != NONE) { + putesc(c); + } + break; + default: + maybe_startp(); + putesc(c); + break; + } + lastc = c; + linestarted = c != '\n'; } int parse() { /* Mini state machine (home grown spaghetti code) */ + int c; while ((c = getchar()) != EOF) { /* Store links as we go, before the main loop */ if (in_link == DESC_PARSE && c != ']') { - pushbuf(); - /* skip the regular loop -- pushbuf handles formatting */ + lnkdes[lnkidx++] = c; + /* skip the regular loop -- it'll be played back later */ continue; } /* Any character other than a '(' terminates a link at ']' */ if (in_link == OPT_URL && c != '(') { in_link = NOL; + lnkdes[lnkidx] = '\0'; printf("<a href=\"%s\">%s</a>", lnkdes, lnkdes); } - /* Handle Escapes before any other bit of the main loop */ - if (escape) { - maybe_startp(); - putesc(c); - escape = false; - continue; - } - - /* Store the indentation */ - if (!linestarted && c == ' ') { - indent++; - continue; /* don't print 
leading indents */ - } - - /* Handle unique state changes by char */ - switch (c) { - case '\\': - escape = true; - break; - case '#': - if (in == NONE) { - in = HEADER_PARSE; - hlvl = 1; - } else if (in == HEADER_PARSE) { - hlvl++; - } else { - /* not a special # */ - putesc(c); - } - break; - case ' ': - if (in == HEADER_PARSE) { - printf("<h%d>", hlvl); - in = HEADER; - } else if (in == LIST_PARSE) { - if (!listdepth) { - newlist(); - } else { - if (previndent < indent) { - newlist(); - } else if (previndent > indent) { - endlist(); - printf("</li>\n<li>\n"); - } else { - in = LIST; - printf("</li>\n<li>\n"); - } - } - } else { - putesc(c); - } - break; - case '*': - case '`': - case '_': - toggle_format(); - break; - case '\t': - if (in == NONE) { - in = CODE; - printf("<pre><code>"); - } else if (lastc == '\n' && c == '\t') { - /* no op */ - } else { - putesc(c); - } - break; - case '>': - if (in == NONE) { - in = PARAGRAPH; - /* only start a new blockquote block is not already in one */ - printf("%s", blockquote ? 
"<p>\n" : "<blockquote>\n<p>\n"); - blockquote = true; - } else if (lastc == '\n' && c == '>') { - /* no op */ - } else { - putesc(c); - } - break; - case '[': - if (in_link == NOL && !fmt_disabled()) { - maybe_startp(); - in_link = DESC_PARSE; - lnkdes[0] = '\0'; - } else { - putesc(c); - } - break; - case ']': - if (in_link == DESC_PARSE) { - in_link = OPT_URL; - } else { - putesc(c); - } - break; - case '(': - if (in_link == OPT_URL) { - /* don't print it, start parsing */ - in_link = URL_PARSE; - printf("<a href=\""); - } else { - putesc(c); - } - break; - case ')': - if (in_link == URL_PARSE) { - in_link = NOL; - printf("\">%s</a>", lnkdes); - } else { - putesc(c); - } - break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '.': - case '-': - if (in == NONE || !linestarted) { - ol = c != '-'; - in = LIST_PARSE; - } else if (in != LIST_PARSE) { - putesc(c); - } - break; - case '\n': - handle_lf(); - if (in != NONE) { - putesc(c); - } - break; - default: - maybe_startp(); - putesc(c); - break; - } - lastc = c; - linestarted = c != '\n'; + handlec(c); } /* pretend there's a final LF to close any blocks */ handle_lf();