nihdoc

WIP markup parser (txt -> html)
git clone git://git.alexkarle.com/blag
Log | Files | Refs | README | LICENSE

commit f0bc377e7d6b57bfcfc86fd61c2fa648cd225a33 (patch)
parent 3c5a5ae8f80a76f8ad41a82b1ff8dee2bf046d6a
Author: Alex Karle <alex@alexkarle.com>
Date:   Thu, 30 Dec 2021 00:39:02 -0500

style: Convert whole source to adhere to style(9)

The OpenBSD style(9) guide seems as good a guide as any to adhere to,
since it's the OS I'm primarily developing on / targeting.

The brace-less else-ifs after a multi-line if feel a bit weird/unsafe
(and I'm a tad surprised they wouldn't suggest always using braces to
combat easy mistakes... I guess that's what compiler warnings are for?).

No real functional changes, although I basically get rid of parse()
since I realized handlec was basically doing all the heavy lifting...

Diffstat:
M nihdoc.c | 537 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
1 file changed, 277 insertions(+), 260 deletions(-)

diff --git a/nihdoc.c b/nihdoc.c @@ -10,31 +10,28 @@ #include <unistd.h> #include <stdbool.h> -#define LNK_DES_MAX 2048 - - /* Global Constants and Enums */ char *FMT_STRS[] = { - ['_'] = "em", - ['*'] = "strong", - ['`'] = "code", + ['_'] = "em", + ['*'] = "strong", + ['`'] = "code", }; enum Block { - NONE, - HEADER, - HEADER_PARSE, - PARAGRAPH, - CODE, - LIST, - LIST_PARSE, + NONE, + HEADER, + HEADER_PARSE, + PARAGRAPH, + CODE, + LIST, + LIST_PARSE, }; enum Link { - NOL, - URL_PARSE, - DESC_PARSE, - OPT_URL, + NOL, + URL_PARSE, + DESC_PARSE, + OPT_URL, }; @@ -44,7 +41,7 @@ enum Link in_link = NOL; int hlvl = 0; bool fmts[256] = {false}; /* indexed by _ * ` */ bool escape = false; -char lnkdes[LNK_DES_MAX] = {0}; +char lnkdes[2048] = {0}; int lnkidx = 0; int indent = 0; int previndent = 0; @@ -56,271 +53,291 @@ bool blockquote = false; /* Helper functions */ -void putesc(int c) { - switch (c) { - case '<': printf("&lt;"); break; - case '>': printf("&gt;"); break; - case '&': printf("&amp;"); break; - default: putchar(c); - } +void +putesc(int c) +{ + switch (c) { + case '<': + printf("&lt;"); + break; + case '>': + printf("&gt;"); + break; + case '&': + printf("&amp;"); + break; + default: + putchar(c); + } } -void newlist() { - in = LIST; - previndent = indent; - printf("<%s>\n<li>\n", ol ? "ol" : "ul"); - listdepth++; +void +newlist(void) +{ + in = LIST; + previndent = indent; + printf("<%s>\n<li>\n", ol ? "ol" : "ul"); + listdepth++; } -int endlist() { - in = LIST; - previndent = indent; - printf("</li>\n</%s>\n", ol ? "ol" : "ul"); - return --listdepth; +int +endlist(void) +{ + in = LIST; + previndent = indent; + printf("</li>\n</%s>\n", ol ? "ol" : "ul"); + return --listdepth; } -void maybe_startp() { - /* All inline types should start the paragraph if no other major type present*/ - if (in == NONE) { - in = PARAGRAPH; - printf("<p>\n"); - } +/* + * All inline types should start the paragraph if no other major type present. 
+ * This function does just that, IFF we aren't already in a major type. + */ +void +maybe_startp(void) +{ + if (in == NONE) { + in = PARAGRAPH; + printf("<p>\n"); + } } -void handle_lf() { - indent = 0; - linestarted = false; +void +handle_lf(void) +{ + indent = 0; + linestarted = false; - /* single line types (one lf to close) */ - if (in == HEADER) { - in = NONE; - printf("</h%d>\n", hlvl); - } + /* single line types (one lf to close) */ + if (in == HEADER) { + in = NONE; + printf("</h%d>\n", hlvl); + } - /* multi-line types (two lf to close) */ - if (lastc == '\n' || (lastc == '>' && blockquote)) { - switch (in) { - case PARAGRAPH: printf("</p>\n"); break; - case CODE: printf("</code></pre>\n"); break; - case LIST: - previndent = 0; - while (endlist()) - ; - break; - default: - break; /* no op */ - } - in = NONE; - if (blockquote && lastc == '\n') { - printf("</blockquote>\n"); - blockquote = false; - } - } + /* multi-line types (two lf to close) */ + if (lastc == '\n' || (lastc == '>' && blockquote)) { + switch (in) { + case PARAGRAPH: + printf("</p>\n"); + break; + case CODE: + printf("</code></pre>\n"); + break; + case LIST: + previndent = 0; + while (endlist()) + ; + break; + default: + break; /* no op */ + } + in = NONE; + if (blockquote && lastc == '\n') { + printf("</blockquote>\n"); + blockquote = false; + } + } } -bool fmt_disabled(int c) { - /* `` blocks disable all but the next `, likewise CODE makes all disabled */ - if (in == CODE || in_link == URL_PARSE) { - return true; - } else { - return fmts['`'] && c != '`'; - } +/* Returns whether inline styles should be allowed at this moment. */ +bool +fmt_disabled(int c) +{ + /* `` blocks all but the next `, likewise CODE makes all disabled */ + if (in == CODE || in_link == URL_PARSE) + return true; + else + return fmts['`'] && c != '`'; } -void toggle_format(int c) { - if (!fmt_disabled(c)) { - maybe_startp(); - printf("<%s%s>", fmts[c] ? 
"/" : "", FMT_STRS[c]); - fmts[c] ^= true; - } else { - putesc(c); - } +void +toggle_format(int c) +{ + if (!fmt_disabled(c)) { + maybe_startp(); + printf("<%s%s>", fmts[c] ? "/" : "", FMT_STRS[c]); + fmts[c] ^= true; + } else + putesc(c); } -void handlec(int c) { - /* the main state machine, abstracted into a function to allow - * playback support for buffered types (i.e. link descriptions) */ - /* Handle Escapes before any other bit of the main switch */ - if (escape) { - maybe_startp(); - putesc(c); - escape = false; - return; - } +/* + * The main state machine [1], abstracted into a function to allow + * playback support for buffered types (i.e. link descriptions) + * + * [1]: home grown spaghetti code + */ +void +handlec(int c) +{ + /* + * Store link descriptions as we go, skipping the regular loop since + * we'll play them back later via recursion on handlec + */ + if (in_link == DESC_PARSE && c != ']') { + lnkdes[lnkidx++] = c; + return; + } - /* Store the indentation */ - if (!linestarted && c == ' ') { - indent++; - return; /* don't print leading indents */ - } + /* Any character other than a '(' terminates a link at ']' */ + if (in_link == OPT_URL && c != '(') { + in_link = NOL; + lnkdes[lnkidx] = '\0'; + printf("<a href=\"%s\">%s</a>", lnkdes, lnkdes); + } - switch (c) { - case '\\': - escape = true; - break; - case '#': - if (in == NONE) { - in = HEADER_PARSE; - hlvl = 1; - } else if (in == HEADER_PARSE) { - hlvl++; - } else { - /* not a special # */ - putesc(c); - } - break; - case ' ': - if (in == HEADER_PARSE) { - printf("<h%d>", hlvl); - in = HEADER; - } else if (in == LIST_PARSE) { - if (!listdepth) { - newlist(); - } else { - if (previndent < indent) { - newlist(); - } else if (previndent > indent) { - endlist(); - printf("</li>\n<li>\n"); - } else { - in = LIST; - printf("</li>\n<li>\n"); - } - } - } else { - putesc(c); - } - break; - case '*': - case '`': - case '_': - toggle_format(c); - break; - case '\t': - if (in == NONE) { - in = CODE; - 
printf("<pre><code>"); - } else if (lastc == '\n' && c == '\t') { - /* no op */ - } else { - putesc(c); - } - break; - case '>': - if (in == NONE) { - in = PARAGRAPH; - /* only start a new blockquote block is not already in one */ - printf("%s", blockquote ? "<p>\n" : "<blockquote>\n<p>\n"); - blockquote = true; - } else if (lastc == '\n' && c == '>') { - /* no op */ - } else { - putesc(c); - } - break; - case '[': - if (in_link == NOL && !fmt_disabled(c)) { - maybe_startp(); - in_link = DESC_PARSE; - lnkidx = 0; - } else { - putesc(c); - } - break; - case ']': - if (in_link == DESC_PARSE) { - in_link = OPT_URL; - } else { - putesc(c); - } - break; - case '(': - if (in_link == OPT_URL) { - /* don't print it, start parsing */ - in_link = URL_PARSE; - printf("<a href=\""); - } else { - putesc(c); - } - break; - case ')': - if (in_link == URL_PARSE) { - in_link = NOL; - printf("\">"); - for (int i = 0; i < lnkidx; i++) { - handlec(lnkdes[i]); - } - printf("</a>"); - } else { - putesc(c); - } - break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '.': - case '-': - if (in == NONE || !linestarted) { - ol = c != '-'; - in = LIST_PARSE; - } else if (in != LIST_PARSE) { - putesc(c); - } - break; - case '\n': - handle_lf(); - if (in != NONE) { - putesc(c); - } - break; - default: - maybe_startp(); - putesc(c); - break; - } - lastc = c; - linestarted = c != '\n'; -} + /* Handle Escapes before any other bit of the main switch */ + if (escape) { + maybe_startp(); + putesc(c); + escape = false; + return; + } -int parse() { - /* Mini state machine (home grown spaghetti code) */ - int c; - while ((c = getchar()) != EOF) { - /* Store links as we go, before the main loop */ - if (in_link == DESC_PARSE && c != ']') { - lnkdes[lnkidx++] = c; - /* skip the regular loop -- it'll be played back later */ - continue; - } + /* Store the indentation and return without printing */ + if (!linestarted && c == ' ') { + 
indent++; + return; + } - /* Any character other than a '(' terminates a link at ']' */ - if (in_link == OPT_URL && c != '(') { - in_link = NOL; - lnkdes[lnkidx] = '\0'; - printf("<a href=\"%s\">%s</a>", lnkdes, lnkdes); - } - - handlec(c); - } - /* pretend there's a final LF to close any blocks */ - handle_lf(); - return 0; + switch (c) { + case '\\': + escape = true; + break; + case '#': + if (in == NONE) { + in = HEADER_PARSE; + hlvl = 1; + } else if (in == HEADER_PARSE) + hlvl++; + else + putesc(c); + break; + case ' ': + if (in == HEADER_PARSE) { + printf("<h%d>", hlvl); + in = HEADER; + } else if (in == LIST_PARSE) { + if (!listdepth) { + newlist(); + } else { + if (previndent < indent) { + newlist(); + } else if (previndent > indent) { + endlist(); + printf("</li>\n<li>\n"); + } else { + in = LIST; + printf("</li>\n<li>\n"); + } + } + } else + putesc(c); + break; + case '*': + case '`': + case '_': + toggle_format(c); + break; + case '\t': + if (in == NONE) { + in = CODE; + printf("<pre><code>"); + } else if (lastc != '\n') + putesc(c); + break; + case '>': + if (in == NONE) { + in = PARAGRAPH; + printf("%s", blockquote ? 
"<p>\n" : "<blockquote>\n<p>\n"); + blockquote = true; + } else if (lastc != '\n') + putesc(c); + break; + case '[': + if (in_link == NOL && !fmt_disabled(c)) { + maybe_startp(); + in_link = DESC_PARSE; + lnkidx = 0; + } else + putesc(c); + break; + case ']': + if (in_link == DESC_PARSE) + in_link = OPT_URL; + else + putesc(c); + break; + case '(': + if (in_link == OPT_URL) { + in_link = URL_PARSE; + printf("<a href=\""); + } else + putesc(c); + break; + case ')': + if (in_link == URL_PARSE) { + in_link = NOL; + printf("\">"); + for (int i = 0; i < lnkidx; i++) + handlec(lnkdes[i]); + printf("</a>"); + } else + putesc(c); + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + case '-': + if (in == NONE || !linestarted) { + ol = c != '-'; + in = LIST_PARSE; + } else if (in != LIST_PARSE) + putesc(c); + break; + case '\n': + handle_lf(); + if (in != NONE) + putesc(c); + break; + default: + maybe_startp(); + putesc(c); + break; + } + lastc = c; + linestarted = c != '\n'; } -int main(int argc, char *argv[]) { - if (argc > 1) { - fprintf(stderr, "error: %s takes no arguments\n", argv[0]); - return 1; - } + +/* nihdoc: a text -> HTML parser */ +int +main(int argc, char *argv[]) +{ + int c; + #ifdef __OpenBSD__ - pledge("stdio", "stdio"); + pledge("stdio", "stdio"); #endif - return parse(); + + if (argc > 1) { + fprintf(stderr, "error: %s takes no arguments\n", argv[0]); + return 1; + } + + while ((c = getchar()) != EOF) + handlec(c); + + /* pretend there's a final LF to close any blocks */ + handle_lf(); + + return 0; }