nihdoc

WIP markup parser (txt -> html)
git clone git://git.alexkarle.com/blag
Log | Files | Refs | README | LICENSE

commit a6dc7f2f92162dc9a9930c3fe676c23a03ef35b8 (patch)
parent eb4048f4d179f2329d77827831af06da52947de9
Author: Alex Karle <alex@alexkarle.com>
Date:   Wed, 15 Dec 2021 00:25:31 -0500

Add initial support for links

Diffstat:
M blag.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 86 insertions(+), 17 deletions(-)

diff --git a/blag.c b/blag.c @@ -20,23 +20,13 @@ * * Inline `code` * - * TODO: + * Escaping via \ + * * [[url|Links]] - * -> if we see [[ - * -> print out <a href=" - * -> while in link - * -> push into buf - * -> putchar - * -> if we see | - * -> printf "> - * -> just putchar - * -> else - * -> printf ">url - * -> once we hit ]] - * -> </a> * - * --> error reporting for unclosed links?? - * -> errx in closeblock() if in link parser + * TODO: + * ----- + * - error reporting for malformed blocks */ #include <stdio.h> #include <err.h> @@ -60,6 +50,10 @@ enum Block { OLIST_START, OLIST_PARSE, OLIST_BREAK, + LINK_URL_PARSE, + LINK_DESC_PARSE, + POTENTIAL_LINK, + POTENTIAL_LINK_END }; int closeblock(int in, int hlvl) { @@ -100,13 +94,47 @@ int closeblock(int in, int hlvl) { } int parse() { - /* Mini state machine */ + /* Mini state machine (home grown spaghetti code) + * + * Key: global "line level" state in `in`, secondary mid-line states + * (inline code & links) use dedicated states. A newline triggers many of + * the line-level blocks to enter "BREAK" mode, where they can either + * continue or truly be broken on a second newline (PARAGRAPH, CODE, + * U/OLIST, etc). 
Several of the variable length tokens (like ordered + * list numbers, header level, etc) enter a "PARSE" mode where special + * action is taken until the parsing is done (usually on ' ', but for + * links on '|') + */ int c; enum Block in = NONE; + enum Block in_link = NONE; int hlvl = 0; int in_code; + int escape = 0; while ((c = getchar()) != EOF) { + /* Handle Escapes before all else */ + if (escape) { + if (in == NONE) { + in = PARAGRAPH; + printf("<p>\n"); + } + putchar(c); + escape = 0; + continue; + } + + /* Reset special "potential" states */ + if (c != '[' && in_link == POTENTIAL_LINK) { + in_link = NONE; + } else if (c != ']' && in_link == POTENTIAL_LINK_END) { + in_link = NONE; + } + + /* Handle unique state changes by char */ switch (c) { + case '\\': + escape = 1; + break; case '#': if (in == NONE) { in = HEADER_PARSE; @@ -120,7 +148,7 @@ int parse() { break; case ' ': if (in == HEADER_PARSE) { - printf("</h%d>", hlvl); + printf("<h%d>", hlvl); in = HEADER; } else if (in == ULIST_START) { printf("<ul>\n <li>"); @@ -155,6 +183,8 @@ int parse() { printf("<code><pre>\n"); } else if (in == CODE_BREAK) { in = CODE; + } else { + putchar(c); } break; case '>': @@ -163,6 +193,45 @@ int parse() { printf("<blockquote>\n"); } else if (in == QUOTE_BREAK) { in = QUOTE; + } else { + putchar(c); + } + break; + case '[': + if (in_link == NONE) { + in_link = POTENTIAL_LINK; + } else if (in_link == POTENTIAL_LINK) { + if (in == NONE) { + /* Assume this is a new paragraph */ + in = PARAGRAPH; + printf("<p>\n"); + } + /* XXX: need to print the first link */ + in_link = LINK_URL_PARSE; + printf("<a href=\""); + } else if (in_link == LINK_URL_PARSE) { + /* TODO: save to linkbuf to support [[url]] links? 
*/ + putchar(c); + } else { + putchar(c); + } + break; + case '|': + if (in_link == LINK_URL_PARSE) { + in_link = LINK_DESC_PARSE; + printf("\">"); + } else { + putchar(c); + } + break; + case ']': + if (in_link == LINK_DESC_PARSE) { + in_link = POTENTIAL_LINK_END; + } else if (in_link == POTENTIAL_LINK_END) { + in_link = NONE; + printf("</a>"); + } else { + putchar(c); } break; case '*':