nihdoc

WIP markup parser (txt -> html)
git clone git://git.alexkarle.com/blag
Log | Files | Refs | README | LICENSE

commit bd43c075db3d89805f5c102e02f2bc854a9d9354 (patch)
parent 413ca68a9606500ea8766ad7c4d1d0cf91c1287d
Author: Alex Karle <alex@alexkarle.com>
Date:   Fri, 17 Dec 2021 00:16:16 -0500

Simplify links, add bold, italic, and html escapes

Diffstat:
M blag.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 79 insertions(+), 46 deletions(-)

diff --git a/blag.c b/blag.c @@ -10,7 +10,7 @@ * * <p> tags around paragraphs * - * * bullets (non-nested), breaks allowed if indented + * - bullets (non-nested), breaks allowed if indented * * 1. numerical lists (non-nested), breaks allowed if indented * @@ -18,11 +18,11 @@ * * TAB code blocks * - * Inline `code` + * Inline `code`, _italics_, and *bold* * * Escaping via \ * - * [[url|Links]] + * [url Links] * * TODO: * ----- @@ -52,10 +52,30 @@ enum Block { OLIST_BREAK, LINK_URL_PARSE, LINK_DESC_PARSE, - POTENTIAL_LINK, - POTENTIAL_LINK_END }; +void putesc(int c) { + switch (c) { + case '<': + printf("&lt;"); + break; + case '>': + printf("&gt;"); + break; + case '&': + printf("&amp;"); + break; + case '"': + printf("&quot;"); + break; + case '\'': + printf("&#39;"); + break; + default: + putchar(c); + } +} + int closeblock(int in, int hlvl) { if (in == HEADER) { in = NONE; @@ -100,14 +120,15 @@ int parse() { * continue or truly be broken on a second newline (PARAGRAPH, CODE, * U/OLIST, etc). 
Several of the variable length tokens (like ordered * list numbers, header level, etc) enter a "PARSE" mode where special - * action is taken until the parsing is done (usually on ' ', but for - * links on '|') + * action is taken until the parsing is done (usually on ' ') */ int c; enum Block in = NONE; enum Block in_link = NONE; int hlvl = 0; - int in_code; + int in_code = 0; + int in_ital = 0; + int in_bold = 0; int escape = 0; while ((c = getchar()) != EOF) { /* Handle Escapes before all else */ @@ -116,18 +137,11 @@ int parse() { in = PARAGRAPH; printf("<p>\n"); } - putchar(c); + putesc(c); escape = 0; continue; } - /* Reset special "potential" states */ - if (c != '[' && in_link == POTENTIAL_LINK) { - in_link = NONE; - } else if (c != ']' && in_link == POTENTIAL_LINK_END) { - in_link = NONE; - } - /* Handle unique state changes by char */ switch (c) { case '\\': @@ -141,7 +155,7 @@ int parse() { hlvl++; } else { /* not a special # */ - putchar(c); + putesc(c); } break; case ' ': @@ -168,17 +182,48 @@ int parse() { } else if (in == OLIST_BREAK) { /* assume it's a continuation! 
*/ in = OLIST; + } else if (in_link == LINK_URL_PARSE) { + in_link = LINK_DESC_PARSE; + printf("\">"); } else { - putchar(c); + putesc(c); } break; case '`': - if (in_code) { - printf("</code>"); - in_code = 0; + if (in_link == NONE) { + if (in_code) { + printf("</code>"); + in_code = 0; + } else { + printf("<code>"); + in_code = 1; + } + } + break; + case '*': + if (!in_code && in_link == NONE) { + if (in_bold) { + printf("</strong>"); + in_bold = 0; + } else { + printf("<strong>"); + in_bold = 1; + } + } else { + putesc(c); + } + break; + case '_': + if (!in_code && in_link == NONE) { + if (in_ital) { + printf("</em>"); + in_ital = 0; + } else { + printf("<em>"); + in_ital = 1; + } } else { - printf("<code>"); - in_code = 1; + putesc(c); } break; case '\t': @@ -188,7 +233,7 @@ int parse() { } else if (in == CODE_BREAK) { in = CODE; } else { - putchar(c); + putesc(c); } break; case '>': @@ -198,13 +243,11 @@ int parse() { } else if (in == QUOTE_BREAK) { in = QUOTE; } else { - putchar(c); + putesc(c); } break; case '[': if (in_link == NONE) { - in_link = POTENTIAL_LINK; - } else if (in_link == POTENTIAL_LINK) { if (in == NONE) { /* Assume this is a new paragraph */ in = PARAGRAPH; @@ -214,31 +257,21 @@ int parse() { in_link = LINK_URL_PARSE; printf("<a href=\""); } else if (in_link == LINK_URL_PARSE) { - /* TODO: save to linkbuf to support [[url]] links? */ - putchar(c); + /* TODO: save to linkbuf to support [url] links? 
*/ + putesc(c); } else { - putchar(c); - } - break; - case '|': - if (in_link == LINK_URL_PARSE) { - in_link = LINK_DESC_PARSE; - printf("\">"); - } else { - putchar(c); + putesc(c); } break; case ']': if (in_link == LINK_DESC_PARSE) { - in_link = POTENTIAL_LINK_END; - } else if (in_link == POTENTIAL_LINK_END) { in_link = NONE; printf("</a>"); } else { - putchar(c); + putesc(c); } break; - case '*': + case '-': if (in == NONE) { in = ULIST_START; } else if (in == ULIST_START || in == ULIST_PARSE) { @@ -247,7 +280,7 @@ int parse() { /* next list item */ in = ULIST_PARSE; } else { - putchar(c); + putesc(c); } break; case '1': @@ -268,13 +301,13 @@ int parse() { /* next list item */ in = OLIST_PARSE; } else { - putchar(c); + putesc(c); } break; case '\n': in = closeblock(in, hlvl); if (in != ULIST_BREAK && in != OLIST_BREAK) { - putchar(c); + putesc(c); } break; default: @@ -286,7 +319,7 @@ int parse() { /* We thought it might be the end, but it aint! */ in = PARAGRAPH; } - putchar(c); + putesc(c); break; } }