nihdoc

WIP markup parser (txt -> html)
git clone git://git.alexkarle.com/blag
Log | Files | Refs | README | LICENSE

commit 3ca74bd94fd1d1034dd02d6076b117d48cefa157 (patch)
parent daf51240a0e3d3328842a7314eb375b29d0ae8a8
Author: Alex Karle <alex@alexkarle.com>
Date:   Tue, 28 Dec 2021 00:17:43 -0500

Change link syntax to [desc](url) style

This is one of those things that I think is just much better looking
from the plaintext-source perspective. It's a shame that it's a bit more
complex / inefficient (repeatedly strcat-ing instead of printing/saving
the characters as they come), but IMHO, it's cleaner:

    read about [my link](https://alexkarle.com) mid-sentence

       vs

    read about [https://alexkarle.com my link] mid-sentence

Time to go update my whole site with this syntax. Regex to the rescue!

Diffstat:
M README | 2 +-
M nihdoc.1 | 2 +-
M nihdoc.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M test/big.txt | 12 ++++++------
A test/link.html | 27 +++++++++++++++++++++++++++
A test/link.txt | 19 +++++++++++++++++++
6 files changed, 108 insertions(+), 31 deletions(-)

diff --git a/README b/README @@ -40,7 +40,7 @@ is enforced via pledge(2)). Common usage is: - TAB code blocks - Inline `code`, _italics_, and *bold* - Escaping via \\ -- Links: [https://alexkarle.com my site] or [https://alexkarle.com] +- Links: [my site](https://alexkarle.com) or [https://alexkarle.com] ## TODO diff --git a/nihdoc.1 b/nihdoc.1 @@ -29,7 +29,7 @@ Inline *bold*, _italics_, `code` (bold/italics disabled in code) .It backslash to escape .It -[alexkarle.com] bare links and [alexkarle.com description] +[alexkarle.com] bare links and [description](alexkarle.com) .It # headers (each # adds a level) .It diff --git a/nihdoc.c b/nihdoc.c @@ -7,6 +7,7 @@ #include <stdio.h> #include <unistd.h> #include <stdbool.h> +#include <string.h> /* Global Constants and Enums */ @@ -30,6 +31,7 @@ enum Link { NOL, URL_PARSE, DESC_PARSE, + OPT_URL, }; @@ -40,8 +42,7 @@ enum Link in_link = NOL; int hlvl = 0; bool fmts[256] = {false}; /* indexed by _ * ` */ bool escape = false; -char lnkbuf[2048] = {0}; -int lnkidx = 0; +char lnkdes[2048] = {0}; int indent = 0; int previndent = 0; int listdepth = 0; @@ -93,10 +94,10 @@ void handle_lf() { printf("</h%d>\n", hlvl); } - /* terminate url parsing in links */ - if (in_link == URL_PARSE) { - in_link = DESC_PARSE; - printf("\">"); + /* Check for [url] style links */ + if (in_link == OPT_URL) { + in_link = NOL; + printf("<a href=\"%s\">%s</a>", lnkdes, lnkdes); } /* multi-line types (two lf to close) */ @@ -139,10 +140,34 @@ void toggle_format() { } } +void pushbuf() { + /* push description onto lnkdes, with formatting! */ + char buf[12] = {0}; + if (escape) { + buf[0] = c; + escape = false; + } else if (c == '`' || c == '_' || c == '*') { + snprintf(buf, 12, "<%s%s>", fmts[c] ? 
"/" : "", FMT_STRS[c]); + fmts[c] ^= true; + } else if (c == '\\') { + escape = true; + } else { + buf[0] = c; + } + strcat(lnkdes, buf); +} + int parse() { /* Mini state machine (home grown spaghetti code) */ while ((c = getchar()) != EOF) { - /* Handle Escapes before all else */ + /* Store links as we go, before the main loop */ + if (in_link == DESC_PARSE && c != ']') { + pushbuf(); + /* skip the regular loop -- pushbuf handles formatting */ + continue; + } + + /* Handle Escapes before any other bit of the main loop */ if (escape) { maybe_startp(); putesc(c); @@ -150,11 +175,6 @@ int parse() { continue; } - /* Store links as we go */ - if (in_link == URL_PARSE && c != ']') { - lnkbuf[lnkidx++] = c; - } - /* Store the indentation */ if (!linestarted && c == ' ') { indent++; @@ -181,9 +201,10 @@ int parse() { if (in == HEADER_PARSE) { printf("<h%d>", hlvl); in = HEADER; - } else if (in_link == URL_PARSE) { - in_link = DESC_PARSE; - printf("\">"); + } else if (in_link == OPT_URL) { + /* [url] style links */ + in_link = NOL; + printf("<a href=\"%s\">%s</a> ", lnkdes, lnkdes); } else if (in == LIST_PARSE) { if (!listdepth) { newlist(); @@ -232,22 +253,32 @@ int parse() { case '[': if (in_link == NOL && !fmt_disabled()) { maybe_startp(); + in_link = DESC_PARSE; + lnkdes[0] = '\0'; + } else { + putesc(c); + } + break; + case ']': + if (in_link == DESC_PARSE) { + in_link = OPT_URL; + } else { + putesc(c); + } + break; + case '(': + if (in_link == OPT_URL) { + /* don't print it, start parsing */ in_link = URL_PARSE; - lnkidx = 0; printf("<a href=\""); } else { putesc(c); } break; - case ']': + case ')': if (in_link == URL_PARSE) { - /* no description */ - in_link = NOL; - lnkbuf[lnkidx] = '\0'; - printf("\">%s</a>", lnkbuf); - } else if (in_link == DESC_PARSE) { in_link = NOL; - printf("</a>"); + printf("\">%s</a>", lnkdes); } else { putesc(c); } diff --git a/test/big.txt b/test/big.txt @@ -27,7 +27,7 @@ that wraps nicely This is a _paragraph_ *dangit* with links to 
[https://alexkarle.com] and -my [gopher://alexkarle.com gopherhole]. +my [gopherhole](gopher://alexkarle.com). > this is a block quote > that wraps! @@ -41,14 +41,14 @@ my [gopher://alexkarle.com gopherhole]. cheers! -[gopher://alexkarle.com use `sacc(1)`] +[use `sacc(1)`](gopher://alexkarle.com) -[https://example.com this is a link -with a newline!] +[this is a link +with a newline!](https://example.com) -[https://example.com +[ this is another link -with a newline!] +with a newline!](https://example.com) > This is a block quote > diff --git a/test/link.html b/test/link.html @@ -0,0 +1,27 @@ +<p> +<a href="https://alexkarle.com">https://alexkarle.com</a> +</p> +<p> +<a href="https://alexkarle.com">link <code>with code</code> <strong>and bold</strong></a> +</p> +<p> +<a href="https://alexkarle.com">link with *escape</a> +</p> +<p> +this is a paragraph about +<a href="https://alexkarle.com">https://alexkarle.com</a> +yo! +</p> +<p> +this is a paragraph about +<a href="https://alexkarle.com">my site</a> +yo! +</p> +<p> +this is a paragraph about +<a href="https://alexkarle.com">https://alexkarle.com</a> yo! +</p> +<p> +this is a paragraph about +<a href="https://alexkarle.com">my site</a> yo! +</p> diff --git a/test/link.txt b/test/link.txt @@ -0,0 +1,19 @@ +[https://alexkarle.com] + +[link `with code` *and bold*](https://alexkarle.com) + +[link with \*escape](https://alexkarle.com) + +this is a paragraph about +[https://alexkarle.com] +yo! + +this is a paragraph about +[my site](https://alexkarle.com) +yo! + +this is a paragraph about +[https://alexkarle.com] yo! + +this is a paragraph about +[my site](https://alexkarle.com) yo!