nihdoc

WIP markup parser (txt -> html)
git clone git://git.alexkarle.com/blag
Log | Files | Refs | README | LICENSE

nihdoc.c (5700B) [raw]


      1 /*
      2  * nihdoc.c -- Not Invented Here Doc
      3  *             -   -        -    ---
      4  *
      5  *              ~akarle, MIT License
      6  *
      7  * "because markdown isn't in base!"
      8  */
      9 #include <stdio.h>
     10 #ifndef __PLAN_9
     11 #include <unistd.h>
     12 #endif
     13 #include <stdbool.h>
     14 
     15 /* Global Constants and Enums */
     /*
      * HTML element name for each inline-format marker, indexed directly by
      * the marker character ('_', '*' or '`').  Only those three slots are
      * initialised; toggle_format() looks entries up here when it prints the
      * opening or closing tag.
      */
     16 char *FMT_STRS[] = {
     17 	['_'] = "em",
     18 	['*'] = "strong",
     19 	['`'] = "code",
     20 };
     21 
     /* Block-level state of the parser (stored in the global `in`). */
     22 enum Block {
     23 	NONE,		/* no block open; maybe_startp() opens a <p> from here */
     24 	HEADER,		/* inside <hN> text; closed by handle_lf() on one LF */
     25 	HEADER_PARSE,	/* counting leading '#' characters into hlvl */
     26 	PARAGRAPH,	/* inside <p>; closed by handle_lf() on a blank line */
     27 	CODE,		/* inside <pre><code>, entered by a tab while NONE */
     28 	LIST,		/* inside a list item */
     29 	LIST_PARSE,	/* saw a list marker (digit, '.' or '-'); waiting for ' ' */
     30 };
     31 
     /* Link-parsing state (stored in the global `in_link`). */
     32 enum Link {
     33 	NOL,		/* not inside a link */
     34 	URL_PARSE,	/* between '(' and ')': characters form the href */
     35 	DESC_PARSE,	/* between '[' and ']': characters are buffered in lnkdes */
     36 	OPT_URL,	/* just saw ']': a '(' starts the URL, anything else ends the link */
     37 };
     38 
     39 
     40 /* Start Global State */
     41 enum Block in = NONE;		/* block currently open */
     42 enum Link in_link = NOL;	/* current link-parsing state */
     43 int hlvl = 0;			/* header level: number of '#' seen in HEADER_PARSE */
     44 bool fmts[256] = {false}; /* indexed by _ * ` */
     45 bool escape = false;		/* set by '\\'; the next character is emitted literally */
     46 char lnkdes[2048] = {0};	/* buffered link description (text between '[' and ']') */
     47 int lnkidx = 0;			/* number of bytes currently stored in lnkdes */
     48 int indent = 0;			/* leading spaces counted on the current line */
     49 int previndent = 0;		/* indent recorded by newlist()/endlist() */
     50 int listdepth = 0;		/* number of lists currently open */
     51 int lastc = '0';		/* previous character that reached the end of handlec() */
     52 bool ol = false;		/* last list marker was not '-', i.e. use <ol> rather than <ul> */
     53 bool linestarted = false;	/* false until a non-indent character is handled on this line */
     54 bool blockquote = false;	/* a <blockquote> is currently open */
     55 
     56 
     57 /* Helper functions */
     58 void
     59 putesc(int c)
     60 {
     61 	switch (c) {
     62 	case '<':
     63 		printf("&lt;");
     64 		break;
     65 	case '>':
     66 		printf("&gt;");
     67 		break;
     68 	case '&':
     69 		printf("&amp;");
     70 		break;
     71 	default:
     72 		putchar(c);
     73 	}
     74 }
     75 
     76 void
     77 newlist(void)
     78 {
     79 	in = LIST;
     80 	previndent = indent;
     81 	printf("<%s>\n<li>\n", ol ? "ol" : "ul");
     82 	listdepth++;
     83 }
     84 
     85 int
     86 endlist(void)
     87 {
     88 	in = LIST;
     89 	previndent = indent;
     90 	printf("</li>\n</%s>\n", ol ? "ol" : "ul");
     91 	return --listdepth;
     92 }
     93 
     94 /*
     95  * All inline types should start the paragraph if no other major type present.
     96  * This function does just that, IFF we aren't already in a major type.
     97  */
     98 void
     99 maybe_startp(void)
    100 {
    101 	if (in == NONE) {
    102 		in = PARAGRAPH;
    103 		printf("<p>\n");
    104 	}
    105 }
    106 
    107 void
    108 handle_lf(void)
    109 {
    110 	indent = 0;
    111 	linestarted = false;
    112 
    113 	/* single line types (one lf to close) */
    114 	if (in == HEADER) {
    115 		in = NONE;
    116 		printf("</h%d>\n", hlvl);
    117 	}
    118 
    119 	/* multi-line types (two lf to close) */
    120 	if (lastc == '\n' || (lastc == '>' && blockquote)) {
    121 		switch (in) {
    122 		case PARAGRAPH:
    123 			printf("</p>\n");
    124 			break;
    125 		case CODE:
    126 			printf("</code></pre>\n");
    127 			break;
    128 		case LIST:
    129 			previndent = 0;
    130 			while (endlist())
    131 				;
    132 			break;
    133 		default:
    134 			break; /* no op */
    135 		}
    136 		in = NONE;
    137 		if (blockquote && lastc == '\n') {
    138 			printf("</blockquote>\n");
    139 			blockquote = false;
    140 		}
    141 	}
    142 }
    143 
    144 /* Returns whether inline styles should be allowed at this moment. */
    145 bool
    146 fmt_disabled(int c)
    147 {
    148 	/* `` blocks all but the next `, likewise CODE makes all disabled */
    149 	if (in == CODE || in_link == URL_PARSE)
    150 		return true;
    151 	else
    152 		return fmts['`'] && c != '`';
    153 }
    154 
    155 void
    156 toggle_format(int c)
    157 {
    158 	if (!fmt_disabled(c)) {
    159 		maybe_startp();
    160 		printf("<%s%s>", fmts[c] ? "/" : "", FMT_STRS[c]);
    161 		fmts[c] ^= true;
    162 	} else
    163 		putesc(c);
    164 }
    165 
    166 
    167 /*
    168  * The main state machine [1], abstracted into a function to allow
    169  * playback support for buffered types (i.e. link descriptions)
    170  *
    171  * [1]: home grown spaghetti code
    172  */
    173 void
    174 handlec(int c)
    175 {
    176 	/*
    177 	 * Store link descriptions as we go, skipping the regular loop since
    178 	 * we'll play them back later via recursion on handlec
    179 	 */
    180 	if (in_link == DESC_PARSE && c != ']') {
    181 		lnkdes[lnkidx++] = c;
    182 		return;
    183 	}
    184 
    185 	/* Any character other than a '(' terminates a link at ']' */
    186 	if (in_link == OPT_URL && c != '(') {
    187 		in_link = NOL;
    188 		lnkdes[lnkidx] = '\0';
    189 		printf("<a href=\"%s\">%s</a>", lnkdes, lnkdes);
    190 	}
    191 
    192 	/* Handle Escapes before any other bit of the main switch */
    193 	if (escape) {
    194 		maybe_startp();
    195 		putesc(c);
    196 		escape = false;
    197 		return;
    198 	}
    199 
    200 	/* Store the indentation and return without printing */
    201 	if (!linestarted && c == ' ') {
    202 		indent++;
    203 		return;
    204 	}
    205 
    206 	switch (c) {
    207 	case '\\':
    208 		escape = true;
    209 		break;
    210 	case '#':
    211 		if (in == NONE) {
    212 			in = HEADER_PARSE;
    213 			hlvl = 1;
    214 		} else if (in == HEADER_PARSE)
    215 			hlvl++;
    216 		else
    217 			putesc(c);
    218 		break;
    219 	case ' ':
    220 		if (in == HEADER_PARSE) {
    221 			printf("<h%d>", hlvl);
    222 			in = HEADER;
    223 		} else if (in == LIST_PARSE) {
    224 			if (!listdepth) {
    225 				newlist();
    226 			} else {
    227 				if (previndent < indent) {
    228 					newlist();
    229 				} else if (previndent > indent) {
    230 					endlist();
    231 					printf("</li>\n<li>\n");
    232 				} else {
    233 					in = LIST;
    234 					printf("</li>\n<li>\n");
    235 				}
    236 			}
    237 		} else
    238 			putesc(c);
    239 		break;
    240 	case '*':
    241 	case '`':
    242 	case '_':
    243 		toggle_format(c);
    244 		break;
    245 	case '\t':
    246 		if (in == NONE) {
    247 			in = CODE;
    248 			printf("<pre><code>");
    249 		} else if (lastc != '\n')
    250 			putesc(c);
    251 		break;
    252 	case '>':
    253 		if (in == NONE) {
    254 			in = PARAGRAPH;
    255 			printf("%s", blockquote ? "<p>\n" : "<blockquote>\n<p>\n");
    256 			blockquote = true;
    257 		} else if (lastc != '\n')
    258 			putesc(c);
    259 		break;
    260 	case '[':
    261 		if (in_link == NOL && !fmt_disabled(c)) {
    262 			maybe_startp();
    263 			in_link = DESC_PARSE;
    264 			lnkidx = 0;
    265 		} else
    266 			putesc(c);
    267 		break;
    268 	case ']':
    269 		if (in_link == DESC_PARSE)
    270 			in_link = OPT_URL;
    271 		else
    272 			putesc(c);
    273 		break;
    274 	case '(':
    275 		if (in_link == OPT_URL) {
    276 			in_link = URL_PARSE;
    277 			printf("<a href=\"");
    278 		} else
    279 			putesc(c);
    280 		break;
    281 	case ')':
    282 		if (in_link == URL_PARSE) {
    283 			in_link = NOL;
    284 			printf("\">");
    285 			for (int i = 0; i < lnkidx; i++)
    286 				handlec(lnkdes[i]);
    287 			printf("</a>");
    288 		} else
    289 			putesc(c);
    290 		break;
    291 	case '1':
    292 	case '2':
    293 	case '3':
    294 	case '4':
    295 	case '5':
    296 	case '6':
    297 	case '7':
    298 	case '8':
    299 	case '9':
    300 	case '.':
    301 	case '-':
    302 		if (in == NONE || !linestarted) {
    303 			ol = c != '-';
    304 			in = LIST_PARSE;
    305 		} else if (in != LIST_PARSE)
    306 			putesc(c);
    307 		break;
    308 	case '\n':
    309 		handle_lf();
    310 		if (in != NONE)
    311 			putesc(c);
    312 		break;
    313 	default:
    314 		maybe_startp();
    315 		putesc(c);
    316 		break;
    317 	}
    318 	lastc = c;
    319 	linestarted = c != '\n';
    320 }
    321 
    322 
    323 /* nihdoc: a text -> HTML parser */
    324 int
    325 main(int argc, char *argv[])
    326 {
    327 	int c;
    328 
    329 #ifdef __OpenBSD__
    330 	pledge("stdio", "stdio");
    331 #endif
    332 
    333 	if (argc > 1) {
    334 		fprintf(stderr, "error: %s takes no arguments\n", argv[0]);
    335 		return 1;
    336 	}
    337 
    338 	while ((c = getchar()) != EOF)
    339 		handlec(c);
    340 
    341 	/* pretend there's a final LF to close any blocks */
    342 	handle_lf();
    343 
    344 	return 0;
    345 }