nihdoc.c (5700B) [raw]
/*
 * nihdoc.c -- Not Invented Here Doc
 * - - - ---
 *
 * ~akarle, MIT License
 *
 * "because markdown isn't in base!"
 */
#include <stdio.h>
#ifndef __PLAN_9
#include <unistd.h>
#endif
#include <stdbool.h>

/* Global Constants and Enums */

/* HTML tag name for each inline format character (indexed by the char) */
char *FMT_STRS[] = {
	['_'] = "em",
	['*'] = "strong",
	['`'] = "code",
};

/* Block-level construct currently being parsed or emitted */
enum Block {
	NONE,
	HEADER,
	HEADER_PARSE,	/* counting leading '#' characters */
	PARAGRAPH,
	CODE,
	LIST,
	LIST_PARSE,	/* saw a list marker, waiting for the following ' ' */
};

/* Progress through a "[desc](url)" or bare "[url]" link */
enum Link {
	NOL,		/* not in a link */
	URL_PARSE,	/* between '(' and ')' */
	DESC_PARSE,	/* between '[' and ']' (buffered in lnkdes) */
	OPT_URL,	/* just saw ']', a '(' may follow */
};


/* Start Global State */
enum Block in = NONE;
enum Link in_link = NOL;
int hlvl = 0;			/* header level (number of '#') */
bool fmts[256] = {false};	/* indexed by _ * ` */
bool escape = false;		/* previous character was a backslash */
char lnkdes[2048] = {0};	/* buffered link description */
int lnkidx = 0;			/* number of bytes used in lnkdes */
int indent = 0;			/* leading spaces on the current line */
int previndent = 0;		/* indent of the most recent list item */
int listdepth = 0;		/* number of currently open lists */
int lastc = '0';		/* last character that reached the main switch */
bool ol = false;		/* current list marker is ordered */
bool linestarted = false;	/* a non-indent character was seen on this line */
bool blockquote = false;	/* inside a <blockquote> */


/* Helper functions */

/*
 * Write a single character to stdout, replacing the HTML-significant
 * characters '<', '>' and '&' by their entities so that input text can
 * never be interpreted as markup.
 */
void
putesc(int c)
{
	switch (c) {
	case '<':
		printf("&lt;");
		break;
	case '>':
		printf("&gt;");
		break;
	case '&':
		printf("&amp;");
		break;
	default:
		putchar(c);
	}
}

/* Write a NUL-terminated string through putesc(). */
void
putescstr(const char *s)
{
	for (; *s != '\0'; s++)
		putesc((unsigned char)*s);
}

/* Open a new (possibly nested) list and its first item. */
void
newlist(void)
{
	in = LIST;
	previndent = indent;
	printf("<%s>\n<li>\n", ol ? "ol" : "ul");
	listdepth++;
}

/*
 * Close the current item and the innermost list.
 * Returns the number of lists that remain open.
 */
int
endlist(void)
{
	in = LIST;
	previndent = indent;
	printf("</li>\n</%s>\n", ol ? "ol" : "ul");
	return --listdepth;
}

/*
 * All inline types should start the paragraph if no other major type present.
 * This function does just that, IFF we aren't already in a major type.
 */
void
maybe_startp(void)
{
	if (in == NONE) {
		in = PARAGRAPH;
		printf("<p>\n");
	}
}

/*
 * Handle the end of a line: reset the per-line state, close a header
 * (which ends at the first LF) and, on a blank line, close whichever
 * multi-line block is open.
 */
void
handle_lf(void)
{
	indent = 0;
	linestarted = false;

	/* single line types (one lf to close) */
	if (in == HEADER) {
		in = NONE;
		printf("</h%d>\n", hlvl);
	}

	/* multi-line types (two lf to close) */
	if (lastc == '\n' || (lastc == '>' && blockquote)) {
		switch (in) {
		case PARAGRAPH:
			printf("</p>\n");
			break;
		case CODE:
			printf("</code></pre>\n");
			break;
		case LIST:
			previndent = 0;
			/*
			 * Guard on the depth itself rather than on the return
			 * value of endlist(): should the depth ever be zero
			 * here, decrementing it first would go negative and
			 * the loop would never terminate.
			 */
			while (listdepth > 0)
				endlist();
			break;
		default:
			break; /* no op */
		}
		in = NONE;
		/* a blockquote only ends on a truly empty line, not on a lone '>' */
		if (blockquote && lastc == '\n') {
			printf("</blockquote>\n");
			blockquote = false;
		}
	}
}

/* Returns whether inline styles are disabled at this moment. */
bool
fmt_disabled(int c)
{
	/* `` blocks all but the next `, likewise CODE makes all disabled */
	if (in == CODE || in_link == URL_PARSE)
		return true;
	else
		return fmts['`'] && c != '`';
}

/*
 * Open or close the inline format belonging to c (one of _ * `), or
 * print c literally when formatting is currently disabled.
 */
void
toggle_format(int c)
{
	if (!fmt_disabled(c)) {
		maybe_startp();
		printf("<%s%s>", fmts[c] ? "/" : "", FMT_STRS[c]);
		fmts[c] ^= true;
	} else
		putesc(c);
}


/*
 * The main state machine [1], abstracted into a function to allow
 * playback support for buffered types (i.e. link descriptions)
 *
 * [1]: home grown spaghetti code
 */
void
handlec(int c)
{
	/*
	 * Store link descriptions as we go, skipping the regular loop since
	 * we'll play them back later via recursion on handlec
	 */
	if (in_link == DESC_PARSE && c != ']') {
		/*
		 * Drop whatever does not fit; one byte stays reserved for
		 * the NUL terminator written when the link turns out to
		 * have no URL part.
		 */
		if (lnkidx < (int)sizeof(lnkdes) - 1)
			lnkdes[lnkidx++] = c;
		return;
	}

	/* Any character other than a '(' terminates a link at ']' */
	if (in_link == OPT_URL && c != '(') {
		in_link = NOL;
		lnkdes[lnkidx] = '\0';
		/* the description doubles as the URL; escape it like all other text */
		printf("<a href=\"");
		putescstr(lnkdes);
		printf("\">");
		putescstr(lnkdes);
		printf("</a>");
	}

	/* Handle Escapes before any other bit of the main switch */
	if (escape) {
		maybe_startp();
		putesc(c);
		escape = false;
		return;
	}

	/* Store the indentation and return without printing */
	if (!linestarted && c == ' ') {
		indent++;
		return;
	}

	switch (c) {
	case '\\':
		escape = true;
		break;
	case '#':
		if (in == NONE) {
			in = HEADER_PARSE;
			hlvl = 1;
		} else if (in == HEADER_PARSE)
			hlvl++;
		else
			putesc(c);
		break;
	case ' ':
		if (in == HEADER_PARSE) {
			printf("<h%d>", hlvl);
			in = HEADER;
		} else if (in == LIST_PARSE) {
			if (!listdepth || previndent < indent) {
				/* first list, or deeper indent: nest a new list */
				newlist();
			} else {
				/*
				 * Shallower indent closes one nested list, but
				 * never the outermost one: the new item still
				 * needs a list to live in, and dropping the
				 * depth to zero here would leave the state
				 * machine in LIST with nothing open.
				 */
				if (previndent > indent && listdepth > 1)
					endlist();
				in = LIST;
				previndent = indent;
				printf("</li>\n<li>\n");
			}
		} else
			putesc(c);
		break;
	case '*':
	case '`':
	case '_':
		toggle_format(c);
		break;
	case '\t':
		if (in == NONE) {
			in = CODE;
			printf("<pre><code>");
		} else if (lastc != '\n')
			putesc(c);
		break;
	case '>':
		if (in == NONE) {
			in = PARAGRAPH;
			printf("%s", blockquote ? "<p>\n" : "<blockquote>\n<p>\n");
			blockquote = true;
		} else if (lastc != '\n')
			putesc(c);
		break;
	case '[':
		if (in_link == NOL && !fmt_disabled(c)) {
			maybe_startp();
			in_link = DESC_PARSE;
			lnkidx = 0;
		} else
			putesc(c);
		break;
	case ']':
		if (in_link == DESC_PARSE)
			in_link = OPT_URL;
		else
			putesc(c);
		break;
	case '(':
		if (in_link == OPT_URL) {
			in_link = URL_PARSE;
			printf("<a href=\"");
		} else
			putesc(c);
		break;
	case ')':
		if (in_link == URL_PARSE) {
			in_link = NOL;
			printf("\">");
			/* replay the buffered description through the state machine */
			for (int i = 0; i < lnkidx; i++)
				handlec((unsigned char)lnkdes[i]);
			printf("</a>");
		} else
			putesc(c);
		break;
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	case '.':
	case '-':
		if (in == NONE || !linestarted) {
			ol = c != '-';
			in = LIST_PARSE;
		} else if (in != LIST_PARSE)
			putesc(c);
		break;
	case '\n':
		handle_lf();
		if (in != NONE)
			putesc(c);
		break;
	default:
		maybe_startp();
		putesc(c);
		break;
	}
	lastc = c;
	linestarted = c != '\n';
}


/* nihdoc: a text -> HTML parser */
int
main(int argc, char *argv[])
{
	int c;

#ifdef __OpenBSD__
	pledge("stdio", "stdio");
#endif

	if (argc > 1) {
		fprintf(stderr, "error: %s takes no arguments\n", argv[0]);
		return 1;
	}

	while ((c = getchar()) != EOF)
		handlec(c);

	/* pretend there's a final LF to close any blocks */
	handle_lf();

	return 0;
}