#!/usr/bin/env python3
r"""
Concatenates a number of input files into a single output file, while
performing the following regex substitutions:

[[pagebreak]]       \newpage

[nobr[s]]           # Substitute spaces with `\ ` to mark as nonbreaking
                    # Doesn't work inside code markdown, alas

[nh[x]]             \hyphenation{x}       # no hyphen, no underscores allowed
[ix[x]]             \index{x}             # index straight up
[ixtt[x]]           \index{x@\texttt{x}}  # index tt

fl = footnote link
flx = footnote link to example https://beej.us/guide/bgnet/examples/file
flr = footnote link to redirect https://beej.us/guide/url/id

[fl[link|url]]      [link](url)^[url]
[flx[link|file]]    [link](https://beej.us/guide/bgnet/examples/file)^[https://beej.us/guide/bgnet/examples/file]
[flr[link|id]]      [link](https://beej.us/guide/url/id)^[https://beej.us/guide/url/id]
[flrfc[link|num]]   [link](https://tools.ietf.org/html/rfcnum)^[https://tools.ietf.org/html/rfcnum]

The example URL prefix used by [flx[...]] is read from
preproc_config.EXAMPLE_URL.

Tab characters are replaced before any other substitution, and the body
of every fenced code block whose opening fence does not mention
"numberlines" is indented.

Also puts a blank line between files (the files are joined with a single
newline, which yields a blank line when each file ends with one).
"""

import re
import sys

USAGE = "usage: preproc infile [infile ... ] outputfile"

# NOTE(review): both values below are a single space in the reviewed source.
# Confirm that is the intended width and not the result of collapsed
# whitespace in the copy that was reviewed.
TAB_REPLACEMENT = " "
FENCE_INDENT = " "

FENCE_MARKER = '```'


def nobr_replace(mo):
    """Return group 1 of *mo* with every whitespace character replaced.

    Each whitespace character (including newlines) becomes a backslash
    followed by a space, which marks the space as nonbreaking.
    """
    return re.sub(r'\s', r'\ ', mo.group(1))


def read_inputs(paths):
    """Read every file in *paths* and return the contents joined by newlines."""
    chunks = []
    for path in paths:
        # The context manager closes the handle even if read() raises.
        with open(path) as fin:
            chunks.append(fin.read())
    return '\n'.join(chunks)


def apply_substitutions(text, example_url):
    """Return *text* with tabs replaced and all bracket macros expanded.

    *example_url* is the prefix prepended to the file name in
    ``[flx[link|file]]``. The rules run in a fixed order, each over the
    whole text, and every macro pattern may span multiple lines.
    """
    text = re.sub(r'\t', TAB_REPLACEMENT, text)

    # The URL is spliced into a replacement template, where a backslash
    # starts an escape or group reference; double it so it stays literal.
    url = example_url.replace('\\', '\\\\')

    rules = [
        (r'\[nobr\[(.+?)\]\]', nobr_replace),
        (r'\[\[pagebreak\]\]', r'\\newpage'),
        (r'\[nh\[(.+?)\]\]', r'\\hyphenation{\1}'),
        (r'\[ix\[(.+?)\]\]', r'\\index{\1}'),
        (r'\[ixtt\[(.+?)\]\]', r'\\index{\1@\\texttt{\1}}'),
        (r'\[fl\[(.+?)\|(.+?)\]\]', r'[\1](\2)^[\2]'),
        (r'\[flx\[(.+?)\|(.+?)\]\]',
         r'[\1](' + url + r'\2)^[' + url + r'\2]'),
        (r'\[flr\[(.+?)\|(.+?)\]\]',
         r'[\1](https://beej.us/guide/url/\2)^[https://beej.us/guide/url/\2]'),
        (r'\[flrfc\[(.+?)\|(.+?)\]\]',
         r'[\1](https://tools.ietf.org/html/rfc\2)'
         r'^[https://tools.ietf.org/html/rfc\2]'),
    ]
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text, flags=re.DOTALL)
    return text


def indent_unnumbered_fences(text):
    """Return *text* with the bodies of unnumbered fenced code indented.

    A fence line is any line whose stripped content starts with three
    backticks; each one toggles the in-fence state. Lines inside a fence
    are prefixed with FENCE_INDENT unless the most recent fence line
    contained "numberlines" (case-insensitive). Fence lines themselves
    are never indented.
    """
    pieces = []
    in_fence = False
    number_lines = False

    for line in text.splitlines(True):
        if line.strip().startswith(FENCE_MARKER):
            # Re-evaluated on every fence line, so a closing fence resets it.
            number_lines = "numberlines" in line.lower()
            in_fence = not in_fence
        elif in_fence and not number_lines:
            pieces.append(FENCE_INDENT)
        pieces.append(line)

    return ''.join(pieces)


def main(argv=None):
    """Run the preprocessor and return the process exit status.

    *argv* defaults to ``sys.argv``; it holds the program name, one or
    more input paths, and the output path last. Returns 1 after printing
    a usage message to stderr when fewer than two paths are given,
    otherwise 0.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        print(USAGE, file=sys.stderr)
        return 1

    # Imported here rather than at module level so the pure text helpers
    # above can be imported without the project configuration present.
    import preproc_config

    infiles = argv[1:-1]
    outfile = argv[-1]

    # All inputs are read before the output file is opened for writing.
    filedata = read_inputs(infiles)
    filedata = apply_substitutions(filedata, preproc_config.EXAMPLE_URL)

    with open(outfile, "w") as fout:
        fout.write(indent_unnumbered_fences(filedata))

    return 0


if __name__ == "__main__":
    sys.exit(main())