'''
Created on Apr 19, 2014

@author: Brett Paufler
(c) Copyright Brett Paufler

MS Word has a "save as HTML" format type.
This takes the saved html file (of the version of Word I'm currently
using -- 2003, I believe) and exports a file closer to what I want out
of an html, complete with custom links and so forth.

THIS IS HIGHLY BRITTLE: dependent upon Word, which is dependent upon the
computer I am using.

DANGER!!! THIS MODIFIES A FILE IN PLACE.
COMPLETE ERASURE IS POSSIBLE.
SO, ONLY USE ON COPIES, not the last surviving original.

A line containing "{{{" is the selector.
footerText is the copy out, likely all you'll ever want to modify.
'''
import fileinput
import os
import sys

# Text written in place of the first "{{{" marker line of every file.
# NOTE(review): only plain text is visible here; if the footer is meant to
# carry HTML markup (links, paragraph tags), confirm it against the intended
# footer before running.
footerText = '''
Archived Articles

2014 Copyright Brett Paufler
'''

# Directory whose entries are rewritten in place when run as a script.
rawHTMLdir = "./htmlOut"


def rewrite_lines(lines, footer):
    """Yield the lines of one file with the marker lines processed.

    * The first line containing "{{{" is replaced by *footer*.
    * Every later line containing "{{{" is dropped.
    * Every line containing "}}}" (and no "{{{") is dropped.
    * All other lines are passed through unchanged, including their own
      line endings.

    lines  -- iterable of text lines, each normally ending in a newline.
    footer -- replacement text; a newline is appended if it lacks one so
              the following line does not get glued onto it.
    """
    endTagged = False  # becomes True once the footer has been emitted
    for line in lines:
        if "{{{" in line:
            if not endTagged:
                endTagged = True
                yield footer if footer.endswith("\n") else footer + "\n"
        elif "}}}" in line:
            continue
        else:
            yield line


def process_directory(directory=rawHTMLdir, footer=footerText):
    """Rewrite every regular file directly inside *directory* IN PLACE.

    Each file is filtered through rewrite_lines(); the footer flag starts
    fresh for every file.  Sub-directories and other non-file entries are
    skipped, because fileinput cannot open them.
    """
    for htmlFile in os.listdir(directory):
        infile = os.path.join(directory, htmlFile)
        if not os.path.isfile(infile):
            continue
        print("Working on " + htmlFile)
        stream = fileinput.input(infile, inplace=True)
        try:
            for outLine in rewrite_lines(stream, footer):
                # With inplace=True, sys.stdout is redirected into the file
                # being read.  write() is used rather than print so the
                # line's own newline is not doubled on every run.
                sys.stdout.write(outLine)
        finally:
            # Always close, so sys.stdout is restored even if rewriting
            # fails part-way through.
            stream.close()


if __name__ == "__main__":
    process_directory(rawHTMLdir, footerText)
    print("END")