'''
Created on Apr 27, 2014

@author: Brett Paufler

Builds the "Twelfth Century" web pages from MS Word .doc files.

Input: .doc files in ./twelfthCentury/ (relative to the working directory).

DIRECTIONS
    1) make a base folder (its name is unimportant)
    2) put this twelfthCentury.py program in the base folder
    3) make a sub directory in the base folder named "twelfthCentury"
    4) place the raw .doc files in this sub directory
    5) run twelfthCentury.py

WHAT HAPPENS
    1) an .html file is made for each .doc file
    2) the line holding {{{Chapter End}}} in each .html (but not in the
       .doc) is replaced with various links
    3) twelfthCentury.html is created with a link for each .html file in
       the directory, along with a .doc and .pdf link for each .html

NO PROVISION IS MADE FOR CREATING THE PDFs;
THEY ARE ASSUMED TO BE MADE BY ANOTHER PROGRAM.
'''

import fileinput
import os
import sys

import win32com.client

print("twelfthCentury.py Started")

# Everything takes place in this directory.  If no sub directory of this
# name exists, every step below returns without doing anything, so the .py
# file is safe to store alongside the website.
twelfthCenturyDirectory = "./twelfthCentury"

# Name of the generated index page; it must never be treated as a chapter.
_indexFileName = "twelfthCentury.html"

# Word "save as" format code passed to SaveAs: 8 selects HTML
# (7 would be Unicode text, UCS-2 little endian).
_wdFormatHTML = 8

# NOTE(review): the page templates below hold plain text only.  The anchor
# tags around the %s placeholders were rebuilt so that every formatting
# argument is consumed; if further markup belongs in these templates,
# restore it from the published pages.


def _directoryExists(directory):
    '''Return True if directory exists; otherwise print a notice, return False.'''
    if os.path.isdir(directory):
        return True
    print("No directory named " + directory + " - nothing to do")
    return False


def _chapterHtmlNames(directory):
    '''Return the chapter .html file names found in directory.

    The index page is left out.  Names are sorted case-insensitively
    because os.listdir returns entries in arbitrary order, and both the
    "next chapter" chaining and the index depend on a stable order.
    '''
    names = [fN for fN in os.listdir(directory)
             if fN.lower().endswith(".html") and fN != _indexFileName]
    return sorted(names, key=lambda fN: fN.lower())


def allWordToHTML(dirIn=twelfthCenturyDirectory, dirOut=twelfthCenturyDirectory):
    '''Convert every .doc file in dirIn to an .html file in dirOut.

    dirIn  -- directory scanned for .doc files (extension match ignores case)
    dirOut -- directory receiving the .html files; by default the same
              directory as dirIn

    Each output keeps the base name of its source document.  Word is
    driven through COM, so this only works on Windows with Word installed.
    '''
    # The progress messages keep their historical "txt" wording.
    print("allWordToText converting doc to txt")

    if not _directoryExists(dirIn):
        return

    docNames = [fN for fN in os.listdir(dirIn) if fN.lower().endswith(".doc")]
    if docNames:
        # Word is handed absolute paths for both the source and the target.
        absIn = os.path.abspath(dirIn)
        absOut = os.path.abspath(dirOut)

        # Nice long call line to open the Word document processor (Word app).
        word = win32com.client.gencache.EnsureDispatch('Word.Application')
        try:
            for fN in docNames:
                print(fN)

                fPIn = os.path.join(absIn, fN)
                print(fPIn)

                fPOut = os.path.join(absOut, os.path.splitext(fN)[0] + ".html")
                print(fPOut)

                # open, save, close - .doc to .html
                doc = word.Documents.Open(fPIn)
                try:
                    doc.SaveAs(fPOut, _wdFormatHTML)
                finally:
                    # Close even when SaveAs fails so no document stays open.
                    doc.Close()
        finally:
            # Without this the Word application started above keeps running.
            word.Quit()

    print("allWordToText Finishing")


def addChapterLinksToHTML():
    '''Add the navigation footer to every chapter .html page.

    In each chapter page, every line containing the marker
    "{{{Chapter End}}}" is replaced (the whole line, not just the marker)
    by a footer that links to the next chapter, or by a closing remark on
    the last chapter.  All other lines are written back unchanged.
    '''
    print("addChapterLinks called")
    endID = "{{{Chapter End}}}"

    if not _directoryExists(twelfthCenturyDirectory):
        return

    # assembling the list of htmlFiles
    htmlFiles = [twelfthCenturyDirectory + "/" + fN
                 for fN in _chapterHtmlNames(twelfthCenturyDirectory)]
    for html in htmlFiles:
        print(html)

    length = len(htmlFiles)
    print("length = " + str(length))

    for n, html in enumerate(htmlFiles):
        print("Working on " + html)

        if n < length - 1:
            # Link target and title both come from the next file's name.
            nextName = os.path.basename(htmlFiles[n + 1])
            link = nextName.replace(" ", "%20")
            print("link =:" + link)
            name = os.path.splitext(nextName)[0]
            print("name =:" + name)
            firstLink = 'Next Chapter\n<a href="%s">%s</a>' % (link, name)
        else:
            firstLink = "Thus the Twelfth Century Ends\nfor now..."

        footerText = '''

%s

Twelfth Century Splash Page

Writing Home BrettWords

Brett@Paufler.net
(c) Copyright 2014 Brett Paufler

Terms of Service

''' % firstLink
        print(footerText)

        # With inplace=True, whatever is written to stdout inside the loop
        # becomes the new file content.  Lines already end with a newline,
        # so they are written as they are rather than printed.
        try:
            for line in fileinput.input(html, inplace=True):
                if endID in line:
                    sys.stdout.write(footerText)
                else:
                    sys.stdout.write(line)
        finally:
            # Restores stdout even if the rewrite is interrupted.
            fileinput.close()

    print("END HTML LINKS ADDED")


def createTwelfthCenturyHTML():
    '''Write the index page twelfthCentury.html.

    The index consists of a fixed header, one row per chapter .html file
    in the twelfthCentury directory, and a fixed footer.  Each row links
    to the chapter page and to same-named .pdf and .doc files, which are
    ASSUMED to exist next to each .html.
    '''
    print("createTwelfthCenturyHTML started")

    if not _directoryExists(twelfthCenturyDirectory):
        return

    outFile = twelfthCenturyDirectory + "/" + _indexFileName

    header = ''' The Twelfth Century

The Twelfth Century

by

The Dark Lord Insidious

...complete and unabridged...

High School never lasted so long...

'''

    # Dividing line between header and footer

    footer = '''

Back to Main Writing Site

BrettWords

Disclaimer!
The contents of this site are Fiction.
Any resemblance to the contents within
and any specific or actual person, place, or thing
living or dead
real or imagined
is unintentional and purely coincidental.

No seriously!
Insidious is way too nice of a guy to be a Necromancer.
So, you just know the rest of what he has to say is all lies, as well.
Not that I'd say this to his face, mind you.
The guy gives me the heebie-jeebies.

(c) Copyright 2014 Brett Paufler

Brett@Paufler.net

Terms of Service

These webpages were assembled automatically from MS Word .doc files using the following python script.
twelfthCentury.py

'''

    # The index page itself is already excluded by the helper:
    # no sense linking to itself.
    rows = []
    for fN in _chapterHtmlNames(twelfthCenturyDirectory):
        htmlLink = fN.replace(" ", "%20")
        # Dropping the last four characters keeps the dot, ready for the
        # sibling extensions.
        pdfLink = htmlLink[:-4] + "pdf"
        docLink = htmlLink[:-4] + "doc"
        linkName = fN[:-5]
        textOut = '''

<a href="%s">%s</a> <a href="%s">(pdf)</a> <a href="%s">(doc)</a>

''' % (htmlLink, linkName, pdfLink, docLink)
        rows.append(textOut)

    with open(outFile, 'w') as fileOut:
        fileOut.write(header)
        fileOut.writelines(rows)
        fileOut.write(footer)

    print("createTwelfthCenturyHTML ended")


def doALL():
    '''Run the whole pipeline: convert, add chapter links, build the index.'''
    allWordToHTML()
    addChapterLinksToHTML()
    createTwelfthCenturyHTML()


doALL()

print("twelfthCentury FINISHED")