'''
Created on Apr 27, 2014
@author: Brett Paufler
for input in
baseDirectory
./twelfthCentury/
.doc files as input
DIRECTIONS
1) make a base directory (its name is unimportant)
2) put this twelfthCentury.py program in the base directory
3) make a subdirectory named "twelfthCentury" inside the base directory
4) place the raw .doc files in this subdirectory
5) run twelfthCentury.py
WHAT HAPPENS:
1) an .html file is made for each .doc file
2) {{{Chapter End}}} in each .html (but not in .doc) is replaced with various links
3) twelfthCentury.html is created
with a link for each .html file in the directory
along with a .doc and .pdf link for each .html
NO PROVISION IS MADE FOR CREATING THE PDFs;
THEY ARE ASSUMED TO BE MADE BY ANOTHER PROGRAM
'''
import win32com.client
import os
import fileinput
# Announce on stdout that the script has started.
print "twelfthCentury.py Started"
# All work takes place in this subdirectory; the path is relative, so it is
# resolved against the current working directory at run time.
# Author's note: if no subdirectory of this name exists, nothing will happen,
# so the .py is safe and can be stored with the website.
# NOTE(review): allWordToHTML passes this path to os.listdir(), which raises
# when the directory is missing -- confirm the "nothing will happen" claim
# against however the functions are actually invoked.
twelfthCenturyDirectory = "./twelfthCentury"
def allWordToHTML(dirIn=twelfthCenturyDirectory, dirOut=twelfthCenturyDirectory):
    '''Convert every Word .doc file in dirIn to an .html file in dirOut.

    Microsoft Word is driven through COM (win32com): each document is
    opened, saved in Word's HTML format, and closed again.  The .doc
    files themselves are not modified.

    dirIn  -- directory scanned (top level only) for files whose name
              ends in ".doc"; the match is case-insensitive
    dirOut -- directory that receives the .html files; each output keeps
              the base name of its input (foo.doc -> foo.html)

    Returns None.  Progress is printed to stdout.  Errors raised by
    os.listdir or by Word are not caught and propagate to the caller.
    '''
    # wdFormatHTML from Word's WdSaveFormat enumeration.
    # (7 would be wdFormatUnicodeText: a UCS-2 little endian .txt file.)
    wdFormatHTML = 8

    # The log wording refers to "Text"/"txt" although HTML is written;
    # the emitted text is kept verbatim.
    print("allWordToText converting doc to txt")

    # Word is handed absolute paths only: its working directory need not
    # match this script's, so relative paths could resolve elsewhere.
    srcDir = os.path.abspath(dirIn)
    dstDir = os.path.abspath(dirOut)

    # Open (or attach to) the Word application through COM.
    # NOTE(review): Quit() is never called, so the Word instance stays
    # running after this function returns; quitting here could also close
    # a session the user already had open -- confirm the desired behavior.
    word = win32com.client.gencache.EnsureDispatch('Word.Application')

    for fN in os.listdir(dirIn):
        if not fN.lower().endswith(".doc"):
            continue
        if fN.startswith("~$"):
            # Owner/lock file Word keeps next to a document that is open;
            # it is not a real document and cannot be converted.
            continue
        print(fN)

        fPIn = os.path.join(srcDir, fN)
        print(fPIn)

        # Swap the three-letter "doc" extension for "html".
        fPOut = os.path.join(dstDir, fN[:-3] + "html")
        print(fPOut)

        doc = word.Documents.Open(fPIn)
        try:
            doc.SaveAs(fPOut, wdFormatHTML)
        finally:
            # Close even when the save fails so the document is not left
            # open (and locked) inside Word.
            doc.Close()

    print("allWordToText Finishing")
#End allWordToText
#allWordToHTML()
def addChapterLinksToHTML():
'''
adds links to bottom of html page
replacing
endID = "{{{Chapter End}}}"
with alls sorts of links and information
remainder of document is not effected
'''
print "addChapterLinks called"
endID = "{{{Chapter End}}}"
#assembling the list of htmlFiles
htmlFiles = []
for html in os.listdir(twelfthCenturyDirectory):
if html[-4:] == "html":
if not html == "twelfthCentury.html":
html = twelfthCenturyDirectory + "/" + html
htmlFiles.append(html)
print html
length = len(htmlFiles)
print "length = " + str(length)
for n in range(0,length,1):
print range(0,length,1)
html = htmlFiles[n]
print "Working on " + html
if not n == length - 1:
nextHTML = htmlFiles[n + 1]
link = nextHTML[17:]
link = link.replace(" ", "%20")
print "link =:" + link
name = nextHTML[17:-5]
print "name =:" + name
firstLink = 'Next Chapter
%s' % (link, name)
else:
firstLink = "Thus the Twelfth Century Ends
for now..."
footerText = '''