'''
Created on Apr 18, 2014
@author: Brett Paufler
Copyright Brett Paufler 2014
Place py in directory folder with other files
Program creates link to all other html files
Program assumes that both doc & pdf files with the same base name exist
And links to those files as well.
In other words, program assumes the existence of
three files with the same base name:
base.pdf
base.doc
base.html
And creates a master html file as output based on the existence of the html files
(note, these files are created from a base.doc, if the 8 steps below are followed)
The program does not check that all three files exist:
it looks for the .html extension only and, if that file exists, presumes the existence of the other two files.
Since the output is customized to my needs,
it's unlikely this file is of much use for anything other than a template
At bottom, commented out:
function to convert Word Doc to HTML
function to replace {{{ }}} text with
one link
copyright notice
I used Nuance PDF Assist to batch convert the Word Doc files to pdf
So, personal notes,
1) Edit doc files
2) add {{{ }}} copyright tags at bottom
3) place word .doc files in docFilesIn directory
4) run allWordDOCtoHTML function as found in DOCConverters module (commented out version below)
5) run archiveArticle_docHTML_addLink module (commented out version below)
6) place copy of this py program archiveArticlesMakeMainHTML in htmlOut directory
7) run this py program in place in that directory
8) rename htmlOut directory and add to website as appropriate, all needed files in base directory
And yes, I'm not really that good at commenting yet, as I'm only just beginning to get to the point where
I want to reuse code that I haven't seen for weeks or months.
I'm also indecisive on whether it makes any sense to generalize an html creator,
or simply take the base I've got going,
and copy, paste, and customize it as suits my current needs.
'''
import os
def createArchiveArticlesMAINHTML():
    '''Write archivedArticles.html, an index page for the current directory.

    Every file in the current working directory whose name ends in ".html"
    (case-insensitive), other than the output file itself, gets one entry.
    Each entry links to the html file and to the .pdf and .doc files that
    share its base name.  The existence of the .pdf and .doc files is
    presumed, never checked.  Entries appear in sorted filename order.

    Takes no arguments and returns None.  An existing archivedArticles.html
    is overwritten.
    '''
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("createArchiveArticlesHTML() started")

    outFile = "archivedArticles.html"
    htmlSuffix = ".html"

    header = (
        "\n"
        "Archived Articles\n"
        "Brett Paufler\n"
        "Archived Articles\n"
        "...a selection of writing samples from the 2008 era...\n"
    )

    footer = (
        "\n"
        "Disclaimer!\n"
        "The contents of this sub-site are my opinion.\n"
        "More accurately, they were my opinion circa 2008.\n"
        "Even things that look like fact may not be.\n"
        "They simply are (or at least, were) my opinion.\n"
        "I am not a fact checker.\n"
        "I do not care about 'facts'.\n"
        "Do not rely on anything I have to say.\n"
        "This, too, is merely my opinion...\n"
    )

    # The with-block closes the output file even if a write fails part-way.
    with open(outFile, 'w') as fileOut:
        fileOut.write(header)

        # a href meat and potatoes
        # os.listdir() returns names in arbitrary order; sorting keeps the
        # generated page stable from one run to the next.
        for fN in sorted(os.listdir(".")):
            if not fN.lower().endswith(htmlSuffix):
                continue
            if fN == outFile:  # no sense linking to itself
                continue

            # Only spaces are URL-escaped; other special characters in a
            # file name are passed through unchanged.
            htmlLink = fN.replace(" ", "%20")
            # Dropping "html" (4 chars) keeps the dot, so the sibling
            # extension can simply be appended.
            pdfLink = htmlLink[:-4] + "pdf"
            docLink = htmlLink[:-4] + "doc"
            linkName = fN[:-len(htmlSuffix)]

            # One entry per article: the title links to the html version,
            # followed by links to the pdf and doc versions.
            fileOut.write(
                '<p><a href="%s">%s</a> '
                '(<a href="%s">pdf</a>, <a href="%s">doc</a>)</p>'
                % (htmlLink, linkName, pdfLink, docLink)
            )
            # Blank lines between entries (source readability only).
            fileOut.write('\n\n\n\n')

        fileOut.write(footer)

    print("createArchiveArticlesHTML() ended")
#END createArchiveArticlesMAINHTML()
# Script entry point: this call runs unconditionally whenever the file is
# executed (or imported), writing the index into the current working directory.
createArchiveArticlesMAINHTML()
'''
THIS IS INCLUDED FOR REFERENCE - HIGHLY BRITTLE
'''
'''
#the Word Doc converter to html
import win32com.client
import os
def allWordDOCtoHTML(dirIn = ".\docFilesIn",dirOut = ".\htmlOut"):
#Takes all Microsoft Word .doc's (or docx) files in dirIn
#and converts to standard MS format html files in dirOut
print "allWordDOCtoHTML staring"
makeOutDir(dirOut)
word = win32com.client.gencache.EnsureDispatch("Word.Application")
#docIn = "tonyHawk.doc"
for docIn in os.listdir(dirIn):
fP = os.path.abspath(dirIn) + "\\" + docIn
print fP
if docIn.endswith(".doc"):
sN = os.path.abspath(".") + dirOut + "\\"+ docIn[:-3] + "html"
elif docIn.endswith(".docx"):
sN = os.path.abspath(".") + dirOut + "\\"+ docIn[:-4] + "html"
print sN
doc = word.Documents.Open(fP)
doc.SaveAs(sN, win32com.client.constants.wdFormatHTML)
doc.Close()
print "allWordDOCtoHTML ending"
# END allWordDOCtoHTML
allWordDOCtoHTML()
'''
'''
textFileWork
archiveArticle_docHTML_addLink
Since I have a separate py for this (to be found in my programs in the above directory)
and am only including it here for backup purposes,
I have not reduced this to a function.
#MS WORD has a save html format type
#This takes the saved html file (of the version of word I'm currently using 2003, I believe)
#And exports a file closer to what I want out of an html
#Complete with custom links and so forth
#THIS IS HIGHLY BRITTLE, dependent upon Word, which is dependent upon the computer I am using
#DANGER!!! THIS MODIFIES A FILE IN PLACE
#COMPLETE ERASURE IS POSSIBLE
#SO, ONLY USE ON COPIES, not the last surviving original
#if "{{{" in line is the selector
footerText is the copy out, likely all you'll ever want to modify
import os
import fileinput
endTagged = False
footerText = ''
rawHTMLdir = "./htmlOut"
for htmlFile in os.listdir(rawHTMLdir):
print "Working on " + htmlFile
infile = rawHTMLdir + "/" + htmlFile
for line in fileinput.input(infile, inplace=True):
if "{{{" in line:
if endTagged == False:
endTagged = True
line = line.replace(line, footerText)
print line
else:
continue
elif "}}}" in line:
continue
else:
print(line)
print "END"
'''