'''
Created on Apr 18, 2014

@author: Brett Paufler
Copyright Brett Paufler 2014

Place py in directory folder with other files.
Program creates link to all other html files.
Program assumes that both doc & pdf files with the same base name exist
and links to those files as well.

In other words, program assumes the existence of three files
with the same base name:
    base.pdf
    base.doc
    base.html
and creates a master html file as output based on the existence
of the html files
(note, these files are created from a base.doc,
if the 8 steps below are followed).

Program does not check for existence of all three files:
it looks for the .html extension only
and if this exists presumes the existence of the other two files.

Since the output is customized to my needs,
it's unlikely this file is of much use for anything other than a template.

At bottom, commented out:
    function to convert Word Doc to HTML
    function to replace {{{ }}} text with one link copyright notice

I used Nuance PDF Assist to batch convert the Word Doc files to pdf.

So, personal notes,
1) Edit doc files
2) add {{{ }}} copyright tags at bottom
3) place word .doc files in docFilesIn directory
4) run allWordDOCtoHTML function as found in DOCConverters module
   (commented out version below)
5) run archiveArticle_docHTML_addLink module
   (commented out version below)
6) place copy of this py program archiveArticlesMakeMainHTML
   in htmlOut directory
7) run this py program in place in that directory
8) rename htmlOut directory and add to website as appropriate,
   all needed files in base directory

And yes, I'm not really that good at commenting yet,
as I'm only just beginning to get to the point
where I want to reuse code that I haven't seen for weeks or months.

I'm also indecisive on whether it makes any sense
to generalize a html creator,
or simply take the base I've got going,
and copy, paste, customize it as suits my current needs.
'''

import os

# Name of the generated index page; it is skipped when collecting links.
_OUT_FILE = "archivedArticles.html"

# NOTE(review): in this copy of the script the header and footer hold bare
# text only -- the html markup and link targets that presumably wrapped this
# text appear to have been stripped.  The text is kept exactly as found;
# restore the tags from the original before publishing.
_HEADER = ''' Archived Articles

Brett Paufler

Archived Articles

...a selection of writing samples from the 2008 era...

'''

_FOOTER = '''

Back to Main Writing Site

www.DragonBoundPub.com

Disclaimer!
The contents of this sub-site are my opinion.
More accurately, they were my opinion circa 2008.
Even things that look like fact may not be.
They simply are (or at least, were) my opinion.
I am not a fact checker.
I do not care about 'facts'.
Do not rely on anything I have to say.
This, too, is merely my opinion...

(c) Copyright 2014 Brett Paufler

Brett@Paufler.net

Terms of Service

These webpages were assembled automatically using the following python scripts.
Archive Articles Make the Main HTML Python File

'''


def _entryMarkup(fN):
    '''Return the html snippet for one article: pdf link, html link, doc link.

    fN is the file name of the article's .html file.  The pdf and doc
    targets are derived from it by swapping the extension; their existence
    is presumed, not checked.
    '''
    htmlLink = fN.replace(" ", "%20")   # spaces are not valid in a href
    stem = htmlLink[:-4]                # drops "html", keeps the trailing dot
    pdfLink = stem + "pdf"
    docLink = stem + "doc"
    linkName = fN[:-5]                  # visible text: name without ".html"
    # NOTE(review): the anchor markup is reconstructed around the text that
    # survived in this copy of the script ("pdf", the name, "doc") and the
    # three link variables the original computed -- compare against the
    # original file if the exact tags matter.
    return (
        '<p><a href="' + pdfLink + '">\npdf\n</a> '
        '<a href="' + htmlLink + '">\n' + linkName + '\n</a> '
        '<a href="' + docLink + '">\ndoc\n</a></p>'
        '\n<br>\n\n'
    )


def createArchiveArticlesMAINHTML(directory=".", outFile=_OUT_FILE):
    '''Write an index html page linking to every .html file in directory.

    directory -- folder that is scanned and that receives the index page
                 (defaults to the current directory, as the script is meant
                 to be run in place)
    outFile   -- file name of the index page; a file with exactly this name
                 is never linked to

    For each file whose name ends in .html (any letter case) one entry is
    written linking to base.pdf, base.html and base.doc.  Only the .html
    file is looked for; the other two are presumed to exist.
    Returns nothing; an existing outFile is overwritten.
    '''
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("createArchiveArticlesHTML() started")

    # os.listdir returns names in arbitrary order; sort so the page is stable.
    pages = [fN for fN in sorted(os.listdir(directory))
             if fN.lower().endswith(".html")
             and fN != outFile]         # no sense linking to itself

    # 'with' closes the file even if a write fails part way through.
    with open(os.path.join(directory, outFile), 'w') as fileOut:
        fileOut.write(_HEADER)
        # a href meat and potatoes
        for fN in pages:
            fileOut.write(_entryMarkup(fN))
        fileOut.write(_FOOTER)

    print("createArchiveArticlesHTML() ended")
# END createArchiveArticlesHTML()


# Run on execution: the script is used by dropping it into the target
# directory and running it there.
createArchiveArticlesMAINHTML()


# THIS IS INCLUDED FOR REFERENCE - HIGHLY BRITTLE
#
# (Kept as comments rather than bare strings so the Windows paths below
#  are not parsed as string escapes.  Python 2 code, not run from here.)
#
# #the Word Doc converter to html
# import win32com.client
# import os
#
# def allWordDOCtoHTML(dirIn = ".\docFilesIn",dirOut = ".\htmlOut"):
#     #Takes all Microsoft Word .doc's (or docx) files in dirIn
#     #and converts to standard MS format html files in dirOut
#     print "allWordDOCtoHTML staring"
#     makeOutDir(dirOut)
#     word = win32com.client.gencache.EnsureDispatch("Word.Application")
#     #docIn = "tonyHawk.doc"
#     for docIn in os.listdir(dirIn):
#         fP = os.path.abspath(dirIn) + "\\" + docIn
#         print fP
#         if docIn.endswith(".doc"):
#             sN = os.path.abspath(".") + dirOut + "\\"+ docIn[:-3] + "html"
#         elif docIn.endswith(".docx"):
#             sN = os.path.abspath(".") + dirOut + "\\"+ docIn[:-4] + "html"
#         print sN
#         doc = word.Documents.Open(fP)
#         doc.SaveAs(sN, win32com.client.constants.wdFormatHTML)
#         doc.Close()
#     print "allWordDOCtoHTML ending"
# # END allWordDOCtoHTML
# allWordDOCtoHTML()


# textFileWork
# archiveArticle_docHTML_addLink
#
# Since I have a separate py for this to be found in my programs at above
# directory and am only including it here for back up purposes,
# I have not reduced this to a function
#
# #MS WORD has a save html format type
# #This takes the saved html file (of the version of word I'm currently using 2003, I believe)
# #And exports a file closer to what I want out of an html
# #Complete with custom links and so forth
# #THIS IS HIGHLY BRITTLE, dependent upon Word, which is dependent upon the computer I am using
# #DANGER!!! THIS MODIFIES A FILE IN PLACE
# #COMPLETE ERASURE IS POSSIBLE
# #SO, ONLY USE ON COPIES, not the last surviving original
#
# #if "{{{" in line is the selector
# #footerText is the copy out, likely all you'll ever want to modify
#
# import os
# import fileinput
#
# endTagged = False
# footerText = '
#
# '
# (NOTE(review): the footerText value appears to have lost its markup in
#  this copy -- only the quotes and line breaks remain.)
# rawHTMLdir = "./htmlOut"
# for htmlFile in os.listdir(rawHTMLdir):
#     print "Working on " + htmlFile
#     infile = rawHTMLdir + "/" + htmlFile
#     for line in fileinput.input(infile, inplace=True):
#         if "{{{" in line:
#             if endTagged == False:
#                 endTagged = True
#                 line = line.replace(line, footerText)
#                 print line
#             else:
#                 continue
#         elif "}}}" in line:
#             continue
#         else:
#             print(line)
# print "END"