'''
Created on Mar 27, 2014

@author: Brett Paufler
Copyright Brett Paufler
3-27-14
'''

import contextlib
import os

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen into urllib.request; bind it under the old
    # name so the rest of the module can keep calling urllib2.urlopen.
    import urllib.request as urllib2

# STEP ONE
# Gets the Base Web Pages from CALottery.com
# Addresses are hardwired

# (url, local file name) pairs fetched by downloadBaseHTML, in order.
# Every page is saved into the default "lottery" directory.
_BASE_PAGES = (
    # Main page
    ("http://www.calottery.com", "caLottery.html"),
    # Scratcher pages, one per ticket price
    ("http://www.calottery.com/play/scratchers-games/$1-scratchers",
     "scratch1.html"),
    ("http://www.calottery.com/play/scratchers-games/$2-scratchers",
     "scratch2.html"),
    ("http://www.calottery.com/play/scratchers-games/$3-scratchers",
     "scratch3.html"),
    ("http://www.calottery.com/play/scratchers-games/$5-scratchers",
     "scratch5.html"),
    ("http://www.calottery.com/play/scratchers-games/$10-scratchers",
     "scratch10.html"),
    ("http://www.calottery.com/play/scratchers-games/$20-scratchers",
     "scratch20.html"),
    # Draw games
    ("http://www.calottery.com/play/draw-games",
     "DRAWdrawGames.html"),
    ("http://www.calottery.com/play/draw-games/powerball",
     "DRAW-powerball.html"),
    ("http://www.calottery.com/play/draw-games/mega-millions",
     "DRAW-megaMillions.html"),
    ("http://www.calottery.com/play/draw-games/superlotto-plus",
     "DRAW-superLotto.html"),
    ("http://www.calottery.com/play/draw-games/fantasy-5",
     "DRAW-fantasy5.html"),
    ("http://www.calottery.com/play/draw-games/daily-4",
     "DRAW-daily4.html"),
    ("http://www.calottery.com/play/draw-games/daily-3",
     "DRAW-daily3.html"),
    ("http://www.calottery.com/play/draw-games/daily-derby",
     "DRAW-dailyDerby.html"),
    ("http://www.calottery.com/play/draw-games/hot-spot",
     "DRAW-hotSpot.html"),
    # Fact sheets, draw games odds
    ("http://www.calottery.com/media/fact-sheets/games",
     "drawGamesOdds.html"),
)


def makeOutputDirectory(a, letter=0):
    '''Create an output directory if it does not already exist.

    Standard two tier "make directory" subroutine.

    a      -- path of the top-level directory.
    letter -- optional name of a sub-directory below ``a``; any falsy
              value (the default ``0``) means "create ``a`` itself".

    Prints whether the directory was created or already existed.
    '''
    print("makeOutputDirectory called")
    if letter:
        # os.path.join picks the right separator for the platform; the
        # previous hand-built ".\\" separators only worked on Windows.
        c = os.path.join(a, letter)
    else:
        c = a
    if not os.path.exists(c):
        os.makedirs(c)
        print("Directory " + c + " was created")
    else:
        print("Directory " + c + " already existed")


def downloadWebpage(thisURL="http://www.calottery.com",
                    name="caLottery.html",
                    dirThis="lottery"):
    '''Download one web page and save its raw bytes to a local file.

    Should work for any web page by feeding it separate inputs.
    Note: the result is the static page only; linked resources are not
    fetched, so often there are no pictures.

    thisURL -- address of the page to fetch.
    name    -- file name to save the page under.
    dirThis -- directory to save into; "" saves into the current
               working directory. The directory must already exist.

    Errors raised by urlopen or by opening/writing the file propagate
    to the caller.
    '''
    print("downlaodWebpage called")
    if dirThis == "":
        localName = name
    else:
        # Same directory name that makeOutputDirectory creates; the old
        # ".//dir.//name" form pointed at "dir." on non-Windows systems.
        localName = os.path.join(dirThis, name)

    # closing() releases the connection even if read() raises.
    with contextlib.closing(urllib2.urlopen(thisURL)) as page:
        content = page.read()

    # Binary mode: the bytes are stored exactly as they were served.
    with open(localName, 'wb') as localFile:
        localFile.write(content)

    if os.path.exists(localName):
        print(thisURL + " webpage saved as " + localName)


def downloadBaseHTML():
    '''Download the first batch of html web pages.

    The scratchers download relies on the documents downloaded in this
    function, so this runs first.
    '''
    print('downloadBaseHTML() starting')

    # Creates the lottery directory if it doesn't exist
    makeOutputDirectory("lottery")

    for url, fileName in _BASE_PAGES:
        downloadWebpage(url, fileName)

    print("Down Load Base Web Pages Run and Ended Successfully")
    print('downloadBaseHTML() ending')


if __name__ == "__main__":
    downloadBaseHTML()