'''
Created on Apr 10, 2014

@author: Brett Paufler
Copyright Brett Paufler

TAKES RAW CORN DATA
AND PROVIDES GRAPH BASED UPON INPUT VALUE YEARS

Time Spent 3-4 hours at first run
'''
#import BS
import os

import pandas
import matplotlib.pyplot as plt


def monthDayNumberArray():
    '''Returns a default Month Day array for use as a key.

    The list runs from "01-01" to "12-31" as zero padded "MM-DD"
    strings. Every month gets 31 entries, so impossible dates such as
    02-29, 02-30, 02-31 are included (12 * 31 = 372 entries).
    '''
    return ["%02d-%02d" % (month, day)
            for month in range(1, 13)
            for day in range(1, 32)]


_MONTH_NAMES = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)

# Maps the "MM-" prefix of a month-day key to the month name.
_MONTH_BY_PREFIX = dict(
    ("%02d-" % number, label)
    for number, label in enumerate(_MONTH_NAMES, 1)
)


def numberToMonth(x):
    '''Converts a month-day string to its spelled out form.

    x is a month-date string such as "01-01"; the return value is then
    "January 01". A string that does not start with a known "MM-"
    prefix is returned unchanged.
    '''
    label = _MONTH_BY_PREFIX.get(x[:3])
    if label is None:
        return x
    return label + " " + x[3:]


def dailyPriceOfIssue(years=("2012", "2013"), full=True):
    '''Returns a dataFrame of daily High prices, one column per year.

    The result is indexed by MD ("01-01" .. "12-31") and holds one
    "CZ<year> High" column for every requested year.

    years  an iterable of the years wanted (not a range, but a list);
           each entry is converted with str(), so "2012" and 2012 both
           work. The data is read from
           "./corn/FUTURE_CZ<year>.csv", which must provide the columns
           Date, Open, High, Low, Settle, Volume and Open Interest.
    full   True (default) keeps every MD key, even the ones without
           data (NaN). False keeps only the dates present in every
           year: prettier graphs, but at longer year runs the data
           becomes meaningless.

    Only rows dated inside the issue's own calendar year and with a
    High of at least 25 are used.
    '''
    print("dailyPriceStackedGraphByYears called")

    # Directory holding the raw issue files.
    rawPath = "./corn/"

    # Base frame: the month-day keys serve as index and as MD column.
    dates = monthDayNumberArray()
    dF = pandas.DataFrame(dates, index=dates, columns=["MD"])

    # 'left' keeps every key of the base frame, 'inner' only the matches.
    joinType = 'left' if full else 'inner'

    # Cycles through the issues.
    for year in years:
        year = str(year)
        dP = rawPath + "FUTURE_CZ" + year + ".csv"
        sIA = pandas.read_csv(dP)

        # yB = yearBegin, yE = yearEnd.
        # Date is compared as text against these bounds.
        yB = year + "-01-01"
        yE = year + "-12-31"

        # Slices the issue down to its own year and drops rows whose
        # High is below 25 (a little more aggressive than zero).
        # The copy keeps the column edits below off the frame that
        # read_csv returned.
        wanted = (sIA.Date >= yB) & (sIA.Date <= yE) & (sIA.High >= 25)
        sIA = sIA[wanted].copy()

        # Slices the date down to month-day ("YYYY-MM-DD" -> "MM-DD").
        sIA["MD"] = sIA.Date.map(lambda stamp: stamp[5:])

        # Removes unwanted variables.
        # So this is where they would be added back in.
        for unused in ("Date", "Open Interest", "Open",
                       "Low", "Settle", "Volume"):
            del sIA[unused]

        # Renaming High so every year gets its own column.
        sIA["CZ" + year + " High"] = sIA["High"]
        del sIA["High"]
        print(sIA)
        print("sIA printed")

        # Merges the base MD array with the current issue.
        dF = pandas.merge(dF, sIA, how=joinType,
                          left_index=True, right_on=['MD'])

        # The suffixed copies only appear when both sides carry an MD
        # column, so they are removed only when present.
        for suffixed in ('MD_x', 'MD_y'):
            if suffixed in dF.columns:
                del dF[suffixed]
        dF = dF.set_index("MD")
        print(dF)

    print(dF)
    print("dailyPriceStackedGraphByYears Ended returned dF")
    return dF
#END dailyPriceStackedGraphByYears
#dailyPriceOfIssue()


def prettyPrintMonth(dF):
    '''Returns a copy of dF indexed by spelled out month names.

    Takes a dataFrame with a 01-01 to 12-31 format index and returns
    one with a pretty print January to December format index named
    "MonthDate". The dataFrame passed in is left untouched.
    '''
    relabeled = dF.copy()
    relabeled.index = pandas.Index(
        [numberToMonth(monthDay) for monthDay in dF.index],
        name="MonthDate")
    print(relabeled)
    print('prettyPrintMonth worked')
    return relabeled
#prettyPrintMonth(dailyPriceOfIssue())


def _saveCurrentFigure(name, suffix):
    '''Saves the current figure as ./chartsOut/<name><suffix>, then closes it.'''
    outputDir = "./chartsOut/"
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)
    outputName = outputDir + name + suffix
    print("Working On " + outputName)
    plt.savefig(outputName)
    # Closing keeps figures from piling up and from being drawn on again.
    plt.close()


def graphAlot(toPlot, name):
    '''Fast graphing output for various inputs.

    toPlot is a pandas dataFrame
    name could be whatever, but CZ2000-CZ2014 range will workBest

    Writes <name>_BoxPlot.png, <name>_Line.png and <name>_Bar.png into
    ./chartsOut/ (created when missing).
    '''
    # Standardizes the size (changes the global matplotlib setting).
    plt.rcParams['figure.figsize'] = 20, 10
    print("graphAlot called")

    # boxplot draws on the current axes, so it gets a fresh figure.
    plt.figure()
    toPlot.boxplot()
    _saveCurrentFigure(name, "_BoxPlot.png")

    toPlot.plot()
    _saveCurrentFigure(name, "_Line.png")

    toPlot.plot(kind="bar")
    _saveCurrentFigure(name, "_Bar.png")

    print("graphBox Done output in Charts")
    print("Base Name is " + name)


def graphBox(toPlot, name):
    '''Just the boxPlot graph.

    toPlot is a pandas dataFrame; the chart is written to
    ./chartsOut/<name>_BoxPlot.png (directory created when missing).
    '''
    # Standardizes the size (changes the global matplotlib setting).
    plt.rcParams['figure.figsize'] = 20, 10

    # boxplot draws on the current axes, so it gets a fresh figure.
    plt.figure()
    toPlot.boxplot()
    _saveCurrentFigure(name, "_BoxPlot.png")

    print("graphBox Done output in Charts")
    print("Base Name is " + name)


# Issue years 2000 through 2013, as strings.
years = [str(issueYear) for issueYear in range(2000, 2014)]
print(years)

# Example runs, enable as needed:
#graphAlot(prettyPrintMonth(dailyPriceOfIssue(years)), "CZ2000-TO-CZ2013_ALL")
#graphAlot(prettyPrintMonth(dailyPriceOfIssue(["2010", "2011", "2012", "2013"])), "CZ2010-TO-CZ2013_ALL")
#graphAlot(prettyPrintMonth(dailyPriceOfIssue(["2010", "2011", "2012", "2013"], full=False)), "CZ2010-TO-CZ2013_Partial")
#graphAlot(prettyPrintMonth(dailyPriceOfIssue(["2012", "2013"])), "CZ2012-TO-CZ2013_ALL")
#graphAlot(prettyPrintMonth(dailyPriceOfIssue(["2012", "2013"], full=False)), "CZ2012-TO-CZ2013_Partial")

#dailyPriceOfIssue(years)
#plt.show()
#plt.bar(years,low, color='yellow')

print("DONE WITH EVERYTHING")