'''
Chart the price of corn against the price of another commodity (oil by default).

Reads a corn CSV and a commodity CSV, merges them on their 'Date' index,
keeps the rows whose 'Future' value (with "CZ" removed) equals the year of
the row's date, and writes line, box-plot, ratio, rolling-correlation and
scatter charts into ./chartsOut/.

Created on May 19, 2014

@author: Brett Paufler
Copyright Brett Paufler

Time log:
5-20-14
2pm start
4:30start
2.5hours from start to something graphical
7:30 - 8:30 1 hour 5x charts
10-:30 11 .5 hr correlation
4 hours, done with oil
Eh, not quite, +1 more hour for scatter
and a few more rolling correlations
'''
import pandas
import matplotlib.pyplot as plt

# Could work as a function, but why bother?
#
# Vary these four variables and the script should work fairly well for
# other commodity types:
#     commodityDataFile    IS location of csv file
#     cN                   IS header name for commodity in that file
#     cL                   IS the name desired on graphical output
#     normalizationFactor  IS rough factor difference between commodity
#                          and corn
#
# FOR REFERENCE, the default values that work for OIL are:
#     commodityDataFile = r"rawCommodityData/RWTC-WTI-Crude-Oil-Spot-Price-Cushing-OK-FOB.csv"
#     cN = "Oil"
#     cL = "Price of Oil in Dollars"
#     normalizationFactor = 5

# Modify These Four to Transfer to Another Commodity
commodityDataFile = r"rawCommodityData/RWTC-WTI-Crude-Oil-Spot-Price-Cushing-OK-FOB.csv"
cN = "Oil"
cL = "Price of Oil in Dollars"
normalizationFactor = 5

# From Here down, be careful about modifying anything
cornDataFile = r"./cornOneDataFile/compositeCorn.txt"
cornLabel = "Price of Corn in Dollars"


def _saveChart(chartName):
    '''Announce and save the current matplotlib figure.

    chartName is the part of the file name that follows "Corn_vs_<cN>_",
    without the ".png" extension.
    '''
    outputName = "./chartsOut/Corn_vs_%s_%s.png" % (cN, chartName)
    print("Working On " + outputName)
    plt.savefig(outputName)


def _rollingCorr(left, right, window):
    '''Return the rolling correlation of two Series over `window` rows.

    Uses Series.rolling(...).corr(...) when the installed pandas offers it
    and falls back to the older top-level pandas.rolling_corr otherwise, so
    the script runs on both old and current pandas releases.
    '''
    if hasattr(left, "rolling"):
        return left.rolling(window=window).corr(right)
    return pandas.rolling_corr(left, right, window=window)


def _asCorrFrame(corrSeries, columnLabel):
    '''Wrap a correlation Series in a one-column DataFrame named columnLabel.'''
    frame = pandas.DataFrame(corrSeries)
    frame.columns = [columnLabel]
    return frame


# NOTE: print is written in its single-argument call form throughout, which
# behaves the same under Python 2 (the original target) and Python 3.
cornData = pandas.read_csv(cornDataFile, index_col='Date', parse_dates=['Date'])
print(cornData)

otherData = pandas.read_csv(commodityDataFile, index_col='Date', parse_dates=['Date'])
print(otherData)

# Merging the data frames:
#     cornData is left, otherData is right
#     how='inner'      keeps only the index values present in both frames
#     left_index=True  merges the left table on its index (the dates)
#     right_index=True merges the right table on its index (the dates)
# Old, not used anymore: on="Date" (Date value in both dataFrames).
# left_index must be a boolean; the string "Date" used previously only
# worked because it was truthy.
mergedData = cornData.merge(otherData, how='inner',
                            left_index=True, right_index=True)
mergedData = mergedData.drop(["Open", "High", "Low", "Volume", "Interest"],
                             axis=1)
mD = mergedData

# Prepping the dateYear value (extract, stringify, slice the first 4 chars)
mD["dateYear"] = mD.index
mD["dateYear"] = mD["dateYear"].apply(str).str.slice(0, 4, 1)

# Much faster, in one step
mD["futureYear"] = mD["Future"].str.replace("CZ", "")

# This is the Filter Logic: keep rows whose date year matches the future's
# year. copy() makes the result an independent frame so the column edits
# below do not act on a view of mergedData.
mD = mD[mD["dateYear"] == mD["futureYear"]].copy()

# Delete extraneous helper columns
mD = mD.drop(["dateYear", "futureYear", "Future"], axis=1)

# Rename by pop-and-append so the resulting column order (commodity, then
# corn) matches what the charts have always used.
mD[cL] = mD.pop(cN)
mD[cornLabel] = mD.pop("Settle")

# mD is the raw dataFrame, complete and full at this point:
#     index = date
#     Price of Whatever in Dollars (cL)
#     Price of Corn in Dollars
print(mD)

# Making and Printing the Charts
# NOTE(review): DataFrame.boxplot() and plt.scatter() are called without
# opening a fresh figure, so they presumably draw onto whatever axes the
# preceding plot() left current - confirm that overlay is intended.
plt.rcParams['figure.figsize'] = 20, 10

mD.plot()
_saveChart("Line")

mD.boxplot()
_saveChart("BoxPlot")

# Rolling correlations; each window is a number of rows. The commodity
# column is looked up through cL (not a hard-coded oil label) so changing
# the four settings above is enough to switch commodities.
corrData = _rollingCorr(mD[cornLabel], mD[cL], 120)
corrThirty = _rollingCorr(mD[cornLabel], mD[cL], 30)
corrYear = _rollingCorr(mD[cornLabel], mD[cL], 360)
corrThreeYear = _rollingCorr(mD[cornLabel], mD[cL], 1080)

factor = normalizationFactor
if factor:
    fS = "Price of %s in %dx Dollars" % (cN, factor)

    # Work on a copy so mD keeps its exact prices; scaling in place and
    # dividing back afterwards can introduce floating-point round-off.
    scaledData = mD.copy()
    scaledData[fS] = scaledData.pop(cL) * factor
    # Re-append corn so it stays the last column, after the scaled series.
    scaledData[cornLabel] = scaledData.pop(cornLabel)

    scaledData.plot()
    _saveChart("%dx_Line" % factor)

    scaledData.boxplot()
    _saveChart("%dx_BoxPlot" % factor)

    print(mD)
else:
    print("No Factor")

# This would often work better reversed...
ratioLabel = "Ratio Price Corn / Price %s" % cN
mD[ratioLabel] = mD[cornLabel] / mD[cL]
mD = mD.drop([cornLabel, cL], axis=1)

mD.plot()
_saveChart("Ratio_Line")

mD.boxplot()
_saveChart("Ratio_BoxPlot")

print("Printing mD")
print(mD)

# Pre-formatting the correlation Series as labelled one-column dataFrames
corrLabel = "Correlation %s to Corn" % cN

print("Printing corrData")
corrData = _asCorrFrame(corrData, corrLabel)
print(corrData)

print("Printing corrThirty")
corrThirty = _asCorrFrame(corrThirty, corrLabel)
print(corrThirty)

print("Printing corrYear")
corrYear = _asCorrFrame(corrYear, corrLabel)
print(corrYear)

print("Printing corrThreeYear")
corrThreeYear = _asCorrFrame(corrThreeYear, corrLabel)
print(corrThreeYear)

# Line plot, corr 1080
corrThreeYear.plot()
_saveChart("1080_Rolling_Correlation")

# Line plot, corr 360 (the original notes call this "year long")
corrYear.plot()
_saveChart("360_Rolling_Correlation")

# Line plot, corr 30
corrThirty.plot()
_saveChart("30_Rolling_Correlation")

# Line plot, corr 120
corrData.plot()
_saveChart("120_Rolling_Correlation")

# Scatter plot, corr 120
plt.scatter(corrData.index, corrData[corrLabel])
_saveChart("120_Rolling_Scatter")

print(corrThirty)

print("End of cornToCommodity")