''' Created on Dec 30, 2014 @author: Brett Paufler Copyright Brett Paufler ''' import urllib import pickle import os import sets from reddit_offline import quick_report from reddit_offline import thousands_to_dataframe sD = "save_directory_path" manifest_path = sD + "manifest.txt" def load_manifest(): ''' ''' if not os.path.isfile(manifest_path): return sets.Set([]) else: with open(manifest_path,"r") as f: return pickle.load(f) def save_manifest(data): ''' ''' with open(manifest_path,"w") as f: pickle.dump(data, f) if __name__ == "__main__": dF = thousands_to_dataframe(last_first=False, max_num_files=1) iF = dF[dF["url"].str.endswith('.jpg')] quick_report(iF, 5, 10) imgList = iF.url.tolist()[:1] print imgList print "Load Manifest" manifest = load_manifest() print "manifest: %r" % manifest for img in imgList: sN = sD + img.split("/")[-1] if img not in manifest: try: urllib.urlretrieve(img, sN) manifest.update([img]) #a = 1/0 except ZeroDivisionError: #TODO, clearly wrong print "Something" else: print "Image Already Saved: %s" % img print sN print "manifest: %r" % manifest save_manifest(manifest) #print "Here" #print manifest