'''
Created on Jan 7, 2015

@author: Brett Paufler
Copyright Brett Paufler

Script: load saved reddit posts into a DataFrame, keep the first 25
'front_page' rows that have a non-null "selftext", join those texts into
one string, run it through ``summarize`` and print the result.
'''
from reddit_offline import thousands_to_dataframe
from reddit_offline import quick_report
import pandas as pd
import numpy as np

# NOTE: this may or may not be the correct module name.
# The utility (not included here) came from GitHub; offhand it looks like
# a tokenizer -- said well after the fact, 2020-03-24.
from summarize_text_utility import summarize

# Load the data, then narrow it down step by step.
dF = thousands_to_dataframe(last_first=True, max_num_files=1)
dF = dF[dF.category == 'front_page']
dF = dF.dropna(subset=["selftext"])
dF = dF[:25]
quick_report(dF, entries=2, col_width=15)

# NOTE: despite its name, ``title`` holds the "selftext" column values.
title = dF["selftext"].tolist()

# Drop any remaining missing values. The previous test ``t != np.nan`` was
# always True (NaN never compares equal to anything, itself included), so
# it filtered nothing; ``pd.notnull`` performs the intended check.
title = [t for t in title if pd.notnull(t)]

title = ". ".join(title)
title = summarize(title)

# Parenthesised form prints the same single value on Python 2 and Python 3.
print(title)