# -*- coding: utf-8 -*-
'''
Created on Dec 16, 2014
@author: Brett Paufler
Copyright Brett Paufler

Pulls 1000 new and 1000 front_page submissions
and saves all sorts of data on them.

v1.3 12-30-14
pulled praw_submission_to_dict out as a separate function

v1.2 new & front page only

v1.1 12-29-14
Fixed slow to_csv call:
dF.to_csv(path_or_buf=sN, encoding='utf-8')
was very slow, so all text is force-encoded to ascii instead,
which works much faster
old data presumed dead 12-29-14

No longer recording:
"media_embed": s.media_embed,
'''
import datetime

import praw
import pandas as pd

from reddit_offline import quick_report


def time_label():
    '''Returns a date string for 'now', used as a file name prefix
    so saved pulls sort chronologically, e.g. "2014-12-22-10-21-35".
    '''
    # strftime avoids the old isoformat()[:-7] slice, which broke
    # whenever now() landed on a whole second (no microseconds to trim)
    return datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")


def praw_submission_to_dict(s, category_name):
    '''Given a praw submission object (s), returns a dictionary of values
    usable as a row in a pandas DataFrame.

    category_name is the name of the pull (new, front_page, games, pr_n).
    '''
    try:
        # scrub s.selftext: control/whitespace characters break the csv layout
        sText = s.selftext
        killList = ["\n", "\r", "\t", "\0"]
        for k in killList:
            sText = sText.replace(k, " ")

        # everything textual is force-encoded to ascii
        # (see the v1.1 note above on to_csv speed)
        row = {"id": s.id.encode('ascii', 'replace'),
               "category": category_name.encode('ascii', 'replace'),
               "title": s.title.encode('ascii', 'replace'),
               "selftext": sText.encode('ascii', 'replace'),
               "domain": s.domain.encode('ascii', 'replace'),
               "url": s.url.encode('ascii', 'replace'),  # img link
               "permalink": s.permalink.encode('ascii', 'replace'),
               "subreddit": s.subreddit.display_name.encode('ascii', 'replace'),
               "author": str(s.author).encode('ascii', 'replace'),
               "num_comments": s.num_comments,
               "score": s.score,
               "over_18": s.over_18,
               "created_utc": s.created_utc,  # time stamp
               }
        print row
        return row
    except AttributeError:
        # submissions missing an expected attribute are skipped (returns None)
        print "\tBad Egg: Incomplete Submission %s" % category_name


def get_thousands(tako, n):
    '''Pulls up to n submissions for each of the listed categories,
    saves them all to a single csv, and returns the DataFrame.
    '''
    categoryDictionary = {"new": tako.get_new(limit=n),
                          "front_page": tako.get_front_page(limit=n),
                          #"controversial": tako.get_controversial(limit=n),
                          #"rising": tako.get_rising(limit=n),
                          #"top": tako.get_top(limit=n),
                          }
    subs = []
    for catName, catGen in categoryDictionary.items():
        print "GETTING %s" % catName
        for s in catGen:
            row = praw_submission_to_dict(s, catName)
            if row:  # skips the Bad Eggs, which come back as None
                subs.append(row)

    dF = pd.DataFrame(subs)
    sN = "C:/%s_thousands.txt" % time_label()
    print "IN PROCESS OF SAVING PANDAS: %s" % sN
    dF.to_csv(path_or_buf=sN)
    quick_report(dF)

    print "Got %d Links" % len(subs)
    print "Thousands Ending"
    return dF


###
### thousands Above, update_subreddits Below
###

if __name__ == "__main__":
    tako = praw.Reddit(user_agent="unknown")
    get_thousands(tako, 1000)
    print datetime.datetime.now()

    #TODO - subReddit Specific Pulls with memory
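
# A minimal usage sketch (not part of the original script): reloading a
# saved pull for a quick look. The file name below is hypothetical;
# substitute a real csv written by get_thousands().
#
#   import pandas as pd
#   dF = pd.read_csv("C:/2014-12-22-10-21-35_thousands.txt", index_col=0)
#   print dF["subreddit"].value_counts().head(10)      # busiest subreddits
#   print dF.groupby("category")["score"].describe()   # score spread per pull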