'''
Created on Jan 15, 2015
@author: Brett Paufler
(c) Copyright Brett Paufler

stream_view() runs for an hour, saving the stream to a csv
'''
import praw
import pandas as pd
import datetime
import time

from reddit_thousands import time_label
from reddit_offline import quick_report


def stream_info(s):
    '''returns one csv line (id,domain,subreddit,author,over_18,time)
    for the given submission'''
    sub = ",".join([s.id.encode('ascii', 'replace'),
                    s.domain.encode('ascii', 'replace'),
                    s.subreddit.display_name.encode('ascii', 'replace'),
                    str(s.author).encode('ascii', 'replace'),
                    str(s.over_18),
                    str(s.created_utc),
                    ])
    return sub


def stream_view(sleepTime=300, postsToPull=1000, runTime=4000):
    '''partial view of the reddit stream (new)
    With Duplicates: no effort is made to screen out duplicates

    sleepTime is in seconds: 5 for test, 300 (five minutes) for a run
    postsToPull: 5 for test, 1000 for a run
    runTime: 30 for test, 3600 (one hour) for a run

    Typical Usage
    TEST: stream_view(sleepTime=5, postsToPull=5, runTime=30)
    MID:  stream_view(sleepTime=60, postsToPull=100, runTime=600)
    RUN:  stream_view(sleepTime=300, postsToPull=1000, runTime=3600)
    '''
    tako = praw.Reddit(user_agent="nope")

    #initializes the csv output file
    sN = "save_path/%s_%s.txt" % (runTime, time_label())
    f = open(sN, "w")
    f.write("id,domain,subreddit,author,over_18,time\n")

    #list of posts reviewed
    #subIds = []

    start = datetime.datetime.now()
    finish = start + datetime.timedelta(seconds=runTime)
    print "Starting: %s \t Ending: %s" % (start, finish)

    #main timer loop: pull, write, sleep, repeat until runTime is up
    while datetime.datetime.now() < finish:
        newSubs = tako.get_new(limit=postsToPull)
        for s in newSubs:
            text = stream_info(s)
            print text
            f.write(text)
            f.write("\n")
        f.flush()
        print "Now: %s \t Sleep: %s \t End: %s" % (
            datetime.datetime.now(), sleepTime, finish)
        time.sleep(sleepTime)
    f.flush()
    f.close()
    print "ALL ENDS THAT ENDS WELL: %s" % datetime.datetime.now()
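
#A minimal sketch of the duplicate screening hinted at by the
#commented-out subIds list in stream_view() above: the same pull loop,
#but ids already written are kept in a set and repeats are skipped.
#This is a hypothetical variant, not part of the original runs; the
#user_agent string and save path are placeholders, as above.
def stream_view_unique(sleepTime=300, postsToPull=1000, runTime=3600):
    '''stream_view() variant that writes each submission only once'''
    tako = praw.Reddit(user_agent="nope")
    sN = "save_path/unique_%s_%s.txt" % (runTime, time_label())
    f = open(sN, "w")
    f.write("id,domain,subreddit,author,over_18,time\n")
    seenIds = set()  #ids already written this run
    finish = datetime.datetime.now() + datetime.timedelta(seconds=runTime)
    while datetime.datetime.now() < finish:
        for s in tako.get_new(limit=postsToPull):
            if s.id not in seenIds:
                seenIds.add(s.id)
                f.write(stream_info(s) + "\n")
        f.flush()
        time.sleep(sleepTime)
    f.close()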

def sixty_minutes_of_reddit(fN="C:/Users/etc"):
    '''returns a pandas data frame from a stream_view csv file
    of all data time-stamped within an hour of the last entry
    (i.e. gives a 60min view of the stream)

    fN = an absolute path "C:/Users/etc"
    '''
    dF = pd.DataFrame.from_csv(fN)
    quick_report(dF, 2, 25)

    #drop_duplicates: as it sounds, keeps only the unique rows
    dF = dF.drop_duplicates()
    quick_report(dF, 2, 25)

    #working backwards from the max time stamp,
    #limits the data to within an hour of it
    dF = dF[dF["time"] >= (max(dF["time"]) - 3600)]
    quick_report(dF, 2, 25)

    return dF

#fN = "C:/data_reddit/"
#fN += "stream_view_4000_2015-01-16-06-45-24.txt"
#dF = sixty_minutes_of_reddit(fN)


def next_sub_id(subId):
    '''given a reddit id, increments the id by one
    (base 36 over [0-9a-z]): XXXX7z to XXXX80, XXXyzz to XXXz00
    '''
    plusOne = True
    d = -1                                  #work from the last character back
    subId = [ord(x) for x in list(subId)]
    while plusOne:
        subId[d] += 1
        if subId[d] == 123:                 #past 'z': wrap to '0', carry left
            subId[d] = 48
            d -= 1
        elif subId[d] == 58:                #past '9': step up to 'a', done
            subId[d] = 97
            plusOne = False
        else:                               #plain increment, done
            plusOne = False
    subId = ''.join([chr(x) for x in subId])
    return subId


def mass_pull(start="6 digit alphanumeric id", howMany=10000,
              info=stream_info, addToExisting=True):
    '''pulls sequential reddit submissions and saves them to mass_pull

    start = reddit id to start the sequential pull from
            (ignored when addToExisting=True)
    howMany = number of sequential submissions to pull (if they exist)
    info = a function that turns a submission into a csv line
           (see stream_info for the format)

    mass_pull() resumes where it left off
    '''
    tako = praw.Reddit(user_agent="agent id")

    #resume from the old file, or start a new one
    if addToExisting:
        sN = "C:/data_reddit/mass_pull.txt"
        with open(sN, "r") as f:
            start = f.readlines()[-1].split(',')[0]
        subId = next_sub_id(start)
        print "mass_pull() resuming at %s" % start
        f = open(sN, "a")
    else:
        sN = "C:/data_reddit/mass_pull_%s_%d.txt" % (start, howMany)
        f = open(sN, "w")
        f.write("id,domain,subreddit,author,over_18,time\n")
        subId = start

    count = 0
    while count < howMany:
        try:
            sub = tako.get_submission(submission_id=subId)
            sub = info(sub)
            f.write(sub)
            f.write("\n")
            f.flush()
            print sub
        except Exception:
            print "Bad ID: %s" % subId
        subId = next_sub_id(subId)
        count += 1
    f.flush()
    f.close()
    print "MASS_PULL() FINISHED: %s %d" % (start, howMany)


if __name__ == "__main__":
    mass_pull()
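
#Cross-check for next_sub_id(): reddit ids are base 36 over [0-9a-z],
#so the same increment can be done with plain integer arithmetic.
#A minimal sketch (hypothetical helper, not used above); it assumes
#a lower-case id that does not overflow past all 'z's:
def next_sub_id_base36(subId):
    '''arithmetic equivalent of next_sub_id()'''
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    n = int(subId, 36) + 1        #reddit ids parse as base-36 integers
    out = ""
    while n:
        out = digits[n % 36] + out
        n //= 36
    return out.rjust(len(subId), "0")

#e.g. next_sub_id("2xxx7z") and next_sub_id_base36("2xxx7z")
#both give "2xxx80"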