'''
Created on Sep 20, 2015

@author: Brett Paufler
Copyright Brett Paufler

Pretty clearly, this is a work in progress.
path_to_latest_scan is probably the only meaningful part,
and even that needs help.

Run as a script, this loads the latest scan CSV and prints a rough
tally of the file extensions found in its 'path' column.
'''

from collections import Counter
from itertools import islice
from os import listdir
from os.path import join

import pandas as pd

# Directory that holds the scan files (placeholder value, still a TODO).
INPUT_DIRECTORY = 'C: TODO'

# Candidate extensions longer than this are reported as 'no_ext'.
MAX_EXT_LENGTH = 5

# How many (extension, count) pairs the script echoes as a sample.
SAMPLE_SIZE = 11


# Have this copied, so presumably refined in comp_compare.
# Probably should break full and partial into separate functions,
# if this is a thing...
def path_to_latest_scan(full_scans_only=True,
                        input_directory=INPUT_DIRECTORY):
    r'''Return the file path to the latest comp_catalog scan of computer.

    "Latest" means the directory entry that sorts last by name.

    Args:
        full_scans_only: if true, entries whose name contains 'partial'
            (partial, i.e. non C:\ scans) are ignored.
        input_directory: directory to search; defaults to INPUT_DIRECTORY.

    Returns:
        input_directory joined with the name of the selected entry.

    Raises:
        IndexError: if no candidate entry is found in input_directory.
        OSError: propagated from listdir if the directory is unreadable.
    '''
    names = sorted(listdir(input_directory), reverse=True)
    if full_scans_only:
        names = [name for name in names if 'partial' not in name]
    if not names:
        # Same exception type the bare names[0] lookup would raise,
        # but with a message that says what was being looked for.
        raise IndexError('no scan files found in %r' % (input_directory,))
    return join(input_directory, names[0])


def last_three(x):
    '''Return the extension of the Windows-style path x, or 'no_ext'.

    The file name is whatever follows the last backslash; the extension
    is whatever follows the last '.' in that name.  'no_ext' is returned
    when the name has no '.', or when the candidate extension is longer
    than MAX_EXT_LENGTH characters.
    '''
    file_name = x.split('\\')[-1]
    if '.' not in file_name:
        return 'no_ext'
    ext = file_name.split('.')[-1]
    if len(ext) > MAX_EXT_LENGTH:
        return 'no_ext'
    return ext


def main():
    '''Load the latest scan and print a summary of its file extensions.'''
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', 250)

    # full_scans_only=False: partial scans are eligible as well.
    dF = pd.read_csv(path_to_latest_scan(full_scans_only=False))
    print(dF.columns.values.tolist())

    # Work on a copy so the loaded frame is left untouched.
    iF = dF.copy()
    iF['ext'] = iF['path'].map(last_three)

    e = iF['ext']
    print(e)
    eL = list(e)
    # NOTE(review): the series is printed a second time here, as in the
    # original script; possibly eL was intended -- confirm.
    print(e)

    c = Counter(eL)
    print(c)

    # Echo a small sample of (extension, count) pairs, in Counter order.
    for k, v in islice(c.items(), SAMPLE_SIZE):
        print('%s %s' % (k, v))
    print(len(c))
    print('')

    # NOTE(review): hard-coded ratios, presumably counts taken from one
    # particular scan -- confirm before relying on them.
    print(31180.0 / 445227)
    print(29597.0 / 445227)


if __name__ == '__main__':
    main()