'''
Created on Dec 12, 2015

@author: Brett Paufler
Copyright Brett Paufler

Compares two saved scans (file listings and directory listings found in
data_dir) and reports which files/directories were added, deleted or
modified between the older and the newer scan.

TODO: NEW MODIFIED DELETED
    Difference??
    Names = Directory specific

TODO: To Where we were
    Then, add directories
'''

#import difflib
#diff = difflib.Differ().compare(a, b)

from os import listdir
from os.path import join
from datetime import datetime, timedelta
from collections import namedtuple
import csv

from comp_common import data_dir, time_format_string

#data_dir = ''  # path
#new_data_file = data_dir + '\\' + '2015_12_10_19_24_computer_file_listing.csv'
#old_data_file = data_dir + '\\' + '2015_10_24_13_58_computer_file_listing.csv'

# NOTE: every print below uses the single-argument parenthesised form,
# which behaves the same as a Python 2 print statement and as the
# Python 3 print function.


# these are comparisons of that
class Comparisons():
    '''Loads a "new" and an "old" scan from data_dir and diffs them.

    Attributes set by __init__:
        max_lines       maximum number of records read from each scan
        scans           every scan found in data_dir (see list_all_scans)
        time_new        timestamp of the scan used as "new"
        time_old        timestamp of the scan used as "old"
        new_files       set of file_info records from the new file scan
        old_files       set of file_info records from the old file scan
        new_file_names  set of paths found in new_files
        old_file_names  set of paths found in old_files
        new_dir         set of lines from the new directory scan
        old_dir         set of lines from the old directory scan
    '''

    # NOTE(review): this class attribute is not referenced anywhere in
    # this file; list_all_scans builds its own three-field 'scan' records.
    scan = namedtuple('scan', ['time', 'file_path'])  #field_names, verbose, rename)

    def __init__(self,
                 new_days_offset=0,
                 old_days_offset=0,
                 max_lines=1000000000000
                 #data_dir = ,
                 #base_file_name=None,
                 #old_file_name=None
                 ):
        '''Selects the two scans to compare and loads them.

        new_days_offset: how many days (fractions allowed) to step back
            from now before picking the "new" scan; 7 == use a scan that
            is at least a week old (it may be considerably older).
        old_days_offset: how many days (plus one minute) to step back
            from the "new" scan before picking the "old" scan.
        max_lines: upper bound on the records read from each scan file;
            None means no limit.

        Raises IndexError (from select_scan) when no suitable scan exists.
        '''
        self.max_lines = max_lines  #0r NoneTrue
        self.scans = self.list_all_scans()
        self.time_new = self.select_time_new(new_days_offset)
        self.time_old = self.select_time_old(old_days_offset)

        self.new_files = self.load_file_scan(
            self.select_scan(self.time_new, 'file'))
        self.old_files = self.load_file_scan(
            self.select_scan(self.time_old, 'file'))
        self.old_file_names = set(old.path for old in self.old_files)
        self.new_file_names = set(new.path for new in self.new_files)

        self.new_dir = self.load_dir_scan(
            self.select_scan(self.time_new, 'dir'))
        self.old_dir = self.load_dir_scan(
            self.select_scan(self.time_old, 'dir'))

    def list_all_scans(self):
        '''Returns a list of scan(time, type, path) for the files in data_dir.

        type is 'file' for names containing 'computer_file_listing' and
        'dir' for names containing 'computer_directories'; other names
        are ignored. Names are visited in reverse-sorted order.

        Raises ValueError (from strptime) if a matching name does not
        follow the expected timestamp template.
        '''
        scan = namedtuple('scan', ['time', 'type', 'path'])
        file_template = '%s_computer_file_listing.txt' % time_format_string
        dir_template = '%s_computer_directories.txt' % time_format_string

        # NOTE(review): select_scan expects newest-first order; this holds
        # only if names built from time_format_string sort chronologically
        # - confirm against comp_common.
        file_list = sorted(listdir(data_dir), reverse=True)

        sL = []
        for f in file_list:
            if 'computer_file_listing' in f:
                sL.append(scan(datetime.strptime(f, file_template), 'file', f))
            elif 'computer_directories' in f:
                sL.append(scan(datetime.strptime(f, dir_template), 'dir', f))
        return sL

    def select_scan(self, target_time, scan_type='file'):
        '''Returns the first scan in self.scans of scan_type whose time is
        at or before target_time.

        Raises IndexError when no scan qualifies (the same exception type
        the former sL[0] lookup raised, now with a descriptive message).
        '''
        for s in self.scans:
            if s.type == scan_type and s.time <= target_time:
                return s
        raise IndexError('no %r scan at or before %s'
                         % (scan_type, target_time))

    def select_time_new(self, offset):
        '''Returns the time of the file scan chosen offset days before now.'''
        target_time = datetime.now() - timedelta(days=offset)
        return self.select_scan(target_time).time

    def select_time_old(self, offset):
        '''Returns the time of the file scan chosen offset days before
        self.time_new.'''
        # The extra minute keeps the "new" scan itself from qualifying
        # when offset is 0.
        target_time = self.time_new - timedelta(days=offset, minutes=1)
        return self.select_scan(target_time).time

    def select_new_target_scan(self, offset):
        '''Same selection as select_time_new, but returns the whole scan
        record.'''
        target_time = datetime.now() - timedelta(days=offset)
        return self.select_scan(target_time)

    def select_old_target_scan(self, offset):
        '''Same selection as select_time_old, but returns the whole scan
        record.'''
        # Previously read self.new_target_scan.time, an attribute that is
        # never assigned in this class; self.time_new is the timestamp of
        # the selected "new" scan.
        target_time = self.time_new - timedelta(days=offset, minutes=1)
        return self.select_scan(target_time)

    def load_dir_scan(self, scan):
        '''Returns the set of lines in a 'dir' scan, limited to the first
        self.max_lines lines.'''
        assert scan.type == 'dir'
        with open(join(data_dir, scan.path), 'r') as scan_file:
            scan_data = scan_file.read().split('\n')
        # If the file ends with a newline the split leaves a final ''
        # entry, which becomes a member of the returned set.
        return set(scan_data[:self.max_lines])

    def load_file_scan(self, scan):
        '''Returns the set of file_info records in a 'file' scan.

        Prints the scan's file name, then reads it as CSV; each row must
        hold exactly the eleven file_info fields (TypeError otherwise).
        At most self.max_lines rows are read; None means no limit.
        '''
        assert scan.type == 'file'
        print(scan.path)
        file_stat = namedtuple('file_info',
                               ['path', 'st_mode', 'st_ino', 'st_dev',
                                'st_nlink', 'st_uid', 'st_gid', 'st_size',
                                'st_atime', 'st_mtime', 'st_ctime'])
        #.split('')

        #Read Scan in and return as set
        limit = self.max_lines
        scan_data = set()
        with open(join(data_dir, scan.path), 'r') as scan_file:
            scan_reader = csv.reader(scan_file, delimiter=',', quotechar='"')
            for i, row in enumerate(scan_reader):
                # Check before adding so exactly `limit` rows are kept,
                # matching the slice used by load_dir_scan.
                if limit is not None and i >= limit:
                    break
                scan_data.add(file_stat(*row))
        return scan_data

    def files_added(self):
        '''Returns the paths present in the new scan but not the old one.'''
        return self.new_file_names - self.old_file_names

    def files_deleted(self):
        '''Returns the paths present in the old scan but not the new one.'''
        return self.old_file_names - self.new_file_names

    def files_modified(self):
        '''Returns a list of new-scan records whose path also exists in
        the old scan but whose full record differs.

        Records are compared field by field, so a change in any column
        (st_atime included) counts as a modification.
        '''
        changed = self.new_files - self.old_files
        #[a for a in self.files_added()]]
        return [c for c in changed if c.path in self.old_file_names]

    def dir_added(self):
        '''Returns the directory lines present only in the new scan.'''
        return self.new_dir - self.old_dir

    def dir_deleted(self):
        '''Returns the directory lines present only in the old scan.'''
        return self.old_dir - self.new_dir

    def report(self):
        '''Prints record counts and timestamps for both scans.

        Returns None.
        '''
        print('Comparisons.report()')
        print('NEW: files: %d, dir: %d, time: %s' % (
            len(self.new_files), len(self.new_dir), self.time_new))
        print('OLD: files: %d, dir: %d, time: %s' % (
            len(self.old_files), len(self.old_dir), self.time_old))

    #TODO - report: basic
    #TODO - slice off various directories, focus results


# Driver: runs whenever this module is executed or imported.
c = Comparisons(0, 0)#, 1000)#1000)
#(0,0) for full system - spaces easier to see... next time

for s in c.scans:
    print(s)

# report() prints its own lines and returns None, so this call also
# prints 'None' afterwards.
print(c.report())

print('Deleted Dirs')
for d in c.dir_deleted():
    print(d)


def any_in(match, against):
    '''Returns True if any in match found in against,
    where both match & against support iteration.'''
    # Materialise once: avoids rebuilding the list for every element and
    # keeps the answer correct when `against` is a one-shot iterator.
    # A list (not a set) is used so unhashable elements still work.
    candidates = list(against)
    return any(item in candidates for item in match)


# NOTE: the triple-quoted strings below are disabled experiments kept for
# reference; they are plain string expressions and execute nothing.

'''
num_lines = 100
new = get_data_set(new_data_file, True, num_lines)
old = get_data_set(old_data_file, True, num_lines)
for n in (new - old):
    print n
'''

'''
num_lines = 100000000
print 'Name Set:'
new = file_name_set(path_to_latest_full_scan(n=0), max_lines=num_lines)
old = file_name_set(path_to_latest_full_scan(n=1), max_lines=num_lines)
diff = new - old
print len(diff)
for n in diff:
    print n
'''

'''
#THIS IS JUST A TEST RUN
new_data_set = get_data_set(new_data_file)
old_data_set = get_data_set(old_data_file)

diff_set_new_from_old = new_data_set - old_data_set
diff_set_old_from_new = old_data_set - new_data_set

print 'LEN (old): ', len(old_data_set)
print 'LEN (new): ', len(new_data_set)
print 'LEN (diff: new/old):', len(diff_set_new_from_old)
print 'LEN (diff: old/new):', len(diff_set_old_from_new)
print 'old - new %d' % (len(old_data_set) - len(new_data_set))

num_lines = 100
D = difflib.Differ()
result = list(D.compare(
    get_data_set(new_data_file, False, num_lines),
    get_data_set(old_data_file, False, num_lines)
    ))
print "RESULT", result
for r in result:
    if r.startswith('?'):
        print r
'''