'''
Created on Jun 24, 2015
Copyright Brett Paufler
@author: Brett Paufler

Analysis of the Football data

This is based on 2 man teams
And will inevitably break on the refactored data
'''
import os
import re
from collections import Counter, namedtuple
from itertools import groupby

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Console display settings for DataFrame output.
pd.set_option('display.width', 500)
# Must be the boolean False: a non-empty string such as 'False' is truthy,
# so passing the string did not switch frame wrapping off.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_rows', 10)
# An earlier max_colwidth=10 setting was immediately overridden by this one,
# so only the effective value is kept.
pd.set_option('display.max_colwidth', 35)
#pd.options.display.float_format='${:,.2f}'


class Analyze():
    '''Text reports (and one chart) built from a csv of run data.

    The default figures used by the report methods correspond to
        mG = MutaGenetics(team=2, keepers=10, cycles=25)

    Attributes:
        data: DataFrame read from the csv; the code visible here reads its
            run, random, plays and mutations columns.
        run_list: sorted list of the distinct values in the run column.
        num_runs: number of distinct runs.
        np_random: int numpy array built by init_np_random().
    '''

    def __init__(self, csv):
        '''Load csv with pandas.read_csv (first row is the header).'''
        self.data = pd.read_csv(csv, header=0)
        self.run_list = sorted(set(self.data['run']))
        self.num_runs = len(self.run_list)
        self.np_random = self.init_np_random()

    def init_np_random(self):
        '''Called by __init__; extracts the calls to random in run order.

        Each distinct (run, random) pair found in the data contributes one
        entry; entries are sorted by run [run=0, run=1, etc...].

        Returns an int numpy array.
        '''
        pairs = sorted(set(zip(self.data['run'], self.data['random'])))
        return np.array([calls for _, calls in pairs], dtype=int)

    def analyze_rand_roster(self, keepers=10, cycles=25):
        '''Number of Random Calls to Initialize Run.

        This is a function of the rating (how hard it is to exceed 1.0).
        Keepers * Cycles * 2 = number of random calls after initialization,
        so random-per-run minus that figure is the number of tr_random()
        calls made for initialization.

        keepers, cycles: settings the runs were generated with. The defaults
            (10 and 25) give the 500 calls that used to be hard-coded here.

        Returns the report as a string.
        '''
        post_init_calls = keepers * cycles * 2
        init_calls = self.np_random - post_init_calls

        t = '\nANALYZE RAND ROSTER\n'
        t += 'Calls to rand_roster:\n\t%s\n' % (self.np_random,)
        t += 'TOTAL: %d\n' % self.np_random.sum()
        t += ('Calls to rand_roster for initilization of runs\n\t%s\n'
              % (init_calls,))
        if init_calls.size:
            t += 'Init (ave): %d\n' % init_calls.mean()
            t += ('Init (std): %d \t(standard deviation)\n\n'
                  % init_calls.std())
        else:
            # mean()/std() of an empty array are nan, which '%d' rejects.
            t += 'Init (ave): n/a\n'
            t += 'Init (std): n/a \t(standard deviation)\n\n'
        return t

    def analyze_plays(self, team=2, out_path='./reports/25_play_ratio.png'):
        '''Report play-list length and chart how often each play appears.

        team: players per team; each row's play count is divided by this to
            get plays per player (default 2, the value used previously).
        out_path: file the bar chart is saved to; its directory is created
            when missing.

        Returns the report as a string.
        '''
        t = '\n\nANALYZE PLAYS:\n'
        all_plays = [p.split() for p in self.data['plays']]

        per_player = np.array(
            [len(p) / float(team) for p in all_plays], dtype=float)
        t += 'Play Length (ave per player): %.2f\n' % per_player.mean()
        t += 'Play Length (std per player): %.2f\n' % per_player.std()

        # Flatten to one string, shorten the play names (they become the
        # x tick labels), then split back into individual plays.
        plays = ' '.join(item for sublist in all_plays for item in sublist)
        plays = plays.replace('play_', ' ')
        plays = plays.replace('pass_coverage_closest', 'pass_cover')
        c = Counter(plays.split())

        # Real lists rather than dict views, which matplotlib cannot treat
        # as sequences on Python 3.
        names = list(c)
        counts = [c[name] for name in names]
        positions = list(range(len(names)))

        out_dir = os.path.dirname(out_path)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        # Use a dedicated figure so repeated calls neither draw on top of an
        # earlier chart nor leave figures open.
        fig = plt.figure()
        try:
            plt.bar(positions, counts, align='center')
            plt.xticks(positions, names)
            plt.savefig(out_path)
        finally:
            plt.close(fig)
        return t

    # NOTE(review): analyze_mutations is cut off mid-statement in the source
    # available for this review, so it cannot be kept as working code. The
    # visible fragment is preserved verbatim below; rejoin it with the rest
    # of the method and uncomment.
    #
    # def analyze_mutations(self):
    #     t = '\n\nANALYZE MUTATIONS:\n'
    #     mutations = list(self.data.mutations)
    #     mutations = ' '.join(mutations)
    #     ratings = [int(x) for x in re.findall(r'\d+', mutations)]
    #     mutations = re.findall(r'[a-z_]+(?