'''
2021-05-22 To Dead Code
Previously Entitled: utilities_email_refactored

Created on Mar 7, 2019

@author: Brett Paufler
Copyright Brett Paufler

Utility Functions

get_list_of_emails()
    lists the .eml files found in a directory

text_iterator_eml()
    sequentially returns text
    utilizing

get_raw_text()
    returns a simple email's raw textual content

strip_raw_text()
    strips header and footer from raw email texts
'''

import email
from os import listdir
from os.path import join as path_join


DEFAULT_INPUT_DIR = '.\\input\\'
TEST_EML = '.\\input\\test.eml'

# Number of leading lines strip_raw_text() discards as header.
HEADER_LINE_COUNT = 5

# Any line containing this text is discarded by strip_raw_text() as footer.
FOOTER_MARKER = 'Love, Brett'


def get_list_of_emails(dir_in=DEFAULT_INPUT_DIR):
    '''Returns a sorted list of paths to the .eml files in dir_in.

    dir_in: directory to scan (not recursive).

    Only names ending in '.eml' (case-sensitive) are kept. The result is
    sorted because os.listdir() makes no promise about ordering, and a
    stable order makes downstream processing repeatable.
    '''
    return sorted(
        path_join(dir_in, name)
        for name in listdir(dir_in)
        if name.endswith('.eml')
    )


def get_raw_text(path_to_email):
    '''Returns an email's text.

    Presumes a simple text only email.

    path_to_email: path to an .eml file.

    For a multipart message, the first part is returned rendered as a
    string, i.e. with that part's own header lines in front of its body
    (strip_raw_text() removes those). For a non-multipart message the
    payload is already a string and is returned whole.
    '''
    # Context manager so the file handle is closed once parsing is done.
    with open(path_to_email) as eml_file:
        msg = email.message_from_file(eml_file)

    payload = msg.get_payload()
    if not msg.is_multipart():
        # payload is a plain string here; indexing it with [0] would
        # return only its first character.
        return str(payload)
    return str(payload[0])


def strip_raw_text(raw_email_text):
    '''Removes default headers and footers from raw_email_text.

    raw_email_text: text as returned by get_raw_text().

    Steps, in order:
      1. every '=20' sequence is deleted (not turned into a space),
      2. every '=' immediately followed by a newline is deleted, which
         joins the two lines around it,
      3. the first HEADER_LINE_COUNT lines are dropped,
      4. every line containing FOOTER_MARKER is dropped, wherever it is,
      5. empty lines are dropped.

    Returns the remaining lines joined with newlines ('' if none remain).
    '''
    # Reverse Markup Encodings
    unescaped_text = raw_email_text.replace('=20', '').replace('=\n', '')

    # Removes Headers, Footers, and Blank Items
    body_lines = unescaped_text.split('\n')[HEADER_LINE_COUNT:]
    kept_lines = [
        line for line in body_lines
        if line != '' and FOOTER_MARKER not in line
    ]

    # Re-Assemble into Text Block from Lines
    return '\n'.join(kept_lines)


def text_iterator_eml(dir_in=DEFAULT_INPUT_DIR):
    '''Sequentially yields the stripped text of all emails in dir_in.

    dir_in: directory holding the .eml files; defaults to ./input.

    Each file is read with get_raw_text() and cleaned with
    strip_raw_text() only when the iterator reaches it.
    '''
    for next_email_path in get_list_of_emails(dir_in):
        yield strip_raw_text(get_raw_text(next_email_path))


if __name__ == '__main__':
    for text in text_iterator_eml():
        # Parenthesised single-argument form prints the same on
        # Python 2 and Python 3.
        print(text)