'''
2021-06-04
Transformed Specific HTML Pages to Raw Text
[Redacted] & ----- (meaning the same) throughout
If I knew grep, sed, or similar,
I probably would have used them instead.

# # #

Created on Mar 11, 2018

@author: Brett Paufler
Copyright Brett Paufler

ONE SHOT: CAN KILL
This is quite possibly a throw-away

Rip Text from [Redacted] HTML
Save as txt file

Takes an HTML file
isolates the [Redacted] text
returns txt

Usage: run as a script.  Every file in the "input" directory is read,
the text between the begin/end markers is extracted, and the result is
written to the "output" directory under the same base name with a
".txt" extension.
'''
import re
from os import listdir
from os.path import join, splitext

# Markers that delimit the wanted section of each page.  The real marker
# text was redacted from this copy; as written, each marker is a single
# newline.
# NOTE(review): the markers are interpolated into the regex unescaped.  If
# the real markers contain regex metacharacters, wrap them in re.escape().
begin = '''
'''
end = '''
'''

# Non-greedy capture of everything between the first begin/end pair.
# DOTALL lets the captured section span multiple lines.
pattern = re.compile(
    '%s(.*?)%s' % (begin, end),
    re.IGNORECASE | re.DOTALL)

# Built with join() so the paths are '.\\input' and '.\\output' on Windows
# (as before) and still resolve on other platforms.
INPUT_DIR = join('.', 'input')
OUTPUT_DIR = join('.', 'output')


def output_name(file_in):
    '''Return the name of file_in with its extension replaced by ".txt".

    Uses splitext rather than slicing off a fixed number of characters,
    so "page.html" and "page.htm" both become "page.txt".
    '''
    return splitext(file_in)[0] + '.txt'


def extract_section(text):
    '''Return the text between the first begin/end marker pair in text.

    Returns None when the markers are not found, so the caller can decide
    what to do instead of hitting an IndexError.
    '''
    match = pattern.search(text)
    if match is None:
        return None
    return match.group(1)


def main(input_dir=INPUT_DIR, output_dir=OUTPUT_DIR):
    '''Convert every file in input_dir to a .txt file in output_dir.

    Progress (names, the raw page and the extracted section) is echoed to
    stdout.  A file without a marked section is reported and skipped so
    that one bad page does not abort the rest of the batch.
    output_dir is assumed to exist already; it is not created here.
    '''
    for file_in in listdir(input_dir):
        print(file_in)

        file_out = output_name(file_in)
        save_name = join(output_dir, file_out)
        print(file_out)
        print(save_name)

        with open(join(input_dir, file_in)) as f:
            raw = f.read()
        print(raw)

        section = extract_section(raw)
        if section is None:
            print('No marked section found, skipping: ' + file_in)
            continue
        print(section)

        with open(save_name, 'w') as f:
            f.write(section)


if __name__ == '__main__':
    main()