'''
2021-06-04
    Transformed Specific HTML Pages to Raw Text
    [Redacted] & ----- (meaning the same) throughout
    If I knew grep, sed, or similar, I probably would have used
    one of those instead.

#
#
#

Created on Mar 11, 2018

@author: Brett Paufler
Copyright Brett Paufler

ONE SHOT: CAN KILL
This is quite possibly a throw-away

Rip Text from [Redacted] HTML
Save as txt file

Takes an HTML file,
isolates the [Redacted] text,
returns txt
'''
import re
from os import listdir
from os.path import splitext

# Text that immediately precedes / follows the section to keep.
# NOTE(review): in this copy both markers are a single newline; presumably
# the real marker text was redacted -- confirm before running.
begin = '''
'''
end = '''
'''

# The markers are interpolated as regex source (not escaped), and the
# non-greedy group captures everything between the first begin/end pair.
# DOTALL lets the captured section span multiple lines.
# NOTE(review): wrap the markers in re.escape() if they are meant to be
# matched literally and contain regex metacharacters.
pattern = re.compile(
    '%s(.*?)%s' % (begin, end),
    re.IGNORECASE | re.DOTALL)


def extract_section(text):
    '''Return the text between the first begin/end marker pair.

    Returns None when the markers are not found, so the caller can skip
    the file instead of dying on an IndexError.
    '''
    match = pattern.search(text)
    if match is None:
        return None
    return match.group(1)


def main():
    '''Convert every entry in .\\input to a .txt file in .\\output.

    For each input file the extracted section is printed and written to
    .\\output\\<name>.txt; files without a matching section are reported
    and skipped.
    '''
    # NOTE(review): listdir also returns sub-directories, which open()
    # cannot read -- assumes .\input holds only files.
    for file_in in listdir('.\\input'):
        print(file_in)

        # splitext copes with any extension length; the old [:-4] slice
        # only produced a correct name for '.html' inputs.
        file_out = splitext(file_in)[0] + '.txt'
        save_name = '.\\output\\' + file_out
        print(file_out)
        print(save_name)

        with open('.\\input\\' + file_in) as f:
            text = f.read()
        print(text)

        section = extract_section(text)
        if section is None:
            print('No matching section, skipped: ' + file_in)
            continue
        print(section)

        with open(save_name, 'w') as f:
            f.write(section)


if __name__ == '__main__':
    main()