1: 2: 3: 4: 5: 6: 7: 8: 9: 10:
# Download a web page and run its markup through the `tidy` module.
import contextlib
import urllib2

import tidy

# Keyword options forwarded unchanged to tidy.parseString(); every one is set to 0.
options = dict(output_xhtml=0, add_xml_decl=0, indent=0, tidy_mark=0)

# The response is wrapped in closing() so that it is always closed, even when
# read() raises; a bare f.close() after the read would be skipped in that case.
with contextlib.closing(urllib2.urlopen("http://pcs.essex.ac.uk/")) as f:
    page = f.read()

# Parse the raw page content with the options above.
tidy_page = tidy.parseString(page, **options)
1: 2: 3: 4:
# Expects `tidy`, `page` and `options` to be defined already.
tidy_page = tidy.parseString(page, **options)

# save to file 'output.html'
# The with-block closes the file even if the write raises, so the handle is
# never left open.
with open('output.html', 'w') as out:
    out.write(str(tidy_page))