#!/usr/bin/python
# Download a web page and parse its HTML, reporting every anchor it contains.
# Based partly on sample from O'Reilly's Python Standard Library book
# Usage: httpdn.py [http://]url
#
# NOTE: Python 2 only -- htmllib, mimetools and rfc822 do not exist in Python 3.

import formatter
import htmllib
import mimetools
import rfc822
import string
import sys
import urllib


class Parser(htmllib.HTMLParser):
    """HTML parser that reports the anchors (<a> elements) of a document.

    While parsing, every attribute of each opening <a> tag and the text of
    each closed anchor is printed.  Afterwards ``anchors`` maps each
    non-empty anchor text to the list of ``href`` values seen with that text.
    """

    def __init__(self, verbose=0):
        """Create a parser; ``verbose`` is passed through to htmllib."""
        self.anchors = {}
        # href of the anchor whose text is currently being captured, if any.
        self._href = None
        # True between save_bgn() in start_a and save_end() in end_a.
        self._capturing = False
        # NullFormatter: only the parse events matter, nothing is rendered.
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter(), verbose)

    def start_a(self, attributes):
        """Print the attributes of an opening <a> tag and start saving its text.

        ``attributes`` is the list of (name, value) pairs supplied by the
        underlying parser.
        """
        self._href = None
        for name, value in attributes:
            print('Name: ' + name + ' value: ' + value)
            if name == 'href':
                self._href = value.strip()
        self._capturing = True
        self.save_bgn()

    def end_a(self):
        """Print the text of the anchor being closed and record it in ``anchors``."""
        if not self._capturing:
            # No capture is active (e.g. nested or unclosed anchors being
            # closed one after another); save_end() would fail here.
            return
        self._capturing = False
        atitle = self.save_end()
        print('Title: ' + atitle)
        # Only anchors that have both a target and visible text are recorded.
        if self._href and atitle:
            self.anchors.setdefault(atitle, []).append(self._href)
        self._href = None


def _open_url(url):
    """Open ``url``, retrying with an ``http://`` prefix if it has no scheme.

    The URL is first opened exactly as given, so anything that worked without
    the retry keeps working.  The original IOError is re-raised when the URL
    already names a scheme.
    """
    try:
        return urllib.urlopen(url)
    except IOError:
        scheme, sep, _ = url.partition('://')
        if sep and scheme.isalnum():
            raise
        return urllib.urlopen('http://' + url)


def main(argv):
    """Fetch the URL named in ``argv[1]``, parse it and print its anchors.

    Returns the process exit status: 0 on success, 2 when the URL argument
    is missing.
    """
    if len(argv) < 2:
        sys.stderr.write('Usage: %s [http://]url\n' % argv[0])
        return 2

    hc = _open_url(argv[1])
    try:
        html = hc.read()
    finally:
        # Release the connection even if the read fails part-way.
        hc.close()

    p = Parser()
    p.feed(html)
    p.close()

    for k, v in p.anchors.items():
        print('%s => %s' % (k, v))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))