# del.icio.us (Netscape Bookmark) dumps to Turtle
#
# Uses BeautifulSoup
#
# acts on the bookmark file given as the first command-line argument
#
# get dump from here https://secure.delicious.com/settings/bookmarks/export
# (after login)
#
# @danja 2010-12-16
# @wikier 2010-12-17
#
# bug reports/patches - danny.ayers@gmail.com
#
# key bit of input: one <DT> holding an <A> with HREF, ADD_DATE and TAGS
# attributes, optionally followed by a <DD> with a free-text note, e.g.
#
#   title: YouTube - Jake Rothman part1
#   note:  hee hee - not seen him in years

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    # Keep the helpers below importable without the parser installed;
    # main() reports the missing dependency when the script is actually run.
    BeautifulSoup = None
import re
import time
import sys

# NOTE(review): the prefix lines carried no namespace IRIs (apparently
# stripped together with the HTML in the header comment), which is not valid
# Turtle.  These are the published namespaces of each vocabulary -- please
# confirm the tag: namespace against the original script.
PREFIXES = (
    ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
    ('rdfs', 'http://www.w3.org/2000/01/rdf-schema#'),
    ('dc', 'http://purl.org/dc/elements/1.1/'),
    ('rss', 'http://purl.org/rss/1.0/'),
    ('tag', 'http://www.holygoat.co.uk/owl/redwood/0.1/tags/'),
)


def escape_literal(text):
    """Escape text for use inside a double-quoted Turtle string literal."""
    # Backslash must go first so the escapes added afterwards are not doubled.
    return (text.replace('\\', '\\\\')
                .replace('"', '\\"')
                .replace('\n', '\\n')
                .replace('\r', '\\r')
                .replace('\t', '\\t'))


def format_date(raw_date):
    """Format epoch seconds (given as text) as local YYYY-MM-DDTHH:MM:SS.

    Raises ValueError when raw_date is not a number.  The value is parsed as
    a number instead of being passed to eval(), because it comes straight
    from the bookmark file.
    """
    return time.strftime("%Y-%m-%dT%H:%M:%S",
                         time.localtime(float(raw_date)))


def bookmark_triples(uri, title, comment, tags, raw_date):
    """Return the Turtle statements describing one bookmark.

    uri      -- bookmark address, used as the subject
    title    -- link text
    comment  -- free-text note, or None
    tags     -- iterable of tag names (empty names are ignored), or None
    raw_date -- epoch seconds as text, or None; a missing or malformed value
                simply leaves out the dc:date statement
    """
    subject = '<' + uri + '>'
    lines = [
        subject + ' a rss:item .',
        subject + ' dc:title "' + escape_literal(title) + '" .',
    ]
    if comment:
        # Only the trailing linebreak is dropped, never a real character.
        note = comment.rstrip('\r\n')
        lines.append(subject + ' rdfs:comment "' + escape_literal(note) + '" .')
    for tag in tags or ():
        if tag != '':
            lines.append(subject + ' tag:taggedWithTag [ tag:tagName "'
                         + escape_literal(tag) + '"] .')
    if raw_date is not None:
        try:
            date = format_date(raw_date)
        except (ValueError, OverflowError, OSError):
            date = None
        if date is not None:
            lines.append(subject + ' dc:date "' + date + '" .')
    return lines


def main(argv):
    """Convert the bookmark file named in argv[1] to Turtle on stdout."""
    args = argv[1:]
    if len(args) < 1:
        sys.exit("file is required\nusage: python souper.py [file]")
    if BeautifulSoup is None:
        sys.exit("the BeautifulSoup module is required")

    path = args[0]
    try:
        with open(path, 'r') as source:
            page = source.read()
    except IOError:
        sys.exit("'%s' is not a valid file, please check the path" % path)

    soup = BeautifulSoup(page)
    if soup.dl is None:
        sys.exit("'%s' does not contain a bookmark list" % path)
    dts = soup.dl.findAll('dt')

    for prefix, iri in PREFIXES:
        print('@prefix ' + prefix + ': <' + iri + '> .')

    for dt in dts:
        anchor = dt.a
        if anchor is None:
            continue
        # attrs is a sequence of (name, value) pairs; a fresh dict per entry
        # keeps values from leaking over from the previous bookmark.
        attrs = dict(anchor.attrs)
        uri = attrs.get('href')
        if not uri:
            continue  # nothing to use as the subject
        title = anchor.contents[0] if anchor.contents else ''

        comment = None
        sibling = dt.nextSibling
        # getattr copes with a missing sibling and with nodes lacking a name.
        if getattr(sibling, 'name', None) == 'dd' and sibling.contents:
            comment = sibling.contents[0]

        tags = attrs.get('tags', '').split(',')
        statements = bookmark_triples(uri, title, comment, tags,
                                      attrs.get('add_date'))
        try:
            # One print per bookmark: if stdout cannot encode the text, the
            # whole entry is skipped rather than left half-written.
            print('\n'.join(statements))
        except UnicodeEncodeError:
            sys.stderr.write("skipped, not encodable for output: %r\n"
                             % (uri,))


if __name__ == "__main__":
    main(sys.argv)