#!/usr/bin/env python
import sys, os, pprint, re, urllib
if __name__ == '__main__':
# Choose which show's clip listing to fetch.  Passing "colbert" as the
# first command-line argument selects show id 18252; any other argument
# (or none at all) falls back to show id 934.
# NOTE(review): 934 is presumably The Daily Show (the status file is
# named ~/.dailyshow) and 18252 The Colbert Report -- confirm against
# the site before relying on these ids.
if len(sys.argv) > 1 and sys.argv[1] == 'colbert':
    show_id = 18252
else:
    show_id = 934
# Previously recorded state, loaded from a per-user status file.  The
# dict is keyed by clip page URL (the download loop tests
# `url not in shows`); what the values hold is not visible here.
shows = {}
statusfile = os.path.expanduser("~/.dailyshow")
try:
    # `with` guarantees the handle is closed even if eval() raises.
    with open(statusfile) as status:
        # NOTE(review): eval() executes whatever the status file contains.
        # If the file only ever holds a literal dict, ast.literal_eval is a
        # safer drop-in -- confirm against the code that writes the file.
        shows.update(eval(status.read()))
except (IOError, OSError):
    # No status file yet (first run) or it is unreadable: start empty.
    pass
except Exception as err:
    # A corrupt status file must not abort the run, but say so rather
    # than silently forgetting everything that was recorded.
    sys.stderr.write("ignoring malformed %s: %s\n" % (statusfile, err))
# Common URL prefix for the media-player pages; the listing URL below and
# the per-clip play URLs built later are both formed by appending to it.
base = "http://www.comedycentral.com/sitewide/media_player/"
# Download the browse-results page for the selected show id as one string.
# (urllib.urlopen is the Python 2 spelling of this call.)
ds = urllib.urlopen (base + "browseresults.jhtml?showId=%d"%show_id).read ()
# Collect every chunk of the page matching the "results_desc" pattern.
# NOTE(review): the pattern literal below is split across two lines and
# starts with `]*`, so it does not parse as written.  It looks like the
# opening HTML tag (and probably a closing tag after `.*?`) was lost from
# the string -- restore it from the original script; do not guess the tag.
clips = re.findall ("(?ims)
]*class=\"results_desc\">.*?", ds)
# Reverse the matches in place so the loop that follows walks them in the
# opposite order from the page (presumably oldest clip first -- confirm).
clips.reverse ()
try:
count = 0;
for i in clips:
m = re.search ("(?ims)href=\"(play\.jhtml\?itemId=[0-9]+)\">([^<]*) --([^<]+)", i)
if m:
url = base + m.group (1)
title = m.group (2).strip ()
desc = m.group (3).strip ()
if url not in shows.keys ():
vidpage = urllib.urlopen (url).read ()
mov = re.search ("(?ims)