.*?(
.*?).*?
(.*?)
'
+
+
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher' : publisher
+ , 'language' : language
+ , 'linearize_tables' : True
+ }
+
+
+ feeds = [
+ (u'Reviews' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=reviews' )
+ ,(u'Commentary' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=COMMENTARY')
+ ,(u'Great Movies' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=REVIEWS08')
+ ,(u'People' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=PEOPLE')
+ ,(u'Glossary' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=GLOSSARY')
+
+ ]
+
+ preprocess_regexps = [
+ (re.compile(r'
.*?This is a printer friendly.*?.*?
', re.DOTALL|re.IGNORECASE),
+ lambda m: '')
+ ]
+
+
+
+ def print_version(self, url):
+ return url + '&template=printart'
+
+ def parse_index(self):
+ totalfeeds = []
+ lfeeds = self.get_feeds()
+ for feedobj in lfeeds:
+ feedtitle, feedurl = feedobj
+ self.log('\tFeedurl: ', feedurl)
+ self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+ articles = []
+ page = urllib2.urlopen(feedurl).read()
+
+ if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
+ pattern = self.patternReviews
+ elif feedtitle == 'Commentary':
+ pattern = self.patternCommentary
+ elif feedtitle == 'People':
+ pattern = self.patternPeople
+ elif feedtitle == 'Glossary':
+ pattern = self.patternGlossary
+
+
+ regex = re.compile(pattern, re.IGNORECASE|re.DOTALL)
+
+ for match in regex.finditer(page):
+ if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
+ movietitle = match.group(1)
+ thislink = match.group(2)
+ description = match.group(3)
+ elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary':
+ thislink = match.group(1)
+ description = match.group(2)
+
+ self.log(thislink)
+
+ for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
+ thisurl = self.PREFIX + link['href']
+ thislinktext = self.tag_to_string(link)
+
+ if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
+ thistitle = movietitle
+ elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary':
+ thistitle = thislinktext
+
+ if thistitle == '':
+ thistitle = 'Ebert Journal Post'
+
+ """
+ pattern2 = r'AID=\/(.*?)\/'
+ reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL)
+ match2 = reg2.search(thisurl)
+ date = match2.group(1)
+ c = time.strptime(match2.group(1),"%Y%m%d")
+ date=time.strftime("%a, %b %d, %Y", c)
+ self.log(date)
+ """
+
+ articles.append({
+ 'title' :thistitle
+ ,'date' :''
+ ,'url' :thisurl
+ ,'description':description
+ })
+ totalfeeds.append((feedtitle, articles))
+
+ return totalfeeds
+