From fcf4957e91a62d5d89f88e269fb4cb11d1262e78 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 Feb 2012 19:04:45 +0530 Subject: [PATCH] Remove de Volksrant subscription version as it is no longer available --- recipes/volksrant_sub.recipe | 115 ----------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 recipes/volksrant_sub.recipe diff --git a/recipes/volksrant_sub.recipe b/recipes/volksrant_sub.recipe deleted file mode 100644 index 8a5f1543b5..0000000000 --- a/recipes/volksrant_sub.recipe +++ /dev/null @@ -1,115 +0,0 @@ -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - -class Volkskrant_full(BasicNewsRecipe): - # This recipe will download the Volkskrant newspaper, - # from the subscribers site. It requires a password. - # Known issues are: articles that are spread out over - # multiple pages will appear multiple times. Pages - # that contain only adverts will appear, but empty. - # The supplement 'Volkskrant Magazine' on saturday - # is currently not downloaded. - # You can set a manual date, to download an archived - # newspaper. Volkskrant stores over a month at the - # moment of writing. To do so I suggest you unmark - # the date on the line below, and insert it in the title. Then - # follow the instructions marked further below. - - title = 'De Volkskrant (subscription)' # [za, 13 nov 2010]' - __author__ = u'Selcal' - description = u"Volkskrant" - oldest_article = 30 - max_articles_per_feed = 100 - no_stylesheets = True - language = 'nl' - use_embedded_content = False - simultaneous_downloads = 1 - delay = 1 - needs_subscription = True - # Set RETRIEVEDATE to 'yyyymmdd' to load an older - # edition. Otherwise keep '%Y%m%d' - # When setting a manual date, unmark and add the date - # to the title above, and unmark the timefmt line to stop - # Calibre from adding today's date in addition. - - # timefmt = '' - RETRIEVEDATE = strftime('%Y%m%d') - INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text' - INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/' - LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do' - remove_tags = [dict(name='address')] - cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg' - - def get_browser(self): - br = BasicNewsRecipe.get_browser() - - if self.username is not None and self.password is not None: - br.open(self.LOGIN) - br.select_form(nr = 0) - br['username'] = self.username - br['password'] = self.password - br.submit() - return br - - def parse_index(self): - krant = [] - def strip_title(_title): - i = 0 - while ((_title[i] <> ":") and (i <= len(_title))): - i = i + 1 - return(_title[0:i]) - for temp in range (5): - try: - soup = self.index_to_soup(self.INDEX_MAIN) - break - except: - #print '(Retrying main index load)' - continue - mainsoup = soup.find('td', attrs={'id': 'select_page_top'}) - for option in mainsoup.findAll('option'): - articles = [] - _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text' - _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/' - #print '' - #print '<------- Processing section: ' + _INDEX + ' ------------------------->' - for temp in range (5): - try: - soup = self.index_to_soup(_INDEX) - break - except: - #print '(Retrying index load)' - continue - for item in soup.findAll('area'): - art_nr = item['class'] - attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)] - #print '==> Found: ' + attrname; - index_title = soup.find('div', attrs={'class': attrname}) - get_title = index_title['title']; - _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text' - title = get_title; - #print '--> Title: ' + title; - #print '--> URL: ' + _ARTICLE; - for temp in range (5): - try: - souparticle = self.index_to_soup(_ARTICLE); - break - except: - print '(Retrying URL load)' - continue - headerurl = souparticle.findAll('frame')[0]['src']; - #print '--> Read frame name for header: ' + headerurl; - url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html'; - #print '--> Corrected URL: ' + url; - if (get_title <> ''): - title = strip_title(get_title) - date = strftime(' %B %Y') - if (title <> ''): - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':'' - }) - krant.append( (option.string, articles)) - return krant -