diff --git a/resources/recipes/heise.recipe b/resources/recipes/heise.recipe index bff7179074..23a8068c47 100644 --- a/resources/recipes/heise.recipe +++ b/resources/recipes/heise.recipe @@ -9,15 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe class heiseDe(BasicNewsRecipe): - + title = 'heise' description = 'Computernews from Germany' __author__ = 'Oliver Niesner' use_embedded_content = False + language = 'de' timefmt = ' [%d %b %Y]' max_articles_per_feed = 40 no_stylesheets = True - + remove_tags = [dict(id='navi_top'), dict(id='navi_bottom'), dict(id='logo'), @@ -35,8 +36,8 @@ class heiseDe(BasicNewsRecipe): dict(name='p', attrs={'class':'news_navi'}), dict(name='div', attrs={'class':'news_foren'})] remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})] - - feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ] - + + feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ] + diff --git a/resources/recipes/iliteratura_cz.recipe b/resources/recipes/iliteratura_cz.recipe index 7d603f0cec..90ffa108a2 100644 --- a/resources/recipes/iliteratura_cz.recipe +++ b/resources/recipes/iliteratura_cz.recipe @@ -4,7 +4,7 @@ import re class SmeRecipe(BasicNewsRecipe): __license__ = 'GPL v3' __author__ = 'Abelturd' - language = 'cz' + language = 'cs' version = 1 title = u'iLiteratura.cz' diff --git a/resources/recipes/johm.recipe b/resources/recipes/johm.recipe index 6178af9d30..ee162b27c2 100644 --- a/resources/recipes/johm.recipe +++ b/resources/recipes/johm.recipe @@ -9,6 +9,7 @@ class JournalofHospitalMedicine(BasicNewsRecipe): description = 'Medical news' timefmt = ' [%d %b, %Y]' needs_subscription = True + language = 'en' no_stylesheets = True #remove_tags_before = dict(name='div', attrs={'align':'center'}) diff --git a/resources/recipes/midday.recipe b/resources/recipes/midday.recipe new file mode 100644 index 0000000000..4dbee1d2f3 --- /dev/null +++ b/resources/recipes/midday.recipe @@ -0,0 +1,13 @@ +from calibre.web.feeds.news import CalibrePeriodical + +class MiDDay(CalibrePeriodical): + + title = 'MiDDay' + calibre_periodicals_slug = 'midday' + + description = '''Get your dose of the latest news, views and fun - from the + world of politics, sports and Bollywood to the cartoons, comics and games of + the entertainment section - India’s leading tabloid has it all. To subscribe + visit calibre + Periodicals.''' + language = 'en_IN' diff --git a/resources/recipes/nursingtimes.recipe b/resources/recipes/nursingtimes.recipe index efde06d778..699bc281f3 100644 --- a/resources/recipes/nursingtimes.recipe +++ b/resources/recipes/nursingtimes.recipe @@ -18,7 +18,7 @@ class NursingTimes(BasicNewsRecipe): encoding = 'utf-8' publisher = 'emap' category = 'news, health, nursing, UK' - language = 'en-UK' + language = 'en_GB' needs_subscription = True LOGIN = 'http://www.nursingtimes.net/sign-in' diff --git a/resources/recipes/tanea.recipe b/resources/recipes/tanea.recipe index 4e6cd09c3f..f11ae88125 100644 --- a/resources/recipes/tanea.recipe +++ b/resources/recipes/tanea.recipe @@ -6,6 +6,7 @@ class TaNea(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 100 no_stylesheets = True + language = 'el' remove_tags_before = dict(name='div',attrs={'id':'print-body'}) remove_tags_after = dict(name='div',attrs={'id':'text'}) diff --git a/resources/recipes/telepolis.recipe b/resources/recipes/telepolis.recipe index 3fecd7b293..1009dca275 100644 --- a/resources/recipes/telepolis.recipe +++ b/resources/recipes/telepolis.recipe @@ -8,43 +8,44 @@ import re from calibre.web.feeds.news import BasicNewsRecipe class TelepolisNews(BasicNewsRecipe): - title = u'Telepolis (News)' - __author__ = 'Gerhard Aigner' - publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' - description = 'News from telepolis' - category = 'news' - oldest_article = 7 - max_articles_per_feed = 100 - recursion = 0 - no_stylesheets = True - encoding = "utf-8" + title = u'Telepolis (News)' + __author__ = 'Gerhard Aigner' + publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' + description = 'News from telepolis' + category = 'news' + oldest_article = 7 + max_articles_per_feed = 100 + recursion = 0 + no_stylesheets = True + encoding = "utf-8" + language = 'de_AT' - use_embedded_content = False - remove_empty_feeds = True + use_embedded_content = False + remove_empty_feeds = True - preprocess_regexps = [(re.compile(r']*>', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''),] + preprocess_regexps = [(re.compile(r']*>', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''),] - keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})] - remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})] + keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})] + remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})] - feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')] + feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')] - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - def get_article_url(self, article): - '''if the linked article is of kind artikel don't take it''' - if (article.link.count('artikel') > 0) : - return None - return article.link + def get_article_url(self, article): + '''if the linked article is of kind artikel don't take it''' + if (article.link.count('artikel') > 0) : + return None + return article.link - def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0,mtag) - return soup \ No newline at end of file + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + return soup diff --git a/resources/recipes/telepolis_artikel.recipe b/resources/recipes/telepolis_artikel.recipe index 25e71afc05..0c50668946 100644 --- a/resources/recipes/telepolis_artikel.recipe +++ b/resources/recipes/telepolis_artikel.recipe @@ -8,36 +8,37 @@ import re from calibre.web.feeds.news import BasicNewsRecipe class TelepolisArtikel(BasicNewsRecipe): - title = u'Telepolis (Artikel)' - __author__ = 'Gerhard Aigner' - publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' - category = 'news' - description = 'Telepolis Artikel' - oldest_article = 7 - max_articles_per_feed = 100 - recursion = 0 - no_stylesheets = True + title = u'Telepolis (Artikel)' + __author__ = 'Gerhard Aigner' + publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' + category = 'news' + description = 'Telepolis Artikel' + language = 'de_AT' + oldest_article = 7 + max_articles_per_feed = 100 + recursion = 0 + no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True + use_embedded_content = False + remove_empty_feeds = True - remove_tags_before = dict(name='h1') - remove_tags = [dict(name='img')] + remove_tags_before = dict(name='h1') + remove_tags = [dict(name='img')] - feeds = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')] + feeds = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')] - preprocess_regexps = [(re.compile(r']*>', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''),] + preprocess_regexps = [(re.compile(r']*>', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''),] - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher] + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - def print_version(self, url): - p = re.compile(r'\d{5}', re.DOTALL|re.IGNORECASE) - m = p.search(url) - return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr="+ m.group() +"&mode=print" + def print_version(self, url): + p = re.compile(r'\d{5}', re.DOTALL|re.IGNORECASE) + m = p.search(url) + return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr="+ m.group() +"&mode=print" diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 496a1f4d5b..2a09fc261b 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1359,25 +1359,51 @@ class AutomaticNewsRecipe(BasicNewsRecipe): self.web2disk_options.keep_only_tags = [] return BasicNewsRecipe.fetch_embedded_article(self, article, dir, f, a, num_of_feeds) -class DownloadedNewsRecipe(BasicNewsRecipe): +class LoginFailed(ValueError): + pass - def get_downloaded_recipe(self): - 'Return path on local filesystem to downloaded recipe' - raise NotImplementedError +class CalibrePeriodical(BasicNewsRecipe): + + #: Set this to the slug for the calibre periodical + calibre_periodicals_slug = None + + LOG_IN = 'http://news.calibre-ebook.com/accounts/login' + needs_subscription = True + __author__ = 'calibre Periodicals' + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + br.open(self.LOG_IN) + br.select_form(name='login') + br['username'] = self.username + br['password'] = self.password + raw = br.submit().read() + if 'href="/my-account"' not in raw: + raise LoginFailed( + 'Failed to log in, check your username and password for' + ' the calibre Periodicals service.') + + return br def download(self): + import cStringIO self.log('Fetching downloaded recipe') - rpath = self.get_downloaded_recipe() + raw = self.browser.open_novisit( + 'http://news.calibre-ebook.com/subscribed_files/%s/0/temp.downloaded_recipe' + % self.calibre_periodicals_slug + ).read() + f = cStringIO.StringIO(raw) from calibre.utils.zipfile import ZipFile - zf = ZipFile(rpath) + zf = ZipFile(f) zf.extractall() zf.close() from calibre.web.feeds.recipes import compile_recipe from glob import glob try: - recipe = compile_recipe(open(glob('*.downloaded_recipe')[0], + recipe = compile_recipe(open(glob('*.recipe')[0], 'rb').read()) self.conversion_options = recipe.conversion_options except: self.log.exception('Failed to compile downloaded recipe') return os.path.abspath('index.html') + diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index fb83fa04b3..a72f500862 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -5,14 +5,16 @@ __copyright__ = '2008, Kovid Goyal ' Builtin recipes. ''' import re, imp, inspect, time, os -from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, \ + AutomaticNewsRecipe, CalibrePeriodical from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ptempfile import PersistentTemporaryDirectory from calibre import __appname__, english_sort BeautifulSoup, time, english_sort -basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe) +basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe, + CalibrePeriodical) _tdir = None _crep = 0 def compile_recipe(src):