diff --git a/src/calibre/gui2/images/news/darknet.png b/src/calibre/gui2/images/news/darknet.png new file mode 100644 index 0000000000..a64867c3e7 Binary files /dev/null and b/src/calibre/gui2/images/news/darknet.png differ diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 2892e88379..3a2b7a4e13 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -55,7 +55,7 @@ recipe_modules = ['recipe_' + r for r in ( 'eltiempo_hn', 'slate', 'tnxm', 'bbcvietnamese', 'vnexpress', 'volksrant', 'theeconomictimes_india', 'ourdailybread', 'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti', - 'esquire', 'livemint', 'thedgesingapore', + 'esquire', 'livemint', 'thedgesingapore', 'darknet', )] diff --git a/src/calibre/web/feeds/recipes/recipe_darknet.py b/src/calibre/web/feeds/recipes/recipe_darknet.py new file mode 100644 index 0000000000..e86be1655a --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_darknet.py @@ -0,0 +1,43 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch darknet. 
+''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class darknet(BasicNewsRecipe): + + title = 'darknet' + description = 'Ethical hacking and security news' + __author__ = 'Oliver Niesner' + language = _('English') + use_embedded_content = False + timefmt = ' [%b %d %Y]' + max_articles_per_feed = 40 + no_stylesheets = True + oldest_article = 180 + + remove_tags = [dict(id='navi_top'), + dict(id='navi_bottom'), + dict(id='logo'), + dict(id='login_suche'), + dict(id='navi_login'), + dict(id='breadcrumb'), + dict(id='subtitle'), + dict(id='bannerzone'), + dict(name='span', attrs={'class':'rsaquo'}), + dict(name='span', attrs={'class':'next'}), + dict(name='span', attrs={'class':'prev'}), + dict(name='div', attrs={'class':'news_logo'}), + dict(name='div', attrs={'class':'nextprev'}), + dict(name='p', attrs={'class':'news_option'}), + dict(name='p', attrs={'class':'news_foren'})] + remove_tags_after = [dict(name='div', attrs={'class':'entrybody'})] + + feeds = [ ('darknet', 'http://feedproxy.google.com/darknethackers') ] + + + diff --git a/src/calibre/web/feeds/recipes/recipe_elektrolese.py b/src/calibre/web/feeds/recipes/recipe_elektrolese.py index 622190a286..07eef95bbc 100644 --- a/src/calibre/web/feeds/recipes/recipe_elektrolese.py +++ b/src/calibre/web/feeds/recipes/recipe_elektrolese.py @@ -32,7 +32,6 @@ class elektrolese(BasicNewsRecipe): - feeds = [ (u'electrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ] - + feeds = [ (u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ] diff --git a/src/calibre/web/feeds/recipes/recipe_hna.py b/src/calibre/web/feeds/recipes/recipe_hna.py index c4faec94ba..928d3f032b 100644 --- a/src/calibre/web/feeds/recipes/recipe_hna.py +++ b/src/calibre/web/feeds/recipes/recipe_hna.py @@ -19,16 +19,24 @@ class hnaDe(BasicNewsRecipe): timefmt = ' [%d %b %Y]' max_articles_per_feed = 40 no_stylesheets = True + remove_javascript = True encoding = 'iso-8859-1' remove_tags = 
[dict(id='topnav'), dict(id='nav_main'), + dict(id='teaser'), dict(id='suchen'), + dict(id='superbanner'), + dict(id='navigation'), + dict(id='skyscraper'), dict(id=''), dict(name='span'), dict(name='ul', attrs={'class':'linklist'}), dict(name='a', attrs={'href':'#'}), + dict(name='div', attrs={'class':'hlist'}), + dict(name='div', attrs={'class':'subc noprint'}), dict(name='p', attrs={'class':'breadcrumb'}), + dict(name='a', attrs={'style':'cursor:hand'}), dict(name='p', attrs={'class':'h5'})] #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})] remove_tags_after = [dict(name='a', attrs={'href':'#'})] @@ -38,3 +46,4 @@ class hnaDe(BasicNewsRecipe): + diff --git a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py index 5f2ef3529b..ab79b4b911 100644 --- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py +++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py @@ -6,6 +6,7 @@ Fetch Linuxdevices. ''' import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup class Sueddeutsche(BasicNewsRecipe): @@ -16,22 +17,22 @@ class Sueddeutsche(BasicNewsRecipe): use_embedded_content = False timefmt = ' [%a %d %b %Y]' max_articles_per_feed = 50 - language = _('English') no_stylesheets = True - html2epub_options = 'linearize_tables = True\nbase_font_size2=14' - html2lrf_options = ['--ignore-tables'] + language = _('English') + remove_javascript = True + conversion_options = {'linearize_tables' : True} encoding = 'latin1' - remove_tags_after = [dict(id='nointelliTXT')] + remove_tags_after = [dict(id='intelliTxt')] filter_regexps = [r'ad\.doubleclick\.net'] remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), dict(name='div', attrs={'class':'bannerSky'}), + dict(name='div', attrs={'border':'0'}), dict(name='div', attrs={'class':'footerLinks'}), dict(name='div', attrs={'class':'seitenanfang'}), dict(name='td', 
attrs={'class':'mar5'}), - dict(name='td', attrs={'class':'mar5'}), dict(name='table', attrs={'class':'pageAktiv'}), dict(name='table', attrs={'class':'xartable'}), dict(name='table', attrs={'class':'wpnavi'}), @@ -40,24 +41,26 @@ class Sueddeutsche(BasicNewsRecipe): dict(name='table', attrs={'class':'artikelBox'}), dict(name='table', attrs={'class':'kommentare'}), dict(name='table', attrs={'class':'pageBoxBot'}), + dict(name='table', attrs={'td':'height="3"'}), + dict(name='table', attrs={'class':'contentpaneopen'}), dict(name='td', attrs={'nowrap':'nowrap'}), - dict(name='td', attrs={'valign':'middle'}), dict(name='td', attrs={'align':'left'}), - dict(name='td', attrs={'align':'center'}), dict(name='td', attrs={'height':'5'}), + dict(name='td', attrs={'class':'ArticleWidgetsHeadline'}), dict(name='div', attrs={'class':'artikelBox navigatorBox'}), dict(name='div', attrs={'class':'similar-article-box'}), dict(name='div', attrs={'class':'videoBigHack'}), dict(name='td', attrs={'class':'artikelDruckenRight'}), dict(name='td', attrs={'class':'width="200"'}), + dict(name='span', attrs={'class':'content_rating'}), + dict(name='a', attrs={'href':'http://www.addthis.com/bookmark.php'}), dict(name='a', attrs={'href':'/news'}), - dict(name='a', attrs={'href':'/'}), - dict(name='a', attrs={'href':'/articles'}), dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), dict(name='iframe'), dict(name='form'), dict(name='span', attrs={'class':'hidePrint'}), + dict(id='ArticleWidgets'), dict(id='headerLBox'), dict(id='nointelliTXT'), dict(id='rechteSpalte'), @@ -69,29 +72,20 @@ class Sueddeutsche(BasicNewsRecipe): dict(id='nnav-headerteaser'), dict(id='nnav-head'), dict(id='nnav-top'), - dict(id='nnav-logodiv'), - dict(id='nnav-logo'), - dict(id='nnav-oly'), dict(id='readcomment')] - feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] + feeds = [ (u'Linuxdevices', 
u'http://www.linuxfordevices.com/rss.xml') ] def preprocess_html(self, soup): - for item in soup.findAll(re.compile('^a')): - item.extract() match = re.compile(r"^Related") for item in soup.findAll('b', text=match): item.extract() - for item in soup.findAll(re.compile('^li')): - item.extract() for item in soup.findAll(re.compile('^ul')): - item.extract() - for item in soup.find(re.compile('^br')): - item.extract() + item.extract() for item in soup.findAll('br', limit=10): - item.extract() + item.extract() return soup @@ -101,4 +95,3 @@ class Sueddeutsche(BasicNewsRecipe): return soup -