New recipe Darknet by Oliver Niesner

2025-07-07 10:14:46 -04:00 · 2009-08-26 08:28:11 -06:00 · 2009-08-26 08:28:11 -06:00 · fd2d6bdd3d
commit fd2d6bdd3d
parent 4bcede833d
6 changed files with 69 additions and 25 deletions
--- a/src/calibre/gui2/images/news/darknet.png
+++ b/src/calibre/gui2/images/news/darknet.png
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -55,7 +55,7 @@ recipe_modules = ['recipe_' + r for r in (
           'eltiempo_hn', 'slate', 'tnxm', 'bbcvietnamese', 'vnexpress',
           'volksrant', 'theeconomictimes_india', 'ourdailybread',
           'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
-           'esquire', 'livemint', 'thedgesingapore',
+           'esquire', 'livemint', 'thedgesingapore', 'darknet',
          )]
--- a/src/calibre/web/feeds/recipes/recipe_darknet.py
+++ b/src/calibre/web/feeds/recipes/recipe_darknet.py
@@ -0,0 +1,43 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Fetch darknet.
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class darknet(BasicNewsRecipe):
    title = 'darknet'
    description = 'Ethical hacking and security news'
    __author__ = 'Oliver Niesner'
    language = _('English')
    use_embedded_content   = False
    timefmt = ' [%b %d %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True
    oldest_article = 180
    remove_tags = [dict(id='navi_top'),
 		   dict(id='navi_bottom'),
 		   dict(id='logo'),
 		   dict(id='login_suche'),
 		   dict(id='navi_login'),
 		   dict(id='breadcrumb'),
 		   dict(id='subtitle'),
 		   dict(id='bannerzone'),
 		   dict(name='span', attrs={'class':'rsaquo'}),
 		   dict(name='span', attrs={'class':'next'}),
 		   dict(name='span', attrs={'class':'prev'}),
 		   dict(name='div', attrs={'class':'news_logo'}),
 		   dict(name='div', attrs={'class':'nextprev'}),
 		   dict(name='p', attrs={'class':'news_option'}),
 		   dict(name='p', attrs={'class':'news_foren'})]
    remove_tags_after = [dict(name='div', attrs={'class':'entrybody'})]
    feeds =  [ ('darknet', 'http://feedproxy.google.com/darknethackers') ]
--- a/src/calibre/web/feeds/recipes/recipe_elektrolese.py
+++ b/src/calibre/web/feeds/recipes/recipe_elektrolese.py
@@ -32,7 +32,6 @@ class elektrolese(BasicNewsRecipe):
-    feeds =  [ (u'electrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
+    feeds =  [ (u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
--- a/src/calibre/web/feeds/recipes/recipe_hna.py
+++ b/src/calibre/web/feeds/recipes/recipe_hna.py
@@ -19,16 +19,24 @@ class hnaDe(BasicNewsRecipe):
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True
    remove_javascript = True
    encoding = 'iso-8859-1'
    remove_tags = [dict(id='topnav'),
 		   dict(id='nav_main'),
 		   dict(id='teaser'),
 		   dict(id='suchen'),
 		   dict(id='superbanner'),
 		   dict(id='navigation'),
 		   dict(id='skyscraper'),
 		   dict(id=''),
                   dict(name='span'),
 		   dict(name='ul', attrs={'class':'linklist'}),
 		   dict(name='a', attrs={'href':'#'}),
 		   dict(name='div', attrs={'class':'hlist'}),
 		   dict(name='div', attrs={'class':'subc noprint'}),
 		   dict(name='p', attrs={'class':'breadcrumb'}),
 		   dict(name='a', attrs={'style':'cursor:hand'}),
 		   dict(name='p', attrs={'class':'h5'})]
    #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
    remove_tags_after = [dict(name='a', attrs={'href':'#'})]
@@ -38,3 +46,4 @@ class hnaDe(BasicNewsRecipe):
--- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py
+++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py
@@ -6,6 +6,7 @@ Fetch Linuxdevices.
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class Sueddeutsche(BasicNewsRecipe):
@@ -16,22 +17,22 @@ class Sueddeutsche(BasicNewsRecipe):
    use_embedded_content   = False
    timefmt = ' [%a %d %b %Y]'
    max_articles_per_feed = 50
    language = _('English')
    no_stylesheets = True
-    html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
+    language = _('English')
-    html2lrf_options = ['--ignore-tables']
+    remove_javascript = True
    conversion_options {' linearize_tables' : True}
    encoding = 'latin1'
-    remove_tags_after = [dict(id='nointelliTXT')]
+    remove_tags_after = [dict(id='intelliTxt')]
    filter_regexps = [r'ad\.doubleclick\.net']
    remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
                   dict(name='div', attrs={'class':'bannerSky'}),
                   dict(name='div', attrs={'border':'0'}),
                   dict(name='div', attrs={'class':'footerLinks'}),
                   dict(name='div', attrs={'class':'seitenanfang'}),
                   dict(name='td', attrs={'class':'mar5'}),
                   dict(name='td', attrs={'class':'mar5'}),
                   dict(name='table', attrs={'class':'pageAktiv'}),
                   dict(name='table', attrs={'class':'xartable'}),
                   dict(name='table', attrs={'class':'wpnavi'}),
@@ -40,24 +41,26 @@ class Sueddeutsche(BasicNewsRecipe):
                   dict(name='table', attrs={'class':'artikelBox'}),
                   dict(name='table', attrs={'class':'kommentare'}),
                   dict(name='table', attrs={'class':'pageBoxBot'}),
                   dict(name='table', attrs={'td':'height="3"'}),
                   dict(name='table', attrs={'class':'contentpaneopen'}),
                   dict(name='td', attrs={'nowrap':'nowrap'}),
                   dict(name='td', attrs={'valign':'middle'}),
                   dict(name='td', attrs={'align':'left'}),
                   dict(name='td', attrs={'align':'center'}),
                   dict(name='td', attrs={'height':'5'}),
                   dict(name='td', attrs={'class':'ArticleWidgetsHeadline'}),
                   dict(name='div', attrs={'class':'artikelBox navigatorBox'}),
                   dict(name='div', attrs={'class':'similar-article-box'}),
                   dict(name='div', attrs={'class':'videoBigHack'}),
                   dict(name='td', attrs={'class':'artikelDruckenRight'}),
                   dict(name='td', attrs={'class':'width="200"'}),
                   dict(name='span', attrs={'class':'content_rating'}),
                   dict(name='a', attrs={'href':'http://www.addthis.com/bookmark.php'}),
                   dict(name='a', attrs={'href':'/news'}),
                   dict(name='a', attrs={'href':'/'}),
                   dict(name='a', attrs={'href':'/articles'}),
                   dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}),
                   dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}),
                   dict(name='iframe'),
                   dict(name='form'),
                   dict(name='span', attrs={'class':'hidePrint'}),
                   dict(id='ArticleWidgets'),
                   dict(id='headerLBox'),
                   dict(id='nointelliTXT'),
                   dict(id='rechteSpalte'),
@@ -69,27 +72,18 @@ class Sueddeutsche(BasicNewsRecipe):
                   dict(id='nnav-headerteaser'),
                   dict(id='nnav-head'),
                   dict(id='nnav-top'),
                   dict(id='nnav-logodiv'),
                   dict(id='nnav-logo'),
                   dict(id='nnav-oly'),
                   dict(id='readcomment')]
-    feeds =  [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]
+    feeds =  [ (u'Linuxdevices', u'http://www.linuxfordevices.com/rss.xml') ]
    def preprocess_html(self, soup):
        for item in soup.findAll(re.compile('^a')):
            item.extract()
        match = re.compile(r"^Related")
        for item in soup.findAll('b', text=match):
            item.extract()
        for item in soup.findAll(re.compile('^li')):
            item.extract()
        for item in soup.findAll(re.compile('^ul')):
 	    item.extract()
        for item in soup.find(re.compile('^br')):
            item.extract()
        for item in soup.findAll('br', limit=10):
 	    item.extract()
        return soup
@@ -101,4 +95,3 @@ class Sueddeutsche(BasicNewsRecipe):
        return soup
@@ -32,7 +32,6 @@ class elektrolese(BasicNewsRecipe):
-    feeds =  [ (u'electrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
+    feeds =  [ (u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]