mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
New recipe Darknet by Oliver Niesner
This commit is contained in:
parent
4bcede833d
commit
fd2d6bdd3d
BIN
src/calibre/gui2/images/news/darknet.png
Normal file
BIN
src/calibre/gui2/images/news/darknet.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.8 KiB |
@ -55,7 +55,7 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'eltiempo_hn', 'slate', 'tnxm', 'bbcvietnamese', 'vnexpress',
|
'eltiempo_hn', 'slate', 'tnxm', 'bbcvietnamese', 'vnexpress',
|
||||||
'volksrant', 'theeconomictimes_india', 'ourdailybread',
|
'volksrant', 'theeconomictimes_india', 'ourdailybread',
|
||||||
'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
|
'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
|
||||||
'esquire', 'livemint', 'thedgesingapore',
|
'esquire', 'livemint', 'thedgesingapore', 'darknet',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
|
|
||||||
|
43
src/calibre/web/feeds/recipes/recipe_darknet.py
Normal file
43
src/calibre/web/feeds/recipes/recipe_darknet.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Fetch darknet.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class darknet(BasicNewsRecipe):

    # --- Metadata -----------------------------------------------------
    title = 'darknet'
    description = 'Ethical hacking and security news'
    __author__ = 'Oliver Niesner'
    language = _('English')
    # strftime-style pattern; see the BasicNewsRecipe docs for where it
    # is displayed.
    timefmt = ' [%b %d %Y]'

    # --- Source and download limits -----------------------------------
    # (feed title, feed URL) pairs.
    feeds = [('darknet', 'http://feedproxy.google.com/darknethackers')]
    use_embedded_content = False
    oldest_article = 180
    max_articles_per_feed = 40
    no_stylesheets = True

    # --- Page clean-up ------------------------------------------------
    # Tag specifications matched by element id, or by tag name plus
    # attributes.
    # NOTE(review): several ids ('navi_top', 'login_suche', ...) look as
    # if they were carried over from another site's recipe -- confirm
    # they actually occur on darknet pages.
    remove_tags = [
        {'id': 'navi_top'},
        {'id': 'navi_bottom'},
        {'id': 'logo'},
        {'id': 'login_suche'},
        {'id': 'navi_login'},
        {'id': 'breadcrumb'},
        {'id': 'subtitle'},
        {'id': 'bannerzone'},
        {'name': 'span', 'attrs': {'class': 'rsaquo'}},
        {'name': 'span', 'attrs': {'class': 'next'}},
        {'name': 'span', 'attrs': {'class': 'prev'}},
        {'name': 'div', 'attrs': {'class': 'news_logo'}},
        {'name': 'div', 'attrs': {'class': 'nextprev'}},
        {'name': 'p', 'attrs': {'class': 'news_option'}},
        {'name': 'p', 'attrs': {'class': 'news_foren'}},
    ]
    # Presumably the article body lives in <div class="entrybody">;
    # verify against a live page.
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'entrybody'}}]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -32,7 +32,6 @@ class elektrolese(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [ (u'electrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
|
feeds = [ (u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,16 +19,24 @@ class hnaDe(BasicNewsRecipe):
|
|||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'iso-8859-1'
|
||||||
|
|
||||||
remove_tags = [dict(id='topnav'),
|
remove_tags = [dict(id='topnav'),
|
||||||
dict(id='nav_main'),
|
dict(id='nav_main'),
|
||||||
|
dict(id='teaser'),
|
||||||
dict(id='suchen'),
|
dict(id='suchen'),
|
||||||
|
dict(id='superbanner'),
|
||||||
|
dict(id='navigation'),
|
||||||
|
dict(id='skyscraper'),
|
||||||
dict(id=''),
|
dict(id=''),
|
||||||
dict(name='span'),
|
dict(name='span'),
|
||||||
dict(name='ul', attrs={'class':'linklist'}),
|
dict(name='ul', attrs={'class':'linklist'}),
|
||||||
dict(name='a', attrs={'href':'#'}),
|
dict(name='a', attrs={'href':'#'}),
|
||||||
|
dict(name='div', attrs={'class':'hlist'}),
|
||||||
|
dict(name='div', attrs={'class':'subc noprint'}),
|
||||||
dict(name='p', attrs={'class':'breadcrumb'}),
|
dict(name='p', attrs={'class':'breadcrumb'}),
|
||||||
|
dict(name='a', attrs={'style':'cursor:hand'}),
|
||||||
dict(name='p', attrs={'class':'h5'})]
|
dict(name='p', attrs={'class':'h5'})]
|
||||||
#remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
|
#remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
|
||||||
remove_tags_after = [dict(name='a', attrs={'href':'#'})]
|
remove_tags_after = [dict(name='a', attrs={'href':'#'})]
|
||||||
@ -38,3 +46,4 @@ class hnaDe(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ Fetch Linuxdevices.
|
|||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class Sueddeutsche(BasicNewsRecipe):
|
class Sueddeutsche(BasicNewsRecipe):
|
||||||
@ -16,22 +17,22 @@ class Sueddeutsche(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
timefmt = ' [%a %d %b %Y]'
|
timefmt = ' [%a %d %b %Y]'
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
language = _('English')
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
|
language = _('English')
|
||||||
html2lrf_options = ['--ignore-tables']
|
remove_javascript = True
|
||||||
|
conversion_options {' linearize_tables' : True}
|
||||||
encoding = 'latin1'
|
encoding = 'latin1'
|
||||||
|
|
||||||
|
|
||||||
remove_tags_after = [dict(id='nointelliTXT')]
|
remove_tags_after = [dict(id='intelliTxt')]
|
||||||
filter_regexps = [r'ad\.doubleclick\.net']
|
filter_regexps = [r'ad\.doubleclick\.net']
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
|
remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
|
||||||
dict(name='div', attrs={'class':'bannerSky'}),
|
dict(name='div', attrs={'class':'bannerSky'}),
|
||||||
|
dict(name='div', attrs={'border':'0'}),
|
||||||
dict(name='div', attrs={'class':'footerLinks'}),
|
dict(name='div', attrs={'class':'footerLinks'}),
|
||||||
dict(name='div', attrs={'class':'seitenanfang'}),
|
dict(name='div', attrs={'class':'seitenanfang'}),
|
||||||
dict(name='td', attrs={'class':'mar5'}),
|
dict(name='td', attrs={'class':'mar5'}),
|
||||||
dict(name='td', attrs={'class':'mar5'}),
|
|
||||||
dict(name='table', attrs={'class':'pageAktiv'}),
|
dict(name='table', attrs={'class':'pageAktiv'}),
|
||||||
dict(name='table', attrs={'class':'xartable'}),
|
dict(name='table', attrs={'class':'xartable'}),
|
||||||
dict(name='table', attrs={'class':'wpnavi'}),
|
dict(name='table', attrs={'class':'wpnavi'}),
|
||||||
@ -40,24 +41,26 @@ class Sueddeutsche(BasicNewsRecipe):
|
|||||||
dict(name='table', attrs={'class':'artikelBox'}),
|
dict(name='table', attrs={'class':'artikelBox'}),
|
||||||
dict(name='table', attrs={'class':'kommentare'}),
|
dict(name='table', attrs={'class':'kommentare'}),
|
||||||
dict(name='table', attrs={'class':'pageBoxBot'}),
|
dict(name='table', attrs={'class':'pageBoxBot'}),
|
||||||
|
dict(name='table', attrs={'td':'height="3"'}),
|
||||||
|
dict(name='table', attrs={'class':'contentpaneopen'}),
|
||||||
dict(name='td', attrs={'nowrap':'nowrap'}),
|
dict(name='td', attrs={'nowrap':'nowrap'}),
|
||||||
dict(name='td', attrs={'valign':'middle'}),
|
|
||||||
dict(name='td', attrs={'align':'left'}),
|
dict(name='td', attrs={'align':'left'}),
|
||||||
dict(name='td', attrs={'align':'center'}),
|
|
||||||
dict(name='td', attrs={'height':'5'}),
|
dict(name='td', attrs={'height':'5'}),
|
||||||
|
dict(name='td', attrs={'class':'ArticleWidgetsHeadline'}),
|
||||||
dict(name='div', attrs={'class':'artikelBox navigatorBox'}),
|
dict(name='div', attrs={'class':'artikelBox navigatorBox'}),
|
||||||
dict(name='div', attrs={'class':'similar-article-box'}),
|
dict(name='div', attrs={'class':'similar-article-box'}),
|
||||||
dict(name='div', attrs={'class':'videoBigHack'}),
|
dict(name='div', attrs={'class':'videoBigHack'}),
|
||||||
dict(name='td', attrs={'class':'artikelDruckenRight'}),
|
dict(name='td', attrs={'class':'artikelDruckenRight'}),
|
||||||
dict(name='td', attrs={'class':'width="200"'}),
|
dict(name='td', attrs={'class':'width="200"'}),
|
||||||
|
dict(name='span', attrs={'class':'content_rating'}),
|
||||||
|
dict(name='a', attrs={'href':'http://www.addthis.com/bookmark.php'}),
|
||||||
dict(name='a', attrs={'href':'/news'}),
|
dict(name='a', attrs={'href':'/news'}),
|
||||||
dict(name='a', attrs={'href':'/'}),
|
|
||||||
dict(name='a', attrs={'href':'/articles'}),
|
|
||||||
dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}),
|
dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}),
|
||||||
dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}),
|
dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}),
|
||||||
dict(name='iframe'),
|
dict(name='iframe'),
|
||||||
dict(name='form'),
|
dict(name='form'),
|
||||||
dict(name='span', attrs={'class':'hidePrint'}),
|
dict(name='span', attrs={'class':'hidePrint'}),
|
||||||
|
dict(id='ArticleWidgets'),
|
||||||
dict(id='headerLBox'),
|
dict(id='headerLBox'),
|
||||||
dict(id='nointelliTXT'),
|
dict(id='nointelliTXT'),
|
||||||
dict(id='rechteSpalte'),
|
dict(id='rechteSpalte'),
|
||||||
@ -69,27 +72,18 @@ class Sueddeutsche(BasicNewsRecipe):
|
|||||||
dict(id='nnav-headerteaser'),
|
dict(id='nnav-headerteaser'),
|
||||||
dict(id='nnav-head'),
|
dict(id='nnav-head'),
|
||||||
dict(id='nnav-top'),
|
dict(id='nnav-top'),
|
||||||
dict(id='nnav-logodiv'),
|
|
||||||
dict(id='nnav-logo'),
|
|
||||||
dict(id='nnav-oly'),
|
|
||||||
dict(id='readcomment')]
|
dict(id='readcomment')]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]
|
feeds = [ (u'Linuxdevices', u'http://www.linuxfordevices.com/rss.xml') ]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(re.compile('^a')):
|
|
||||||
item.extract()
|
|
||||||
match = re.compile(r"^Related")
|
match = re.compile(r"^Related")
|
||||||
for item in soup.findAll('b', text=match):
|
for item in soup.findAll('b', text=match):
|
||||||
item.extract()
|
item.extract()
|
||||||
for item in soup.findAll(re.compile('^li')):
|
|
||||||
item.extract()
|
|
||||||
for item in soup.findAll(re.compile('^ul')):
|
for item in soup.findAll(re.compile('^ul')):
|
||||||
item.extract()
|
item.extract()
|
||||||
for item in soup.find(re.compile('^br')):
|
|
||||||
item.extract()
|
|
||||||
for item in soup.findAll('br', limit=10):
|
for item in soup.findAll('br', limit=10):
|
||||||
item.extract()
|
item.extract()
|
||||||
return soup
|
return soup
|
||||||
@ -101,4 +95,3 @@ class Sueddeutsche(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user