Pull from trunk

This commit is contained in:
Kovid Goyal 2009-05-29 00:14:33 -07:00
commit f004382336
11 changed files with 108 additions and 63 deletions

View File

@@ -352,7 +352,7 @@ License: other
Liberation Fonts Liberation Fonts
----------------- -----------------
calibre includes a copy of the liberation fonts, available from calibre includes a copy of the liberation fonts, available from
https://fedorahosted.org/liberation-fonts https://calibre.kovidgoyal.net/downloads/liberation-fonts
BSD License (for all the BSD licensed code indicated above) BSD License (for all the BSD licensed code indicated above)
----------------------------------------------------------- -----------------------------------------------------------

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

View File

@@ -39,6 +39,7 @@ def get_linux_data(version='1.0.0'):
('debian', 'Debian Sid'), ('debian', 'Debian Sid'),
('exherbo', 'Exherbo'), ('exherbo', 'Exherbo'),
('foresight', 'Foresight 2.1'), ('foresight', 'Foresight 2.1'),
('gentoo', 'Gentoo'),
('ubuntu', 'Ubuntu Jaunty Jackalope'), ('ubuntu', 'Ubuntu Jaunty Jackalope'),
('linux_mint', 'Linux Mint Gloria'), ('linux_mint', 'Linux Mint Gloria'),
]: ]:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 29 KiB

View File

@@ -54,6 +54,9 @@ def create_man_page(prog, parser):
'http://calibre.kovidgoyal.net/user_manual', 'http://calibre.kovidgoyal.net/user_manual',
'.PP', '.B Created by '+__author__] '.PP', '.B Created by '+__author__]
return bz2.compress('\n'.join(lines)) lines = [x if isinstance(x, unicode) else unicode(x, 'utf-8', 'replace') for
x in lines]
return bz2.compress((u'\n'.join(lines)).encode('utf-8'))

View File

@@ -43,7 +43,7 @@ recipe_modules = ['recipe_' + r for r in (
'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms', 'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms',
'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews', 'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts', 'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@@ -0,0 +1,35 @@
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch elektrolese.

Recipe for the German blog "elektrolese" (news about electronic
publishing), fetched via its Blogspot RSS feed.
'''

from calibre.web.feeds.news import BasicNewsRecipe


class elektrolese(BasicNewsRecipe):

    title                 = u'elektrolese'
    description           = 'News about electronic publishing'
    __author__            = 'Oliver Niesner'
    use_embedded_content  = False
    timefmt               = ' [%a %d %b %Y]'
    # NOTE(review): `_` is the gettext builtin installed by calibre at
    # runtime; this block is not importable outside calibre.
    language              = _('German')
    oldest_article        = 14
    max_articles_per_feed = 50
    no_stylesheets        = True
    #html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
    encoding              = 'utf-8'

    # Drop everything after the comment section, and strip ad banners
    # plus the comment block itself from the article body.
    remove_tags_after     = [dict(id='comments')]
    filter_regexps        = [r'ad\.doubleclick\.net']
    remove_tags           = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
                             dict(id='comments')]

    # NOTE(review): feed label says 'electrolese' (c) while the title is
    # 'elektrolese' (k) — presumably a typo, but it is user-visible text
    # so it is preserved as-is here; confirm with the recipe author.
    feeds = [(u'electrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss')]

View File

@@ -8,7 +8,7 @@ Fetch heise.
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HeiseDe(BasicNewsRecipe): class heiseDe(BasicNewsRecipe):
title = 'heise' title = 'heise'
description = 'Computernews from Germany' description = 'Computernews from Germany'
@@ -31,6 +31,7 @@ class HeiseDe(BasicNewsRecipe):
dict(id='bannerzone'), dict(id='bannerzone'),
dict(name='span', attrs={'class':'rsaquo'}), dict(name='span', attrs={'class':'rsaquo'}),
dict(name='div', attrs={'class':'news_logo'}), dict(name='div', attrs={'class':'news_logo'}),
dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
dict(name='p', attrs={'class':'news_option'}), dict(name='p', attrs={'class':'news_option'}),
dict(name='p', attrs={'class':'news_navi'}), dict(name='p', attrs={'class':'news_navi'}),
dict(name='p', attrs={'class':'news_foren'})] dict(name='p', attrs={'class':'news_foren'})]
@@ -40,3 +41,5 @@ class HeiseDe(BasicNewsRecipe):

View File

@@ -1,53 +1,57 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
newyorker.com newyorker.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class NewYorker(BasicNewsRecipe): class NewYorker(BasicNewsRecipe):
title = 'The New Yorker'
title = u'The New Yorker'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'The best of US journalism' description = 'The best of US journalism'
oldest_article = 7 oldest_article = 7
language = _('English') language = _('English')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = False no_stylesheets = True
use_embedded_content = False use_embedded_content = False
extra_css = ''' publisher = 'Conde Nast Publications'
.calibre_feed_list {font-size:xx-small} category = 'news, politics, USA'
.calibre_article_list {font-size:xx-small} encoding = 'cp1252'
.calibre_feed_title {font-size:normal}
.calibre_recipe_title {font-size:normal}
.calibre_feed_description {font-size:xx-small}
'''
html2lrf_options = [
keep_only_tags = [ '--comment', description
dict(name='div' , attrs={'id':'printbody' }) , '--category', category
, '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'printbody'})]
remove_tags_after = dict(name='div',attrs={'id':'articlebody'})
remove_tags = [ remove_tags = [
dict(name='div' , attrs={'class':'utils' }) dict(name='div', attrs={'class':['utils','articleRailLinks','icons'] })
,dict(name='div' , attrs={'id':'bottomFeatures' }) ,dict(name='link')
,dict(name='div' , attrs={'id':'articleBottom' })
] ]
feeds = [ feeds = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')
]
def print_version(self, url): def print_version(self, url):
return url + '?printable=true' return url + '?printable=true'
def get_article_url(self, article):
return article.get('guid', None)
def postprocess_html(self, soup, x): def postprocess_html(self, soup, x):
body = soup.find('body') body = soup.find('body')
if body: if body:
html = soup.find('html') html = soup.find('html')
if html: if html:
body.extract() body.extract()
html.insert(-1, body) html.insert(2, body)
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(1,mcharset)
return soup return soup

View File

@@ -9,15 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe): class Sueddeutsche(BasicNewsRecipe):
title = u'S\xc3\xbcddeutsche' title = u'S\xfcddeutsche'
description = 'News from Germany' description = 'News from Germany'
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
language = _('German')
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 50 max_articles_per_feed = 50
no_stylesheets = True no_stylesheets = True
language = _('German')
encoding = 'iso-8859-15' encoding = 'iso-8859-15'
remove_javascript = True remove_javascript = True
@@ -89,3 +89,5 @@ class Sueddeutsche(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace('/text/', '/text/print.html') return url.replace('/text/', '/text/print.html')

View File

@@ -6,7 +6,6 @@ Fetch zdnet.
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class cdnet(BasicNewsRecipe): class cdnet(BasicNewsRecipe):
@@ -19,15 +18,9 @@ class cdnet(BasicNewsRecipe):
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
max_articles_per_feed = 40 max_articles_per_feed = 40
no_stylesheets = True no_stylesheets = True
encoding = 'iso-8859-1' encoding = 'latin1'
#preprocess_regexps = \
# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
# [
# (r'<84>', lambda match: ''),
# (r'<93>', lambda match: ''),
# ]
# ]
remove_tags = [dict(id='eyebrows'), remove_tags = [dict(id='eyebrows'),
dict(id='header'), dict(id='header'),
@@ -36,12 +29,16 @@ class cdnet(BasicNewsRecipe):
dict(id=''), dict(id=''),
dict(name='div', attrs={'class':'banner'}), dict(name='div', attrs={'class':'banner'}),
dict(name='p', attrs={'class':'tags'}), dict(name='p', attrs={'class':'tags'}),
dict(name='a', attrs={'href':'http://www.twitter.com/ryanaraine'}),
dict(name='div', attrs={'class':'special1'})] dict(name='div', attrs={'class':'special1'})]
remove_tags_after = [dict(name='div', attrs={'class':'bloggerDesc clear'})] remove_tags_after = [dict(name='div', attrs={'class':'bloggerDesc clear'})]
feeds = [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ] feeds = [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup