Pull from trunk

This commit is contained in:
Kovid Goyal 2009-05-29 00:14:33 -07:00
commit f004382336
11 changed files with 108 additions and 63 deletions

View File

@ -352,7 +352,7 @@ License: other
Liberation Fonts
-----------------
calibre includes a copy of the liberation fonts, available from
https://fedorahosted.org/liberation-fonts
https://calibre.kovidgoyal.net/downloads/liberation-fonts
BSD License (for all the BSD licensed code indicated above)
-----------------------------------------------------------

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

View File

@ -39,6 +39,7 @@ def get_linux_data(version='1.0.0'):
('debian', 'Debian Sid'),
('exherbo', 'Exherbo'),
('foresight', 'Foresight 2.1'),
('gentoo', 'Gentoo'),
('ubuntu', 'Ubuntu Jaunty Jackalope'),
('linux_mint', 'Linux Mint Gloria'),
]:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -16,7 +16,7 @@ def create_man_page(prog, parser):
else:
usage[i] = line.replace('%prog', prog)
lines = [
'.TH ' + prog.upper() + ' "1" ' + time.strftime('"%B %Y"') +
'.TH ' + prog.upper() + ' "1" ' + time.strftime('"%B %Y"') +
' "%s (%s %s)" "%s"'%(prog, __appname__, __version__, __appname__),
'.SH NAME',
prog + r' \- part of '+__appname__,
@ -25,7 +25,7 @@ def create_man_page(prog, parser):
'.SH DESCRIPTION',
]
lines += usage[1:]
lines += [
'.SH OPTIONS'
]
@ -39,7 +39,7 @@ def create_man_page(prog, parser):
help = opt.help if opt.help else ''
ans.append(help.replace('%prog', prog).replace('%default', str(opt.default)))
return ans
for opt in parser.option_list:
lines.extend(format_option(opt))
for group in parser.option_groups:
@ -48,12 +48,15 @@ def create_man_page(prog, parser):
lines.extend(['.PP', group.description])
for opt in group.option_list:
lines.extend(format_option(opt))
lines += ['.SH SEE ALSO',
lines += ['.SH SEE ALSO',
'The User Manual is available at '
'http://calibre.kovidgoyal.net/user_manual',
'.PP', '.B Created by '+__author__]
return bz2.compress('\n'.join(lines))
lines = [x if isinstance(x, unicode) else unicode(x, 'utf-8', 'replace') for
x in lines]
return bz2.compress((u'\n'.join(lines)).encode('utf-8'))

View File

@ -43,7 +43,7 @@ recipe_modules = ['recipe_' + r for r in (
'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms',
'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
'h1', 'h2', 'h3', 'phd_comics', 'woz_die',
'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch elektrolese.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class elektrolese(BasicNewsRecipe):
    # Recipe that fetches the German-language "elektrolese" blog
    # (news about electronic publishing) from its Blogspot RSS feed.

    # --- Metadata -------------------------------------------------------
    title = u'elektrolese'
    description = 'News about electronic publishing'
    __author__ = 'Oliver Niesner'
    language = _('German')
    timefmt = ' [%a %d %b %Y]'

    # --- Download limits ------------------------------------------------
    use_embedded_content = False
    oldest_article = 14
    max_articles_per_feed = 50

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    #html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
    encoding = 'utf-8'
    # The element with id 'comments' is listed both here and in
    # remove_tags below, so it is used as a marker and is itself removed.
    remove_tags_after = [dict(id='comments')]
    # Raw string: the dots in the ad-server hostname are matched literally.
    filter_regexps = [r'ad\.doubleclick\.net']
    remove_tags = [
        dict(name='div', attrs={'class': 'bannerSuperBanner'}),
        dict(id='comments'),
    ]

    # --- Feeds ----------------------------------------------------------
    # The feed label is spelled like the recipe title and the blog's
    # hostname ('elektrolese').
    feeds = [
        (u'elektrolese',
         u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss'),
    ]

View File

@ -8,7 +8,7 @@ Fetch heise.
from calibre.web.feeds.news import BasicNewsRecipe
class HeiseDe(BasicNewsRecipe):
class heiseDe(BasicNewsRecipe):
title = 'heise'
description = 'Computernews from Germany'
@ -20,23 +20,26 @@ class HeiseDe(BasicNewsRecipe):
no_stylesheets = True
remove_tags = [dict(id='navi_top'),
dict(id='navi_bottom'),
dict(id='logo'),
dict(id='login_suche'),
dict(id='navi_login'),
dict(id='navigation'),
dict(id='breadcrumb'),
dict(id=''),
dict(id='sitemap'),
dict(id='bannerzone'),
dict(name='span', attrs={'class':'rsaquo'}),
dict(name='div', attrs={'class':'news_logo'}),
dict(name='p', attrs={'class':'news_option'}),
dict(name='p', attrs={'class':'news_navi'}),
dict(name='p', attrs={'class':'news_foren'})]
dict(id='navi_bottom'),
dict(id='logo'),
dict(id='login_suche'),
dict(id='navi_login'),
dict(id='navigation'),
dict(id='breadcrumb'),
dict(id=''),
dict(id='sitemap'),
dict(id='bannerzone'),
dict(name='span', attrs={'class':'rsaquo'}),
dict(name='div', attrs={'class':'news_logo'}),
dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
dict(name='p', attrs={'class':'news_option'}),
dict(name='p', attrs={'class':'news_navi'}),
dict(name='p', attrs={'class':'news_foren'})]
remove_tags_after = [dict(name='p', attrs={'class':'news_foren'})]
feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]

View File

@ -1,53 +1,57 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
newyorker.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class NewYorker(BasicNewsRecipe):
title = u'The New Yorker'
title = 'The New Yorker'
__author__ = 'Darko Miletic'
description = 'The best of US journalism'
description = 'The best of US journalism'
oldest_article = 7
language = _('English')
max_articles_per_feed = 100
no_stylesheets = False
no_stylesheets = True
use_embedded_content = False
extra_css = '''
.calibre_feed_list {font-size:xx-small}
.calibre_article_list {font-size:xx-small}
.calibre_feed_title {font-size:normal}
.calibre_recipe_title {font-size:normal}
.calibre_feed_description {font-size:xx-small}
'''
publisher = 'Conde Nast Publications'
category = 'news, politics, USA'
encoding = 'cp1252'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [
dict(name='div' , attrs={'id':'printbody' })
]
keep_only_tags = [dict(name='div', attrs={'id':'printbody'})]
remove_tags_after = dict(name='div',attrs={'id':'articlebody'})
remove_tags = [
dict(name='div' , attrs={'class':'utils' })
,dict(name='div' , attrs={'id':'bottomFeatures' })
,dict(name='div' , attrs={'id':'articleBottom' })
dict(name='div', attrs={'class':['utils','articleRailLinks','icons'] })
,dict(name='link')
]
feeds = [
(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')
]
feeds = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
def print_version(self, url):
return url + '?printable=true'
def get_article_url(self, article):
return article.get('guid', None)
def postprocess_html(self, soup, x):
body = soup.find('body')
if body:
html = soup.find('html')
if html:
body.extract()
html.insert(-1, body)
html.insert(2, body)
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(1,mcharset)
return soup

View File

@ -9,15 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe):
title = u'S\xc3\xbcddeutsche'
title = u'S\xfcddeutsche'
description = 'News from Germany'
__author__ = 'Oliver Niesner'
use_embedded_content = False
language = _('German')
timefmt = ' [%d %b %Y]'
oldest_article = 7
max_articles_per_feed = 50
no_stylesheets = True
language = _('German')
encoding = 'iso-8859-15'
remove_javascript = True
@ -89,3 +89,5 @@ class Sueddeutsche(BasicNewsRecipe):
def print_version(self, url):
return url.replace('/text/', '/text/print.html')

View File

@ -6,11 +6,10 @@ Fetch zdnet.
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class cdnet(BasicNewsRecipe):
title = 'zdnet'
description = 'zdnet security'
__author__ = 'Oliver Niesner'
@ -19,16 +18,10 @@ class cdnet(BasicNewsRecipe):
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 40
no_stylesheets = True
encoding = 'iso-8859-1'
encoding = 'latin1'
#preprocess_regexps = \
# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
# [
# (r'<84>', lambda match: ''),
# (r'<93>', lambda match: ''),
# ]
# ]
remove_tags = [dict(id='eyebrows'),
dict(id='header'),
dict(id='search'),
@ -36,12 +29,16 @@ class cdnet(BasicNewsRecipe):
dict(id=''),
dict(name='div', attrs={'class':'banner'}),
dict(name='p', attrs={'class':'tags'}),
dict(name='a', attrs={'href':'http://www.twitter.com/ryanaraine'}),
dict(name='div', attrs={'class':'special1'})]
remove_tags_after = [dict(name='div', attrs={'class':'bloggerDesc clear'})]
feeds = [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ]
feeds = [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ]
def preprocess_html(self, soup):
    """Remove the inline ``style`` attribute from every element that has one.

    ``soup`` is searched with ``findAll(style=True)``; each returned item
    has its ``style`` key deleted in place. The same ``soup`` object is
    returned.
    """
    for item in soup.findAll(style=True):
        del item['style']
    return soup