Pull from trunk

2025-08-30 23:00:21 -04:00 · 2009-05-29 00:14:33 -07:00 · 2009-05-29 00:14:33 -07:00 · f004382336
commit f004382336
parent eb2d348103 857d114ab9
11 changed files with 108 additions and 63 deletions
--- a/2
+++ b/2
@ -352,7 +352,7 @@ License: other
 Liberation Fonts
 -----------------
 calibre includes a copy of the liberation fonts, available from
-https://fedorahosted.org/liberation-fonts
+https://calibre.kovidgoyal.net/downloads/liberation-fonts

 BSD License (for all the BSD licensed code indicated above)
 -----------------------------------------------------------
--- a/src/calibre/gui2/images/news/elektrolese.png
+++ b/src/calibre/gui2/images/news/elektrolese.png
--- a/src/calibre/trac/plugins/download.py
+++ b/src/calibre/trac/plugins/download.py
@ -39,6 +39,7 @@ def get_linux_data(version='1.0.0'):
                        ('debian', 'Debian Sid'),
                        ('exherbo', 'Exherbo'),
                        ('foresight', 'Foresight 2.1'),
+                        ('gentoo', 'Gentoo'),
                        ('ubuntu', 'Ubuntu Jaunty Jackalope'),
                        ('linux_mint', 'Linux Mint Gloria'),
                        ]:
--- a/src/calibre/trac/plugins/htdocs/images/gentoo_logo.png
+++ b/src/calibre/trac/plugins/htdocs/images/gentoo_logo.png
--- a/src/calibre/utils/help2man.py
+++ b/src/calibre/utils/help2man.py
@ -16,7 +16,7 @@ def create_man_page(prog, parser):
        else:
            usage[i] = line.replace('%prog', prog)
    lines = [
-             '.TH ' + prog.upper() + ' "1" ' + time.strftime('"%B %Y"') + 
+             '.TH ' + prog.upper() + ' "1" ' + time.strftime('"%B %Y"') +
             ' "%s (%s %s)" "%s"'%(prog, __appname__, __version__, __appname__),
             '.SH NAME',
             prog + r' \- part of '+__appname__,
@ -25,7 +25,7 @@ def create_man_page(prog, parser):
             '.SH DESCRIPTION',
             ]
    lines += usage[1:]
-    
+
    lines += [
              '.SH OPTIONS'
              ]
@ -39,7 +39,7 @@ def create_man_page(prog, parser):
        help = opt.help if opt.help else ''
        ans.append(help.replace('%prog', prog).replace('%default', str(opt.default)))
        return ans
-    
+
    for opt in parser.option_list:
        lines.extend(format_option(opt))
    for group in parser.option_groups:
@ -48,12 +48,15 @@ def create_man_page(prog, parser):
            lines.extend(['.PP', group.description])
        for opt in group.option_list:
            lines.extend(format_option(opt))
-    
-    lines += ['.SH SEE ALSO', 
+
+    lines += ['.SH SEE ALSO',
              'The User Manual is available at '
              'http://calibre.kovidgoyal.net/user_manual',
              '.PP', '.B Created by '+__author__]
-    
-    return  bz2.compress('\n'.join(lines))
+
+    lines = [x if isinstance(x, unicode) else unicode(x, 'utf-8', 'replace') for
+            x in lines]
+
+    return  bz2.compress((u'\n'.join(lines)).encode('utf-8'))


--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -43,7 +43,7 @@ recipe_modules = ['recipe_' + r for r in (
           'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms',
           'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
           'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
-           'h1', 'h2', 'h3', 'phd_comics', 'woz_die',
+           'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_elektrolese.py
+++ b/src/calibre/web/feeds/recipes/recipe_elektrolese.py
@ -0,0 +1,35 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+'''
+Fetch elektrolese.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class elektrolese(BasicNewsRecipe):
+
+    title = u'elektrolese'
+    description = 'News about electronic publishing'
+    __author__ = 'Oliver Niesner'
+    use_embedded_content   = False
+    timefmt = ' [%a %d %b %Y]'
+    language = _('German')
+    oldest_article = 14
+    max_articles_per_feed = 50
+    no_stylesheets = True
+    #html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
+    encoding = 'utf-8'
+
+
+    remove_tags_after = [dict(id='comments')]
+    filter_regexps = [r'ad\.doubleclick\.net']
+
+    remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
+                   dict(id='comments')]
+
+
+
+    feeds =  [ (u'electrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
+
--- a/src/calibre/web/feeds/recipes/recipe_heise.py
+++ b/src/calibre/web/feeds/recipes/recipe_heise.py
@ -8,7 +8,7 @@ Fetch heise.
 from calibre.web.feeds.news import BasicNewsRecipe


-class HeiseDe(BasicNewsRecipe):
+class heiseDe(BasicNewsRecipe):

    title = 'heise'
    description = 'Computernews from Germany'
@ -20,23 +20,26 @@ class HeiseDe(BasicNewsRecipe):
    no_stylesheets = True

    remove_tags = [dict(id='navi_top'),
-           dict(id='navi_bottom'),
-           dict(id='logo'),
-           dict(id='login_suche'),
-           dict(id='navi_login'),
-           dict(id='navigation'),
-           dict(id='breadcrumb'),
-           dict(id=''),
-           dict(id='sitemap'),
-           dict(id='bannerzone'),
-           dict(name='span', attrs={'class':'rsaquo'}),
-           dict(name='div', attrs={'class':'news_logo'}),
-           dict(name='p', attrs={'class':'news_option'}),
-           dict(name='p', attrs={'class':'news_navi'}),
-           dict(name='p', attrs={'class':'news_foren'})]
+		   dict(id='navi_bottom'),
+		   dict(id='logo'),
+		   dict(id='login_suche'),
+		   dict(id='navi_login'),
+		   dict(id='navigation'),
+		   dict(id='breadcrumb'),
+		   dict(id=''),
+		   dict(id='sitemap'),
+		   dict(id='bannerzone'),
+		   dict(name='span', attrs={'class':'rsaquo'}),
+		   dict(name='div', attrs={'class':'news_logo'}),
+		   dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
+		   dict(name='p', attrs={'class':'news_option'}),
+		   dict(name='p', attrs={'class':'news_navi'}),
+		   dict(name='p', attrs={'class':'news_foren'})]
    remove_tags_after = [dict(name='p', attrs={'class':'news_foren'})]

    feeds =  [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]



+
+
--- a/src/calibre/web/feeds/recipes/recipe_new_yorker.py
+++ b/src/calibre/web/feeds/recipes/recipe_new_yorker.py
@ -1,53 +1,57 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 newyorker.com
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class NewYorker(BasicNewsRecipe):
-
-    title                 = u'The New Yorker'
+    title                 = 'The New Yorker'
    __author__            = 'Darko Miletic'
-    description           = 'The best of US journalism'
+    description           = 'The best of US journalism'    
    oldest_article        = 7
    language              = _('English')
    max_articles_per_feed = 100
-    no_stylesheets        = False
+    no_stylesheets        = True
    use_embedded_content  = False
-    extra_css = '''
-    .calibre_feed_list {font-size:xx-small}
-    .calibre_article_list {font-size:xx-small}
-    .calibre_feed_title {font-size:normal}
-    .calibre_recipe_title {font-size:normal}
-    .calibre_feed_description {font-size:xx-small}
-    '''
+    publisher             = 'Conde Nast Publications'
+    category              = 'news, politics, USA'
+    encoding              = 'cp1252'
+                    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 

-
-    keep_only_tags = [
-                        dict(name='div'  , attrs={'id':'printbody'   })
-                     ]
+    keep_only_tags = [dict(name='div', attrs={'id':'printbody'})]
+    remove_tags_after = dict(name='div',attrs={'id':'articlebody'})
    remove_tags = [
-                     dict(name='div'  , attrs={'class':'utils'       })
-                    ,dict(name='div'  , attrs={'id':'bottomFeatures' })
-                    ,dict(name='div'  , attrs={'id':'articleBottom'  })
+                     dict(name='div', attrs={'class':['utils','articleRailLinks','icons'] })
+                    ,dict(name='link')
                  ]

-    feeds          = [
-                        (u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')
-                     ]
+    feeds          = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]

    def print_version(self, url):
        return url + '?printable=true'

+    def get_article_url(self, article):
+        return article.get('guid',  None)
+
    def postprocess_html(self, soup, x):
        body = soup.find('body')
        if body:
            html = soup.find('html')
            if html:
                body.extract()
-                html.insert(-1, body)
+                html.insert(2, body)
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(1,mcharset)
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_sueddeutsche.py
+++ b/src/calibre/web/feeds/recipes/recipe_sueddeutsche.py
@ -9,15 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class Sueddeutsche(BasicNewsRecipe):

-    title = u'S\xc3\xbcddeutsche'
+    title = u'S\xfcddeutsche'
    description = 'News from Germany'
    __author__ = 'Oliver Niesner'
    use_embedded_content   = False
-    language = _('German')
    timefmt = ' [%d %b %Y]'
    oldest_article = 7
    max_articles_per_feed = 50
    no_stylesheets = True
+    language = _('German')
    encoding = 'iso-8859-15'
    remove_javascript = True

@ -89,3 +89,5 @@ class Sueddeutsche(BasicNewsRecipe):
    def print_version(self, url):
        return url.replace('/text/', '/text/print.html')

+
+
--- a/src/calibre/web/feeds/recipes/recipe_zdnet.py
+++ b/src/calibre/web/feeds/recipes/recipe_zdnet.py
@ -6,11 +6,10 @@ Fetch zdnet.
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-import re


 class cdnet(BasicNewsRecipe):
-    
+
    title = 'zdnet'
    description = 'zdnet security'
    __author__ = 'Oliver Niesner'
@ -19,16 +18,10 @@ class cdnet(BasicNewsRecipe):
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True
-    encoding = 'iso-8859-1'
+    encoding = 'latin1'
+
+

-    #preprocess_regexps = \
-#	[(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-#		[
-#		(r'<84>', lambda match: ''),
-#		(r'<93>', lambda match: ''),
-#		]
-#	]
-    
    remove_tags = [dict(id='eyebrows'),
 		   dict(id='header'),
 		   dict(id='search'),
@ -36,12 +29,16 @@ class cdnet(BasicNewsRecipe):
 		   dict(id=''),
 		   dict(name='div', attrs={'class':'banner'}),
 		   dict(name='p', attrs={'class':'tags'}),
+		   dict(name='a', attrs={'href':'http://www.twitter.com/ryanaraine'}),
 		   dict(name='div', attrs={'class':'special1'})]
    remove_tags_after = [dict(name='div', attrs={'class':'bloggerDesc clear'})]
-    
-    feeds =  [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ] 
-    
+
+    feeds =  [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ]


+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup