Merge from trunk

Charles Haley 2010-10-08 18:43:52 +01:00
commit 5cc1b67952
8 changed files with 150 additions and 32 deletions

View File

@@ -0,0 +1,46 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+nightfliersbookspace.blogspot.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NightfliersBookspace(BasicNewsRecipe):
+    title                 = "Nightflier's Bookspace"
+    __author__            = 'Darko Miletic'
+    description           = 'SF, Fantasy, Books, Knjige'
+    oldest_article        = 35
+    max_articles_per_feed = 100
+    language              = 'sr'
+    encoding              = 'utf-8'
+    no_stylesheets        = True
+    use_embedded_content  = True
+    publication_type      = 'blog'
+    cover_url             = ''
+    extra_css             = """
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+        body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif}
+        .article_description{font-family: sans1, sans-serif}
+        img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
+    """
+
+    conversion_options = {
+          'comment'  : description
+        , 'tags'     : 'SF, fantasy, prevod, blog, Srbija'
+        , 'publisher': 'Ivan Jovanovic'
+        , 'language' : language
+    }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    feeds = [(u'Posts', u'http://nightfliersbookspace.blogspot.com/feeds/posts/default')]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
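
The one non-obvious line in this new recipe is the preprocess_regexps entry: it rewrites every occurrence of U+0110 (the Serbian capital letter D with stroke) as U+00D0 (Eth), presumably because the target reader fonts render the latter but not the former. A minimal standalone sketch of that substitution (illustration only, not part of the commit):

    import re

    # Only the uppercase form is mapped; lowercase u'\u0111' passes through.
    sample = u'\u0110or\u0111e'
    print re.compile(u'\u0110').sub(u'\u00D0', sample)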

View File

@@ -9,15 +9,19 @@ theage.com.au
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
+import re
 
 class TheAge(BasicNewsRecipe):
 
     title = 'The Age'
     description = 'Business News, World News and Breaking News in Melbourne, Australia'
+    publication_type = 'newspaper'
     __author__ = 'Matthew Briggs'
     language = 'en_AU'
 
+    max_articles_per_feed = 1000
+    recursions = 0
+    remove_tags = [dict(name=['table', 'script', 'noscript', 'style']), dict(name='a', attrs={'href':'/'}), dict(name='a', attrs={'href':'/text/'})]
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
@@ -28,22 +32,22 @@ class TheAge(BasicNewsRecipe):
         soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/text/').read())
 
-        feeds, articles = [], []
-        feed = None
+        section = None
+        sections = {}
 
         for tag in soup.findAll(['h3', 'a']):
             if tag.name == 'h3':
-                if articles:
-                    feeds.append((feed, articles))
-                    articles = []
-                feed = self.tag_to_string(tag)
-            elif feed is not None and tag.has_key('href') and tag['href'].strip():
+                section = self.tag_to_string(tag)
+                sections[section] = []
+
+            # Make sure to skip: <a href="/">TheAge</a>
+            elif section and tag.has_key('href') and len(tag['href'].strip())>1:
                 url = tag['href'].strip()
                 if url.startswith('/'):
                     url = 'http://www.theage.com.au' + url
                 title = self.tag_to_string(tag)
-                articles.append({
+                sections[section].append({
                     'title': title,
                     'url' : url,
                     'date' : strftime('%a, %d %b'),
@@ -51,7 +55,58 @@ class TheAge(BasicNewsRecipe):
                     'content' : '',
                 })
 
+        feeds = []
+
+        # Insert feeds in specified order, if available
+        feedSort = [ 'National', 'World', 'Opinion', 'Columns', 'Business', 'Sport', 'Entertainment' ]
+        for i in feedSort:
+            if i in sections:
+                feeds.append((i,sections[i]))
+
+        # Done with the sorted feeds (guarded: not every section in
+        # feedSort is guaranteed to be present on a given day)
+        for i in feedSort:
+            if i in sections:
+                del sections[i]
+
+        # Append what is left over...
+        for i in sections:
+            feeds.append((i,sections[i]))
+
         return feeds
+
+    def get_cover_url(self):
+        soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/todays-paper').read())
+        for i in soup.findAll('a'):
+            href = i['href']
+            if href and re.match('http://www.theage.com.au/frontpage/[0-9]+/[0-9]+/[0-9]+/frontpage.pdf',href):
+                return href
+        return None
+
+    def preprocess_html(self, soup):
+        for p in soup.findAll('p'):
+
+            # Collapse the paragraph by joining the non-tag contents
+            contents = [i for i in p.contents if isinstance(i,unicode)]
+            if len(contents):
+                contents = ''.join(contents)
+
+                # Filter out what's left of the text-mode navigation stuff
+                if re.match('((\s)|(\&nbsp\;))*\[[\|\s*]*\]((\s)|(\&nbsp\;))*$',contents):
+                    p.extract()
+                    continue
+
+                # Shrink the fine print font
+                if contents=='This material is subject to copyright and any unauthorised use, copying or mirroring is prohibited.':
+                    p['style'] = 'font-size:small'
+                    continue
+
+        return soup
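
The rewritten parse_index collects articles into a sections dict keyed by the h3 headings, then emits the preferred sections in a fixed order and appends whatever is left. A compact sketch of the same ordering pattern (data invented for illustration; dict.pop folds the lookup and the delete into a single step):

    sections = {'Sport': ['a'], 'National': ['b'], 'Weather': ['c']}
    preferred = ['National', 'World', 'Opinion', 'Sport']

    # Preferred sections first, popped so they cannot be emitted twice...
    feeds = [(name, sections.pop(name)) for name in preferred if name in sections]
    # ...then everything that is left, in whatever order the dict yields.
    feeds.extend(sections.items())
    print feeds   # [('National', ['b']), ('Sport', ['a']), ('Weather', ['c'])]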

View File

@@ -16,7 +16,7 @@ class DailyTelegraph(BasicNewsRecipe):
     language = 'en_AU'
 
     oldest_article = 2
-    max_articles_per_feed = 20
+    max_articles_per_feed = 30
     remove_javascript = True
     no_stylesheets = True
     encoding = 'utf8'
@@ -50,20 +50,22 @@ class DailyTelegraph(BasicNewsRecipe):
     feeds = [ (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
               (u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/aus_opinion_58.xml'),
-              (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
-              (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
-              (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
-              (u'The Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
-              (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
               (u'The Nation', u'http://feeds.news.com.au/public/rss/2.0/aus_the_nation_62.xml'),
-              (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
-              (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
-              (u'Defence', u'http://feeds.news.com.au/public/rss/2.0/aus_defence_54.xml'),
-              (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
-              (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml'),
+              (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),
+              (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml'),
               (u'Climate', u'http://feeds.news.com.au/public/rss/2.0/aus_climate_809.xml'),
+              (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
+              (u'IT', u'http://feeds.news.com.au/public/rss/2.0/ausit_itnews_topstories_367.xml'),
+              (u'Exec Tech', u'http://feeds.news.com.au/public/rss/2.0/ausit_exec_topstories_385.xml'),
+              (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
+              (u'Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
+              (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
               (u'Property', u'http://feeds.news.com.au/public/rss/2.0/aus_property_59.xml'),
-              (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml')]
+              (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
+              (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
+              (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
+              (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
+              (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
 
     def get_article_url(self, article):
         return article.id

View File

@@ -55,6 +55,9 @@ class WikiNews(BasicNewsRecipe):
         rest, sep, article_id = url.rpartition('/')
         return 'http://en.wikinews.org/w/index.php?title=' + article_id + '&printable=yes'
 
+    def get_cover_url(self):
+        return 'http://upload.wikimedia.org/wikipedia/commons/b/bd/Wikinews-logo-en.png'
+
     def preprocess_html(self, soup):
         mtag = '<meta http-equiv="Content-Language" content="en"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
         soup.head.insert(0,mtag)

View File

@@ -461,7 +461,7 @@ from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
@@ -574,6 +574,7 @@ plugins += [
     SPECTRA,
     GEMEI,
     VELOCITYMICRO,
+    PDNOVEL_KOBO,
     ITUNES,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@@ -108,6 +108,16 @@ class PDNOVEL(USBMS):
             with open('%s.jpg' % os.path.join(path, filename), 'wb') as coverfile:
                 coverfile.write(coverdata[2])
 
+class PDNOVEL_KOBO(PDNOVEL):
+    name = 'Pandigital Kobo device interface'
+    gui_name = 'PD Novel (Kobo)'
+    description = _('Communicate with the Pandigital Novel')
+
+    BCD = [0x222]
+
+    EBOOK_DIR_MAIN = 'eBooks/Kobo'
+
+
 class VELOCITYMICRO(USBMS):
     name = 'VelocityMicro device interface'
     gui_name = 'VelocityMicro'
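
The new driver shows how little a firmware variant of an existing device usually needs: subclass the current driver, override BCD (which, as I read the USBMS matching code, lists the USB bcdDevice revisions the driver accepts) and point EBOOK_DIR_MAIN at the folder the variant actually uses. A hypothetical sketch of the same pattern (SOMEDEVICE and every value below are invented for illustration):

    class SOMEDEVICE_VARIANT(SOMEDEVICE):
        name        = 'Some device (variant) interface'
        gui_name    = 'Some Device (Variant)'
        description = _('Communicate with the Some Device variant')

        BCD = [0x301]                 # revision reported by the variant

        EBOOK_DIR_MAIN = 'Books/Alt'  # its books live in a different folder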

View File

@@ -189,7 +189,7 @@ class MobiMLizer(object):
                 para = wrapper
                 emleft = int(round(left / self.profile.fbase)) - ems
                 emleft = min((emleft, 10))
-                while emleft > 0:
+                while emleft > ems/2.0:
                     para = etree.SubElement(para, XHTML('blockquote'))
                     emleft -= ems
             else:
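
This code approximates left margins by nesting blockquote elements, each worth ems units of indent. Under the old test (emleft > 0) any positive remainder forced one more blockquote, so margins always rounded up; the new test stops once the remainder is no more than half a unit, rounding to the nearest multiple instead. A worked example with invented values:

    ems, emleft, depth = 2, 5, 0   # each blockquote worth 2; 5 units to place
    while emleft > ems/2.0:        # old condition was: emleft > 0
        depth += 1                 # nest one more <blockquote>
        emleft -= ems
    print depth                    # 2 under the new test, 3 under the old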

View File

@@ -985,7 +985,8 @@ def command_restore_database(args, dbpath):
         return 1
 
     if not opts.really_do_it:
-        print _('You must provide the --really-do-it option to do a recovery\n')
+        prints(_('You must provide the --really-do-it option to do a'
+            ' recovery'), end='\n\n')
         parser.print_help()
         return 1