merge from trunk

2025-07-09 03:04:10 -04:00 · 2012-05-24 20:40:14 +08:00 · 2012-05-24 20:40:14 +08:00 · b74dbab58e
commit b74dbab58e
parent 364c5cf492 45b96807a3
87 changed files with 37423 additions and 34210 deletions
--- a/recipes/attac_es.recipe
+++ b/recipes/attac_es.recipe
@ -0,0 +1,21 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AttacEspanaRecipe (BasicNewsRecipe):
+   __author__ = u'Marc Busqué'
+   __url__ = 'http://www.lamarciana.com'
+   __version__ = '1.0'
+   __license__   = 'GPL v3'
+   __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
+   title = u'attac.es'
+   description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.'
+   url = 'http://www.attac.es'
+   language = 'es'
+   tags = 'contrainformación, información alternativa'
+   oldest_article = 7
+   remove_empty_feeds = True
+   no_stylesheets = True
+   cover_url = u'http://www.attac.es/wp-content/themes/attacweb/images/attaces.jpg'
+
+   feeds = [
+         (u'Attac', u'http://www.attac.es/feed'),
+         ]
--- a/recipes/bwmagazine.recipe
+++ b/recipes/bwmagazine.recipe
@ -15,6 +15,7 @@ class BusinessWeek(BasicNewsRecipe):
    oldest_article        = 7
    max_articles_per_feed = 200
    no_stylesheets        = True
+    auto_cleanup = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
@ -36,12 +37,12 @@ class BusinessWeek(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    remove_tags       = [
-                           dict(attrs={'class':'inStory'})
-                          ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
-                          ,dict(attrs={'id':['inset','videoDisplay']})
-                        ]
-    keep_only_tags    = [dict(name='div', attrs={'id':['story-body','storyBody']})]
+    #remove_tags       = [
+                           #dict(attrs={'class':'inStory'})
+                          #,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
+                          #,dict(attrs={'id':['inset','videoDisplay']})
+                        #]
+    #keep_only_tags    = [dict(name='div', attrs={'id':['story-body','storyBody']})]
    remove_attributes = ['lang']
    match_regexps     = [r'http://www.businessweek.com/.*_page_[1-9].*']

@ -100,3 +101,4 @@ class BusinessWeek(BasicNewsRecipe):
               tstr = alink.string
               alink.replaceWith(tstr)
        return soup
+ 
--- a/recipes/metro_news_nl.recipe
+++ b/recipes/metro_news_nl.recipe
@ -34,6 +34,8 @@ from BeautifulSoup import BeautifulSoup
    Added new feeds
    Updated css
    Changed order of regex to speedup proces
+ Version 1.9.3 23-05-2012
+    Updated Cover image
 '''

 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
@ -51,7 +53,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
-    cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
+    cover_url = 'http://www.readmetro.com/en/holland/metro-holland/image/large/last/'
    publication_type = 'newspaper'
    encoding = 'utf-8'
    remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href']
--- a/session.vim
+++ b/session.vim
@ -6,7 +6,7 @@ fun! CalibreLog()
    " making a release.
    enew
    read ! bzr log -l 500
-    set nomodifiable noswapfile buftype=nofile
+    setl nomodifiable noswapfile buftype=nofile
    edit Changelog.yaml
    edit src/calibre/constants.py
 endfun
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -626,7 +626,10 @@ class HTMLPreProcessor(object):
        if getattr(self.extra_opts, 'smarten_punctuation', False):
            html = self.smarten_punctuation(html)

-        unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
+        try:
+            unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
+        except AttributeError:
+            unsupported_unicode_chars = u''
        if unsupported_unicode_chars:
            from calibre.utils.localization import get_udc
            unihandecoder = get_udc()
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@ -120,13 +120,12 @@ class OEB2HTML(object):
                el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:]

    def get_css(self, oeb_book):
-        css = u''
+        css = b''
        for item in oeb_book.manifest:
            if item.media_type == 'text/css':
-                css = item.data.cssText
-                break
+                css += item.data.cssText + b'\n\n'
        return css
-    
+
    def prepare_string_for_html(self, raw):
        raw = prepare_string_for_xml(raw)
        raw = raw.replace(u'\u00ad', '&shy;')
--- a/src/calibre/ebooks/mobi/debug/headers.py
+++ b/src/calibre/ebooks/mobi/debug/headers.py
@ -377,6 +377,7 @@ class MOBIHeader(object): # {{{

        a('Compression: %s'%self.compression)
        a('Unused: %r'%self.unused)
+        a('Text length: %d'%self.text_length)
        a('Number of text records: %d'%self.number_of_text_records)
        a('Text record size: %d'%self.text_record_size)
        a('Encryption: %s'%self.encryption_type)
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@ -411,6 +411,7 @@ class MobiWriter(object):
        header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
        header_fields['flis_record'] = flis_number
        header_fields['fcis_record'] = fcis_number
+        header_fields['text_length'] = self.text_length
        extra_data_flags = 0b1 # Has multibyte overlap bytes
        if self.primary_index_record_idx is not None:
            extra_data_flags |= 0b10
--- a/src/calibre/gui2/preferences/tweaks.py
+++ b/src/calibre/gui2/preferences/tweaks.py
@ -42,7 +42,9 @@ class Tweak(object): # {{{
    def __init__(self, name, doc, var_names, defaults, custom):
        translate = _
        self.name = translate(name)
-        self.doc = translate(doc.strip())
+        self.doc = doc.strip()
+        if self.doc:
+            self.doc = translate(self.doc)
        self.var_names = var_names
        self.default_values = {}
        for x in var_names:
--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/bn.po
+++ b/src/calibre/translations/bn.po
--- a/src/calibre/translations/br.po
+++ b/src/calibre/translations/br.po
--- a/src/calibre/translations/bs.po
+++ b/src/calibre/translations/bs.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/cy.po
+++ b/src/calibre/translations/cy.po
--- a/src/calibre/translations/da.po
+++ b/src/calibre/translations/da.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/en_AU.po
+++ b/src/calibre/translations/en_AU.po
--- a/src/calibre/translations/en_CA.po
+++ b/src/calibre/translations/en_CA.po
--- a/src/calibre/translations/en_GB.po
+++ b/src/calibre/translations/en_GB.po
--- a/src/calibre/translations/eo.po
+++ b/src/calibre/translations/eo.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/et.po
+++ b/src/calibre/translations/et.po
--- a/src/calibre/translations/eu.po
+++ b/src/calibre/translations/eu.po
--- a/src/calibre/translations/fa.po
+++ b/src/calibre/translations/fa.po
--- a/src/calibre/translations/fi.po
+++ b/src/calibre/translations/fi.po
--- a/src/calibre/translations/fo.po
+++ b/src/calibre/translations/fo.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/fr_CA.po
+++ b/src/calibre/translations/fr_CA.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/gu.po
+++ b/src/calibre/translations/gu.po
--- a/src/calibre/translations/he.po
+++ b/src/calibre/translations/he.po
--- a/src/calibre/translations/hi.po
+++ b/src/calibre/translations/hi.po
--- a/src/calibre/translations/hr.po
+++ b/src/calibre/translations/hr.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/id.po
+++ b/src/calibre/translations/id.po
--- a/src/calibre/translations/is.po
+++ b/src/calibre/translations/is.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/ja.po
+++ b/src/calibre/translations/ja.po
--- a/src/calibre/translations/kn.po
+++ b/src/calibre/translations/kn.po
--- a/src/calibre/translations/ko.po
+++ b/src/calibre/translations/ko.po
--- a/src/calibre/translations/ku.po
+++ b/src/calibre/translations/ku.po
--- a/src/calibre/translations/lt.po
+++ b/src/calibre/translations/lt.po
--- a/src/calibre/translations/ltg.po
+++ b/src/calibre/translations/ltg.po
--- a/src/calibre/translations/lv.po
+++ b/src/calibre/translations/lv.po
--- a/src/calibre/translations/mk.po
+++ b/src/calibre/translations/mk.po
--- a/src/calibre/translations/ml.po
+++ b/src/calibre/translations/ml.po
--- a/src/calibre/translations/mr.po
+++ b/src/calibre/translations/mr.po
--- a/src/calibre/translations/ms.po
+++ b/src/calibre/translations/ms.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/nn.po
+++ b/src/calibre/translations/nn.po
--- a/src/calibre/translations/oc.po
+++ b/src/calibre/translations/oc.po
--- a/src/calibre/translations/pa.po
+++ b/src/calibre/translations/pa.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/pt_BR.po
+++ b/src/calibre/translations/pt_BR.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sc.po
+++ b/src/calibre/translations/sc.po
--- a/src/calibre/translations/si.po
+++ b/src/calibre/translations/si.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sq.po
+++ b/src/calibre/translations/sq.po
--- a/src/calibre/translations/sr.po
+++ b/src/calibre/translations/sr.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/ta.po
+++ b/src/calibre/translations/ta.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/translations/th.po
+++ b/src/calibre/translations/th.po
--- a/src/calibre/translations/tr.po
+++ b/src/calibre/translations/tr.po
--- a/src/calibre/translations/uk.po
+++ b/src/calibre/translations/uk.po
--- a/src/calibre/translations/ur.po
+++ b/src/calibre/translations/ur.po
--- a/src/calibre/translations/vi.po
+++ b/src/calibre/translations/vi.po
--- a/src/calibre/translations/wa.po
+++ b/src/calibre/translations/wa.po
--- a/src/calibre/translations/yi.po
+++ b/src/calibre/translations/yi.po
--- a/src/calibre/translations/zh_CN.po
+++ b/src/calibre/translations/zh_CN.po
--- a/src/calibre/translations/zh_HK.po
+++ b/src/calibre/translations/zh_HK.po
--- a/src/calibre/translations/zh_TW.po
+++ b/src/calibre/translations/zh_TW.po
--- a/src/calibre/utils/smartypants.py
+++ b/src/calibre/utils/smartypants.py
@ -597,15 +597,17 @@ def educateQuotes(str):
 	str = re.sub(r"""(?<=\W)'(?=\w)""", r"""&#8216;""", str)
 	str = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", str)
 	str = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", str)
-	
+
+    # The following are commented out as smartypants tokenizes text by
+    # stripping out html tags. Therefore, there is no guarantee that the
+    # start-of-line and end-of-line regex operators will match anything
+    # meaningful
+
 	# Special case for Quotes at end of line with a preceeding space (may change just to end of line)
-	str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str)
-	str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str)
-	
+	#str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str)
+	#str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str)
+
 	# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
-	# This case commented out as it works under the assumption that the regex pattern will always
-	# evaluate a complete sentence - the tokenize function called earlier in smarty will break up
-	# the text based on tags, so sentence fragments can be passed to the patterns as well.
 	#str = re.sub(r"""^"(?=\s)""", r"""&#8220;""", str)
 	#str = re.sub(r"""^'(?=\s)""", r"""&#8216;""", str)

--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@ -854,9 +854,24 @@ ol, ul { padding-left: 2em; }
            else:
                css_styles[css2] = [name]

+        def filter_margins(css2):
+            names = { k for k, v in css2 }
+            ignore = set()
+            if {'margin-left', 'margin-right', 'margin-top',
+                    'margin-bottom'}.issubset(names):
+                # These come from XML and we cannot preserve XML attribute
+                # order so we assume that margin is to be overridden. See
+                # https://bugs.launchpad.net/calibre/+bug/941134 and
+                # https://bugs.launchpad.net/calibre/+bug/1002702
+                ignore.add('margin')
+            css2 = sorted(css2, key=lambda x:{'margin':0}.get(x[0], 1))
+            for k, v in css2:
+                if k not in ignore:
+                    yield k, v
+
        for css2, names in css_styles.iteritems():
            self.writeout("%s {\n" % ', '.join(names))
-            for style, val in css2:
+            for style, val in filter_margins(css2):
                self.writeout("\t%s: %s;\n" % (style, val) )
            self.writeout("}\n")

@ -941,20 +956,8 @@ ol, ul { padding-left: 2em; }
        if self.currentstyle is None: # Added by Kovid
            return

-        # Added by Kovid
-        names = {x[1]:x for x in attrs.iterkeys()}
-        ignore_keys = set()
-        if ('margin' in names and 'margin-top' in names and 'margin-left' in
-                names and 'margin-right' in names and 'margin-bottom' in
-                names):
-            # These come from XML and we cannot preserve XML attribute order so
-            # we assume that margin is to be overridden
-            # See https://bugs.launchpad.net/calibre/+bug/941134
-            ignore_keys.add(names['margin'])
-
        for key,attr in attrs.items():
-            if key not in ignore_keys:
-                self.styledict[self.currentstyle][key] = attr
+            self.styledict[self.currentstyle][key] = attr


    familymap = {'frame':'frame', 'paragraph':'p', 'presentation':'presentation',