Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-02-11 10:44:40 +00:00 · 2011-02-11 10:44:40 +00:00 · 914ddaae86
commit 914ddaae86
parent 83cde4af65 c21d927caa
12 changed files with 993 additions and 645 deletions
--- a/resources/recipes/el_periodico.recipe
+++ b/resources/recipes/el_periodico.recipe
@ -5,8 +5,8 @@ __license__     = 'GPL v3'
 __copyright__   = '04 December 2010, desUBIKado'
 __author__      = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__     = 'v0.05'
+__version__     = 'v0.07'
-__date__        = '07, December 2010'
+__date__        = '06, February 2011'
 '''
 elperiodicodearagon.com
 '''
@ -38,22 +38,26 @@ class elperiodicodearagon(BasicNewsRecipe):
                            ,'publisher' : publisher
                         }
-    feeds              = [(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
+    feeds              = [
-                          (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
+                           (u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
-                          (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
+                           (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
-                          (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
+                           (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
-                          (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
+                           (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
-                          (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
+                           (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
-                          (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
+                           (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
-                          (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
+                           (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
-                          (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
+                           (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
-                          (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')]
+                           (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
                           (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
                         ]
    extra_css = '''
-                    h3{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
+                    h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
-                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
-                    dd{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
                    .columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
                    img{margin-bottom: 0.4em}
 		'''
    remove_attributes = ['height','width']
@ -82,6 +86,7 @@ class elperiodicodearagon(BasicNewsRecipe):
                          dict(name='a', attrs={'class':'AvisoComentario'}),
                          dict(name='div', attrs={'class':'CajaAvisoComentario'}),
                          dict(name='div', attrs={'class':'navegaNoticias'}),
                          dict(name='div', attrs={'class':'Mensaje'}),
                          dict(name='div', attrs={'id':'PaginadorDiCom'}),
                          dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
                          dict(name='div', attrs={'id':'CintilloComentario'}),
@ -107,3 +112,15 @@ class elperiodicodearagon(BasicNewsRecipe):
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
        ]
    # Para sustituir el video incrustado de YouTube por una imagen
    def preprocess_html(self, soup):
        """Swap embedded YouTube players for their thumbnail image.

        Every ``<iframe title="YouTube video player">`` found in *soup* is
        renamed to ``<img>`` and its ``src`` is rewritten from the
        ``http://www.youtube.com/embed/<id>`` form to the matching
        ``http://img.youtube.com/vi/<id>/0.jpg`` thumbnail.

        The soup is modified in place and also returned.
        """
        for video_yt in soup.findAll('iframe', {'title': 'YouTube video player'}):
            fuente = video_yt.get('src')
            if not fuente:
                # No embed URL to derive a thumbnail from: leave the tag
                # untouched instead of raising KeyError half-way through.
                continue
            # Drop any query string (e.g. "?rel=0"); otherwise it would end up
            # in the middle of the thumbnail path and break the image URL.
            fuente = fuente.split('?', 1)[0]
            fuente2 = fuente.replace('http://www.youtube.com/embed/',
                                     'http://img.youtube.com/vi/')
            video_yt.name = 'img'
            video_yt['src'] = fuente2 + '/0.jpg'
        return soup
--- a/resources/recipes/tedneward.recipe
+++ b/resources/recipes/tedneward.recipe
@ -0,0 +1,33 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 blogs.tedneward.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class InteroperabilityHappens(BasicNewsRecipe):
    """Recipe settings for Ted Neward's blog, "Interoperability Happens".

    The class only declares attributes; everything that acts on them lives
    in the ``BasicNewsRecipe`` base class, which is not shown here.
    """
    # Metadata describing the publication and the recipe author.
    title                 = 'Interoperability Happens'
    __author__            = 'Darko Miletic'
    description           = 'Tech blog by Ted Neward'
    # Download limits (presumably days and article count; confirm against
    # the BasicNewsRecipe documentation).
    oldest_article        = 15
    max_articles_per_feed = 100
    language              = 'en'
    encoding              = 'utf-8'
    no_stylesheets        = True
    # NOTE(review): presumably makes the base class take the article body
    # from the feed itself instead of fetching each page; confirm.
    use_embedded_content  = True
    publication_type      = 'blog'
    # Extra stylesheet text: a single rule setting the body font family.
    extra_css             = """
                                body{font-family: Verdana,Arial,Helvetica,sans-serif}
                            """

    # Re-uses the description and language declared above so the values
    # stay in sync.
    conversion_options = {
                          'comment'  : description
                        , 'tags'     : 'blog, technology, microsoft, programming, C#, Java'
                        , 'publisher': 'Ted Neward'
                        , 'language' : language
                        }

    # (section title, feed URL) pairs; this recipe has a single feed.
    feeds = [(u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss')]
--- a/resources/recipes/weblogs_sl.recipe
+++ b/resources/recipes/weblogs_sl.recipe
@ -0,0 +1,104 @@
 #!/usr/bin/env  python
 __license__     = 'GPL v3'
 __copyright__   = '4 February 2011, desUBIKado'
 __author__      = 'desUBIKado'
 __version__     = 'v0.05'
 __date__        = '9, February 2011'
 '''
 http://www.weblogssl.com/
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class weblogssl(BasicNewsRecipe):
    """Recipe settings for the Weblogs SL family of blogs (language 'es').

    Declares the feed list and the HTML clean-up rules, and rewrites each
    article: URLs are switched from the ``www.`` host to the ``m.`` host and
    embedded YouTube players are replaced by a thumbnail image.
    """
    # Metadata describing the publication and the recipe author.
    __author__     = 'desUBIKado'
    description    = u'Weblogs colectivos dedicados a seguir la actualidad sobre tecnologia, entretenimiento, estilos de vida, motor, deportes y economia.'
    title          = u'Weblogs SL (Xataka, Genbeta, VidaExtra, Blog de Cine y otros)'
    publisher      = 'Weblogs SL'
    category       = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language       = 'es'
    timefmt        = '[%a, %d %b, %Y]'
    # Download limits (presumably days and article count; confirm against
    # the BasicNewsRecipe documentation).
    oldest_article = 1.5
    max_articles_per_feed = 100
    encoding       = 'utf-8'
    use_embedded_content  = False
    remove_empty_feeds    = True
    remove_javascript = True
    no_stylesheets = True
    # To skip one of the blogs, comment out its entry by putting a # in front
    # of it, e.g.  # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
    # would stop Applesfera from being downloaded. The original author asks
    # that the last feed be left without a trailing comma (Python itself
    # accepts one inside a list literal).
    feeds              = [
                          (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
                          (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
                          (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
                          (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
                          (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
                          (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
                          (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
                          (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
                          (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
                          (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
                          (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
                          (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
                          (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
                          (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
                          (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
                          (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
                          (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
                          (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
                          (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
                          (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
                          (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
                          (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
                          (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
                          (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
                          (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
                          (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
                          (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
                          (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
                          (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
                          (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
                          (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
                          (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
                          (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
                          (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
                          (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
                          (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
                          (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
                          (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
                          (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
                          (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
                         ]
    # Tag selectors, given as name/attribute dictionaries, for the parts of
    # the page to keep.
    keep_only_tags     = [dict(name='div', attrs={'id':'infoblock'}),
                          dict(name='div', attrs={'class':'post'}),
                          dict(name='div', attrs={'id':'blog-comments'})
                         ]
    # Tag selector for the comment navigation block, which is dropped.
    remove_tags        = [dict(name='div', attrs={'id':'comment-nav'})]
    def print_version(self, url):
          """Return *url* with 'http://www.' replaced by 'http://m.'."""
          # NOTE(review): presumably the m. host serves a lighter mobile page; confirm.
          return url.replace('http://www.', 'http://m.')
    # (compiled pattern, replacement function) pairs.
    preprocess_regexps = [
                            # Insert a blank line between one comment and the next
                           (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
                         ]
    # Replace the embedded YouTube video with an image
    def preprocess_html(self, soup):
        """Turn YouTube player iframes in *soup* into thumbnail images.

        Each iframe titled 'YouTube video player' is renamed to ``img`` and
        its ``src`` is rewritten to the video's ``0.jpg`` thumbnail URL on
        img.youtube.com. The soup is modified in place and returned.
        """
        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
            if video_yt:
               video_yt.name = 'img'
               fuente = video_yt['src']
               # Map the embed URL onto the thumbnail host/path.
               fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
               # Remove the '?rel=0' query so it does not end up inside the image path.
               fuente3 = fuente2.replace('?rel=0','')
               video_yt['src'] = fuente3 + '/0.jpg'
        return soup
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -83,7 +83,7 @@ class ANDROID(USBMS):
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
-            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE']
+            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT']
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -39,6 +39,13 @@ def asfloat(value):
        return 0.0
    return float(value)
 def isspace(text):
    """Tell whether *text* is empty or made up solely of whitespace.

    Empty or ``None`` input counts as whitespace. A non-breaking space
    (U+00A0) counts as real content, so any text containing one is reported
    as not being whitespace.
    """
    if text and u'\xa0' not in text:
        return text.isspace()
    # Either nothing at all (True) or text holding a non-breaking space (False).
    return not text
 class BlockState(object):
    def __init__(self, body):
        self.body = body
@ -438,7 +445,7 @@ class MobiMLizer(object):
        if elem.text:
            if istate.preserve:
                text = elem.text
-            elif len(elem) > 0 and elem.text.isspace():
+            elif len(elem) > 0 and isspace(elem.text):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
@ -481,7 +488,7 @@ class MobiMLizer(object):
            if child.tail:
                if istate.preserve:
                    tail = child.tail
-                elif bstate.para is None and child.tail.isspace():
+                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -70,7 +70,7 @@ class PML_HTMLizer(object):
        'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
        'r': ('<div style="text-align: right;">', '</div>'),
        't': ('<div style="margin-left: 5%;">', '</div>'),
-        'T': ('<div style="margin-left: %s;">', '</div>'),
+        'T': ('<div style="text-indent: %s;">', '</div>'),
        'i': ('<span style="font-style: italic;">', '</span>'),
        'u': ('<span style="text-decoration: underline;">', '</span>'),
        'd': ('<span style="text-decoration: line-through;">', '</span>'),
@ -499,7 +499,13 @@ class PML_HTMLizer(object):
        self.toc = []
        self.file_name = file_name
-        indent_state = {'t': False, 'T': False}
+        # t: Are we in an open \t tag set?
        # T: Are we in an open \T?
        # st: Did the \t start the line?
        # sT: Did the \T start the line?
        # et: Did the \t end the line?
        indent_state = {'t': False, 'T': False, 'st': False, 'sT': False, 'et': False}
        basic_indent = False
        adv_indent_val = ''
        # Keep track of the number of empty lines
        # between paragraphs. When we reach a set number
@ -512,8 +518,26 @@ class PML_HTMLizer(object):
        for line in pml.splitlines():
            parsed = []
            empty = True
            basic_indent = indent_state['t']
-            adv_indent = indent_state['T']
+            indent_state['T'] = False
            # Determine if the \t starts the line or if we are
            # in an open \t block.
            if line.lstrip().startswith('\\t') or basic_indent:
                basic_indent = True
                indent_state['st'] = True
            else:
                indent_state['st'] = False
            # Determine if the \T starts the line.
            if line.lstrip().startswith('\\T'):
                indent_state['sT'] = True
            else:
                indent_state['sT'] = False
            # Determine if the \t ends the line.
            if line.rstrip().endswith('\\t'):
                indent_state['et'] = True
            else:
                indent_state['et'] = False
            # Must use StringIO, cStringIO does not support unicode
            line = StringIO.StringIO(line)
@ -575,13 +599,10 @@ class PML_HTMLizer(object):
                        empty = False
                        text = '<hr width="%s" />' % self.code_value(line)
                    elif c == 't':
-                        indent_state[c] = not indent_state[c]
+                        indent_state['t'] = not indent_state['t']
                        if indent_state[c]:
                            basic_indent = True
                    elif c == 'T':
                        # Ensure we only store the value on the first T set for the line.
                        if not indent_state['T']:
                            adv_indent = True
                            adv_indent_val = self.code_value(line)
                        else:
                            # We detected a T previously on this line.
@ -610,10 +631,23 @@ class PML_HTMLizer(object):
                text = self.end_line()
                parsed.append(text)
                # Basic indent will be set if the \t starts the line or
                # if we are in a continuing \t block.
                if basic_indent:
-                    parsed.insert(0, self.STATES_TAGS['t'][0])
+                    # if the \t started the line and either it ended the line or the \t
-                    parsed.append(self.STATES_TAGS['t'][1])
+                    # block is still open use a left margin.
-                elif adv_indent:
+                    if indent_state['st'] and (indent_state['et'] or indent_state['t']):
                        parsed.insert(0, self.STATES_TAGS['t'][0])
                        parsed.append(self.STATES_TAGS['t'][1])
                    # Use a text indent instead of a margin.
                    # This handles cases such as:
                    # \tO\tne upon a time...
                    else:
                        parsed.insert(0, self.STATES_TAGS['T'][0] % '5%')
                        parsed.append(self.STATES_TAGS['T'][1])
                # \t will override \T's on the line.
                # We only handle \T's that started the line.
                elif indent_state['T'] and indent_state['sT']:
                    parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
                    parsed.append(self.STATES_TAGS['T'][1])
                    indent_state['T'] = False
--- a/src/calibre/gui2/preferences/tweaks.py
+++ b/src/calibre/gui2/preferences/tweaks.py
@ -227,8 +227,12 @@ class PluginTweaks(QDialog): # {{{
        self.highlighter = PythonHighlighter(self.edit.document())
        self.l = QVBoxLayout()
        self.setLayout(self.l)
-        self.l.addWidget(QLabel(
+        self.msg = QLabel(
-            _('Add/edit tweaks for any custom plugins you have installed.')))
+            _('Add/edit tweaks for any custom plugins you have installed. '
                'Documentation for these tweaks should be available '
                'on the website from where you downloaded the plugins.'))
        self.msg.setWordWrap(True)
        self.l.addWidget(self.msg)
        self.l.addWidget(self.edit)
        self.edit.setPlainText(raw)
        self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -440,16 +440,17 @@ class Document(QWebPage): # {{{
    @property
    def height(self):
-        j = self.javascript('document.body.offsetHeight', 'int')
+        # Note that document.body.offsetHeight does not include top and bottom
        # margins on body and in some cases does not include the top margin on
        # the first element inside body either. See ticket #8791 for an example
        # of the latter.
        q = self.mainFrame().contentsSize().height()
-        if q == j:
+        if q < 0:
-            return j
+            # Don't know if this is still needed, but it can't hurt
-        if min(j, q) <= 0:
+            j = self.javascript('document.body.offsetHeight', 'int')
-            return max(j, q)
+            if j >= 0:
-        window_height = self.window_height
+                q = j
-        if j == window_height:
+        return q
            return j if q < 1.2*j else q
        return j
    @property
    def width(self):
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@ -561,9 +561,10 @@ format, whether input or output are available in the conversion dialog under the
 Convert Microsoft Word documents
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| does not directly convert .doc files from Microsoft Word. However, in Word, you can save the document
+|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
 as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
-"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well.
+"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well. Note that Word
 produces really messy HTML and converting it can take a long time, so be patient.
 There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
 generating the Table of Contents much simpler. It is called BookCreator and is available for free
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/utils/cleantext.py
+++ b/src/calibre/utils/cleantext.py
@ -8,11 +8,13 @@ import re, htmlentitydefs
 _ascii_pat = None
 def clean_ascii_chars(txt, charlist=None):
-    'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
+    '''
    Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
    This is all control chars except \\t,\\n and \\r
    '''
    global _ascii_pat
    if _ascii_pat is None:
-        chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
+        chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
            + [0x1A, 0x1B]
        _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
    if charlist is None:
--- a/src/calibre/web/feeds/init.py
+++ b/src/calibre/web/feeds/init.py
@ -13,6 +13,7 @@ from calibre.web.feeds.feedparser import parse
 from calibre.utils.logging import default_log
 from calibre import entity_to_unicode, strftime
 from calibre.utils.date import dt_factory, utcnow, local_tz
 from calibre.utils.cleantext import clean_ascii_chars
 class Article(object):
@ -43,7 +44,7 @@ class Article(object):
                print summary.encode('utf-8')
                traceback.print_exc()
                summary = u''
-        self.text_summary = summary
+        self.text_summary = clean_ascii_chars(summary)
        self.author = author
        self.content = content
        self.date = published