Pull from trunk

2025-07-09 03:04:10 -04:00 · 2009-03-03 17:29:39 -08:00 · 2009-03-03 17:29:39 -08:00 · 5be5277f32
commit 5be5277f32
parent 549e2b9efb 73af726c71
33 changed files with 13918 additions and 7267 deletions
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -465,7 +465,3 @@ if isosx:
    except:
        import traceback
        traceback.print_exc()
-                
-# Migrate from QSettings based config system
-from calibre.utils.config import migrate
-migrate()
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.4.141'
+__version__   = '0.4.142'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -845,7 +845,12 @@ class Processor(Parser):
            except:
                size = '3'
            if size and size.strip() and size.strip()[0] in ('+', '-'):
-                size = 3 + float(size) # Hack assumes basefont=3
+                size = re.search(r'[+-]{0,1}[\d\.]+', size)
+                try:
+                    size = float(size.group())
+                except:
+                    size = 0
+                size += 3 # Hack assumes basefont=3
            try:
                setting = 'font-size: %d%%;'%int((float(size)/3) * 100)
            except ValueError:
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -122,11 +122,15 @@ class UnBinary(object):
    OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)')
    CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
    DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
+    EMPTY_ATOMS = ({},{})
    
-    def __init__(self, bin, path, manifest={}, map=HTML_MAP):
+    def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
        self.manifest = manifest
        self.tag_map, self.attr_map, self.tag_to_attr_map = map
        self.is_html = map is HTML_MAP
+        self.tag_atoms, self.attr_atoms = atoms
+        self.opf = map is OPF_MAP
+        self.bin = bin
        self.dir = os.path.dirname(path)
        buf = StringIO()
        self.binary_to_text(bin, buf)
@@ -205,7 +209,10 @@ class UnBinary(object):
                        state = 'get custom length'
                        continue
                    if flags & FLAG_ATOM:
-                        raise LitError('TODO: Atoms not yet implemented')
+                        if not self.tag_atoms or tag not in self.tag_atoms:
+                            raise LitError("atom tag %d not in atom tag list" % tag)
+                        tag_name = self.tag_atoms[tag]
+                        current_map = self.attr_atoms
                    elif tag < len(self.tag_map):
                        tag_name = self.tag_map[tag]
                        current_map = self.tag_to_attr_map[tag]
@@ -804,6 +811,54 @@ class LitFile(object):
            raise LitError("Failed to completely decompress section")
        return ''.join(result)

+    def get_atoms(self, entry):
+        name = '/'.join(('/data', entry.internal, 'atom'))
+        if name not in self.entries:
+            return ({}, {})
+        data = self.get_file(name)
+        nentries, data = u32(data), data[4:]
+        tags = {}
+        for i in xrange(1, nentries + 1):
+            if len(data) <= 1:
+                break
+            size, data = ord(data[0]), data[1:]
+            if size == 0 or len(data) < size:
+                break
+            tags[i], data = data[:size], data[size:]
+        if len(tags) != nentries:
+            self._warn("damaged or invalid atoms tag table")
+        if len(data) < 4:
+            return (tags, {})
+        attrs = {}
+        nentries, data = u32(data), data[4:]
+        for i in xrange(1, nentries + 1):
+            if len(data) <= 4:
+                break
+            size, data = u32(data), data[4:]
+            if size == 0 or len(data) < size:
+                break
+            attrs[i], data = data[:size], data[size:]
+        if len(attrs) != nentries:
+            self._warn("damaged or invalid atoms attributes table")
+        return (tags, attrs)
+    
+    def get_entry_content(self, entry, pretty_print=False):
+        if 'spine' in entry.state:
+            name = '/'.join(('/data', entry.internal, 'content'))
+            path = entry.path
+            raw = self.get_file(name)
+            decl, map = (OPF_DECL, OPF_MAP) \
+                if name == '/meta' else (HTML_DECL, HTML_MAP)
+            atoms = self.get_atoms(entry)
+            content = decl + unicode(UnBinary(raw, path, self.manifest, map, atoms))
+            if pretty_print:
+                content = self._pretty_print(content)
+            content = content.encode('utf-8')
+        else:
+            internal = '/'.join(('/data', entry.internal))
+            content = self._litfile.get_file(internal)
+        return content
+ 

 class LitContainer(object):
    """Simple Container-interface, read-only accessor for LIT files."""
@@ -826,11 +881,7 @@ class LitContainer(object):
            raw = self._litfile.get_file(internal)
            unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
            content = HTML_DECL + str(unbin)
-        else:
-            internal = '/'.join(('/data', entry.internal))
-            content = self._litfile.get_file(internal)
-        return content
-    
+   
    def _read_meta(self):
        path = 'content.opf'
        raw = self._litfile.get_file('/meta')
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -39,13 +39,13 @@ def metadata_from_formats(formats):
            return mi2
    
    for path, ext in zip(formats, extensions):
-        stream = open(path, 'rb')
-        try:
-            mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
-        except:
-            continue
-        if getattr(mi, 'application_id', None) is not None:
-            return mi
+        with open(path, 'rb') as stream:
+            try:
+                mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
+            except:
+                continue
+            if getattr(mi, 'application_id', None) is not None:
+                return mi
    
    if not mi.title:
        mi.title = _('Unknown')
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -227,7 +227,7 @@ class CSSFlattener(object):
            items.sort()
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            classes = node.get('class', None) or 'calibre'
-            klass = STRIPNUM.sub('', classes.split()[0])
+            klass = STRIPNUM.sub('', classes.split()[0].replace('_', ''))
            if css in styles:
                match = styles[css]
            else:
--- a/src/calibre/gui2/images/news/el_universal.png
+++ b/src/calibre/gui2/images/news/el_universal.png
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/he.po
+++ b/src/calibre/translations/he.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -32,7 +32,8 @@ recipe_modules = ['recipe_' + r for r in (
           'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
           'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
           'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
-           'lamujerdemivida', 'soldiers', 'theonion',
+           'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
+           'el_universal',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_el_universal.py
+++ b/src/calibre/web/feeds/recipes/recipe_el_universal.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+eluniversal.com.mx
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElUniversal(BasicNewsRecipe):
+    title                 = 'El Universal'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Mexico'
+    oldest_article        = 1
+    max_articles_per_feed = 100
+    publisher             = 'El Universal'
+    category              = 'news, politics, Mexico'    
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'cp1252'
+    remove_javascript     = True
+    language              = _('Spanish')
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        , '--ignore-tables'
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+
+    remove_tags = [dict(name='link')]
+                            
+    feeds = [ 
+              (u'Minuto por Minuto', u'http://www.eluniversal.com.mx/rss/universalmxm.xml' )
+             ,(u'Mundo'            , u'http://www.eluniversal.com.mx/rss/mundo.xml'        )
+             ,(u'Mexico'           , u'http://www.eluniversal.com.mx/rss/mexico.xml'       )
+             ,(u'Estados'          , u'http://www.eluniversal.com.mx/rss/estados.xml'      )
+             ,(u'Finanzas'         , u'http://www.eluniversal.com.mx/rss/finanzas.xml'     )
+             ,(u'Deportes'         , u'http://www.eluniversal.com.mx/rss/deportes.xml'     )
+             ,(u'Espectaculos'     , u'http://www.eluniversal.com.mx/rss/espectaculos.xml' )
+             ,(u'Cultura'          , u'http://www.eluniversal.com.mx/rss/cultura.xml'      )
+             ,(u'Ciencia'          , u'http://www.eluniversal.com.mx/rss/ciencia.xml'      )
+             ,(u'Computacion'      , u'http://www.eluniversal.com.mx/rss/computo.xml'      )
+             ,(u'Sociedad'         , u'http://www.eluniversal.com.mx/rss/sociedad.xml'     )
+            ]
+            
+    def print_version(self, url):
+        return url.replace('/notas/','/notas/vi_')
+
+    def preprocess_html(self, soup):
+        mtag = '<meta http-equiv="Content-Language" content="es-MX"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll(font=True):
+            del item['font']
+        for item in soup.findAll(face=True):
+            del item['face']
+        for item in soup.findAll(helvetica=True):
+            del item['helvetica']
+        return soup
+        
--- a/src/calibre/web/feeds/recipes/recipe_news_times.py
+++ b/src/calibre/web/feeds/recipes/recipe_news_times.py
@@ -0,0 +1,28 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NewsTimes(BasicNewsRecipe):
+    title                 = 'Newstimes'
+    __author__            = 'Darko Miletic'
+    description           = 'news from USA'
+    language              = _('English')
+    oldest_article        = 1
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    remove_javascript     = True
+
+    keep_only_tags = [
+                         dict(name='h1', attrs={'id':'articleTitle'})
+                        ,dict(name='div', attrs={'id':['articleByline','articleDate','articleBody']})
+                     ]
+    remove_tags = [
+                    dict(name=['object','link'])
+                   ,dict(name='div', attrs={'class':'articleEmbeddedAdBox'})
+                  ]
+
+    
+    feeds = [
+              (u'Latest news'    , u'http://feeds.newstimes.com/mngi/rss/CustomRssServlet/3/201071.xml' )
+            ]
+