Sync to trunk

2025-07-09 03:04:10 -04:00 · 2009-02-26 17:07:14 -05:00 · 2009-02-26 17:07:14 -05:00 · 4d21dd9fe2
commit 4d21dd9fe2
parent a1c6108b72 46c7759e88
12 changed files with 199 additions and 22 deletions
--- a/installer/linux/freeze.py
+++ b/installer/linux/freeze.py
@@ -36,6 +36,7 @@ def freeze():
                       '/lib/libbz2.so.1',
                       '/usr/lib/libpoppler.so.4',
                       '/usr/lib/libxml2.so.2',
+                       '/usr/lib/libdbus-1.so.3',
                       '/usr/lib/libxslt.so.1',
                       '/usr/lib/libxslt.so.1',
                       '/usr/lib/libgthread-2.0.so.0',
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.4.140'
+__version__   = '0.4.141'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -276,8 +276,11 @@ class UnBinary(object):
                    state = 'get attr'
                elif count > 0:
                    if not in_censorship:
-                        self.buf.write(c.encode(
-                            'ascii', 'xmlcharrefreplace'))
+                        if c == '"':
+                            c = '&quot;'
+                        elif c == '<':
+                            c = '&lt;'
+                        self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
                    count -= 1
                if count == 0:
                    if not in_censorship:
--- a/src/calibre/ebooks/lrf/pdf/convert_from.py
+++ b/src/calibre/ebooks/lrf/pdf/convert_from.py
@@ -3,6 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 ''''''

 import sys, os, subprocess, logging
+import errno
 from functools import partial
 from calibre import isosx, setup_cli_handlers, filename_to_utf8, iswindows, islinux
 from calibre.ebooks import ConversionError, DRMError
@@ -41,14 +42,26 @@ def generate_html(pathtopdf, tdir):
    try:
        os.chdir(tdir)
        try:
-            p = popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+            p = popen(cmd, stderr=subprocess.PIPE)
        except OSError, err:
            if err.errno == 2:
                raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
            else:
                raise
+
+        '''
        print p.stdout.read()
-        ret = p.wait()
+        '''
+        while True:
+            try:
+                ret = p.wait()
+                break
+            except OSError, e:
+                if e.errno == errno.EINTR:
+                    continue
+                else:
+                    raise
+
        if ret != 0:
            err = p.stderr.read()
            raise ConversionError, err
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -81,6 +81,7 @@ class MetadataUpdater(object):
        type = self.type = data[60:68]
        self.nrecs, = unpack('>H', data[76:78])
        record0 = self.record0 = self.record(0)
+        self.encryption_type, = unpack('>H', record0[12:14])
        codepage, = unpack('>I', record0[28:32])
        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
        image_base, = unpack('>I', record0[108:112])
@@ -134,6 +135,8 @@ class MetadataUpdater(object):
        if self.thumbnail_record is not None:
            recs.append((202, pack('>I', self.thumbnail_rindex)))
        exth = StringIO()
+        if getattr(self, 'encryption_type', -1) != 0:
+            raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
        for code, data in recs:
            exth.write(pack('>II', code, len(data) + 8))
            exth.write(data)
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -37,6 +37,8 @@ class KeyMapper(object):

    @staticmethod
    def relate(size, base):
+        if size == 0:
+            return base
        size = float(size)
        base = float(base)
        if abs(size - base) < 0.1: return 0
@@ -48,6 +50,7 @@ class KeyMapper(object):
        return result
        
    def __getitem__(self, ssize):
+        ssize = asfloat(ssize, 0)
        if ssize in self.cache:
            return self.cache[ssize]
        dsize = self.map(ssize)
@@ -66,6 +69,7 @@ class ScaleMapper(object):
        self.dscale = float(dbase) / float(sbase)

    def __getitem__(self, ssize):
+        ssize = asfloat(ssize, 0)
        dsize = ssize * self.dscale
        return dsize

--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@@ -90,11 +90,11 @@ class DateDelegate(QStyledItemDelegate):
    def displayText(self, val, locale):
        d = val.toDate()
        return d.toString('dd MMM yyyy')
-        if d.isNull():
-            return ''
-        d = datetime(d.year(), d.month(), d.day())
-        return strftime(BooksView.TIME_FMT, d.timetuple())
        
+    def createEditor(self, parent, option, index):
+        qde = QStyledItemDelegate.createEditor(self, parent, option, index)
+        qde.setDisplayFormat('MM/dd/yyyy')
+        return qde

 class BooksModel(QAbstractTableModel):
    coding = zip(
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -44,7 +44,6 @@ from calibre.gui2.dialogs.search import SearchDialog
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.gui2.dialogs.book_info import BookInfo
 from calibre.ebooks.metadata.meta import set_metadata
-from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.library.database2 import LibraryDatabase2, CoverCache
 from calibre.parallel import JobKilled
@@ -399,7 +398,7 @@ class Main(MainWindow, Ui_MainWindow):
    def change_output_format(self, x):
        of = unicode(x).strip()
        if of != prefs['output_format']:
-            if of not in ('LRF', 'EPUB'):
+            if of not in ('LRF', 'EPUB', 'MOBI'):
                warning_dialog(self, 'Warning', 
                               '<p>%s support is still in beta. If you find bugs, please report them by opening a <a href="http://calibre.kovidgoyal.net">ticket</a>.'%of).exec_()
            prefs.set('output_format', of)
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -31,7 +31,8 @@ recipe_modules = ['recipe_' + r for r in (
           'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
           'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
           'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
-           'al_jazeera', 'winsupersite', 'borba',
+           'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
+           'lamujerdemivida',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_courrierinternational.py
+++ b/src/calibre/web/feeds/recipes/recipe_courrierinternational.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>'
+'''
+Courrier International
+'''
+
+import re
+from datetime import date
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CourrierInternational(BasicNewsRecipe):
+    title          = 'Courrier International'
+    __author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
+    description = 'Global news in french from international newspapers'
+    oldest_article = 7
+    language = _('French')
+    max_articles_per_feed = 50
+    no_stylesheets = True
+
+    html2lrf_options = ['--base-font-size', '10']
+    
+    feeds =  [
+        # Some articles that require a subscription fail to download.
+        ('A la Une', 'http://www.courrierinternational.com/rss/rss_a_la_une.xml'),
+    ]
+             
+    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
+        [
+            # Handle "Depeches" (short news dispatches)
+            (r'.*<td [^>]*>([0-9][0-9]/.*</p>)</td>.*', lambda match : '<html><body><table><tr><td>'+match.group(1)+'</td></tr></table></body></html>'),
+            #Handle Articles
+            (r'.*<td [^>]*>(Courrier international.*?)							<td width="10"><img src="/img/espaceur.gif"></td>.*', lambda match : '<html><body><table><tr><td>'+match.group(1)+'</body></html>'),
+        ]
+    ]
+    
+    
+    def print_version(self, url):
+        return re.sub('/[a-zA-Z]+\.asp','/imprimer.asp' ,url)
+
--- a/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
+++ b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+lamujerdemivida.com.ar
+'''
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LaMujerDeMiVida(BasicNewsRecipe):
+    title                 = 'La Mujer de mi Vida'
+    __author__            = 'Darko Miletic'
+    description           = 'Cultura de otra manera'    
+    oldest_article        = 90
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'cp1252'
+    publisher             = 'La Mujer de mi Vida'
+    category              = 'literatura, critica, arte, ensayos'    
+    language              = _('Spanish')
+    INDEX                 = 'http://www.lamujerdemivida.com.ar/'
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        , '--ignore-tables'
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+
+    keep_only_tags = [dict(name='table', attrs={'width':'570'})]
+
+    feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')]
+
+    def preprocess_html(self, soup):
+        soup.html['xml:lang'] = 'es-AR'
+        soup.html['lang']     = 'es-AR'
+        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def get_cover_url(self):
+        cover_url = None
+        soup = self.index_to_soup(self.INDEX)
+        cover_item = soup.find('img',attrs={'alt':'Lamujerdemivida.'})
+        if cover_item:
+           cover_url = self.INDEX + cover_item['src']
+        return cover_url
+    
+    def parse_index(self):
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for item in soup.findAll('td', attrs={'width':'390'}):
+                atag = item.find('a',href=True)
+                if atag:
+                    url         = atag['href']
+                    title       = self.tag_to_string(atag)
+                    date        = strftime(self.timefmt)
+                    articles.append({
+                                      'title'      :title
+                                     ,'date'       :date
+                                     ,'url'        :url
+                                     ,'description':''
+                                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+                
--- a/src/calibre/web/feeds/recipes/recipe_le_monde.py
+++ b/src/calibre/web/feeds/recipes/recipe_le_monde.py
@@ -7,7 +7,7 @@ lemonde.fr
 '''

 import re
-
+from datetime import date
 from calibre.web.feeds.news import BasicNewsRecipe


@@ -15,10 +15,14 @@ class LeMonde(BasicNewsRecipe):
    title          = 'LeMonde.fr'
    __author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
    description = 'Global news in french'
-    oldest_article = 7
+    oldest_article = 3
    language = _('French')
-    max_articles_per_feed = 20
+    max_articles_per_feed = 30
    no_stylesheets = True
+    cover_url='http://abonnes.lemonde.fr/titresdumonde/'+date.today().strftime("%y%m%d")+'/1.jpg'
+
+
+    html2lrf_options = ['--base-font-size', '10']
    
    feeds =  [
             ('A la Une', 'http://www.lemonde.fr/rss/une.xml'),
@@ -47,16 +51,48 @@ class LeMonde(BasicNewsRecipe):
    
    extra_css      = '.ar-tit {font-size: x-large;} \n .dt {font-size: x-small;}'

-    filter_regexps = [r'xiti\.com']
-
-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
        [
+            (r'<html.*(<div class="post".*?>.*?</div>.*?<div class="entry">.*?</div>).*You can start editing here.*</html>', lambda match : '<html><body>'+match.group(1)+'</body></html>'),
            (r'<p>&nbsp;</p>', lambda match : ''),
            (r'<img src="http://medias\.lemonde\.fr/mmpub/img/let/(.)\.gif"[^>]*><div class=ar-txt>', lambda match : '<div class=ar-txt>'+match.group(1).upper()),
+            (r'<img src="http://medias\.lemonde\.fr/mmpub/img/let/q(.)\.gif"[^>]*><div class=ar-txt>', lambda match : '<div class=ar-txt>"'+match.group(1).upper()),
            (r'(<div class=desc><b>.*</b></div>).*</body>', lambda match : match.group(1)),
        ]
    ]
                       
-    def print_version(self, url):
-        return re.sub('http:.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url)
+    article_match_regexps = [ (re.compile(i)) for i in
+        [
+            (r'http://www\.lemonde\.fr/\S+/article/.*'),
+            (r'http://www\.lemonde\.fr/\S+/portfolio/.*'),
+            (r'http://www\.lemonde\.fr/\S+/article_interactif/.*'),
+            (r'http://\S+\.blog\.lemonde\.fr/.*'),
+        ]
+    ]
    
+    def print_version(self, url):
+        return re.sub('http://www\.lemonde\.fr/.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url)
+
+    # Used to filter duplicated articles
+    articles_list = []
+
+    def get_article_url(self, article):
+        url=article.get('link',  None)
+        url=url[0:url.find("#")]
+        if url in self.articles_list:
+            self.log_debug(_('Skipping duplicated article: %s')%url)
+            return False
+        if self.is_article_wanted(url):
+            self.articles_list.append(url)
+            return url
+        self.log_debug(_('Skipping filtered article: %s')%url)
+        return False
+
+
+    def is_article_wanted(self, url):
+        if self.article_match_regexps:
+            for m in self.article_match_regexps:
+                if m.search(url):
+                    return True
+            return False
+        return False