Pull from trunk

2025-07-09 03:04:10 -04:00 · 2009-04-12 12:16:16 -07:00 · 2009-04-12 12:16:16 -07:00 · 0b59ff9c09
commit 0b59ff9c09
parent a423691dd5 2708065870
14 changed files with 284 additions and 104 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -263,6 +263,17 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.mobi import set_metadata
        set_metadata(stream, mi)
 class PDFMetadataWriter(MetadataWriterPlugin):
    # Metadata writer plugin declared for the 'pdf' file type. The class
    # attributes below are the plugin's declarative description; the actual
    # work is delegated to calibre.ebooks.metadata.pdf.set_metadata.
    name        = 'Set PDF metadata'
    file_types  = set(['pdf'])
    # `_` is presumably the gettext translation function installed globally
    # by calibre -- it is not defined in this hunk.
    description = _('Set metadata in %s files') % 'PDF'
    author      = 'John Schember'
    def set_metadata(self, stream, mi, type):
        '''
        Write the metadata in `mi` into the PDF held by `stream`.

        `type` is accepted as part of the method signature but is not used
        here; `stream` and `mi` are passed straight through to
        calibre.ebooks.metadata.pdf.set_metadata.
        '''
        # Imported lazily, inside the method, rather than at module level.
        from calibre.ebooks.metadata.pdf import set_metadata
        set_metadata(stream, mi)
 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -2,7 +2,7 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os, shutil, traceback, functools, sys
+import os, shutil, traceback, functools, sys, re
 from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
                              MetadataWriterPlugin
@ -55,7 +55,14 @@ def load_plugin(path_to_zip_file):
    for name in zf.namelist():
        if name.lower().endswith('plugin.py'):
            locals = {}
-            exec zf.read(name) in locals
+            raw = zf.read(name)
            match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300])
            encoding = 'utf-8'
            if match is not None:
                encoding = match.group(1)
            raw = raw.decode(encoding)
            raw = re.sub('\r\n', '\n', raw)
            exec raw in locals
            for x in locals.values():
                if isinstance(x, type) and issubclass(x, Plugin):
                    if x.minimum_calibre_version > version or \
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -31,6 +31,11 @@ Run an embedded python interpreter.
    parser.add_option('--migrate', action='store_true', default=False,
                      help='Migrate old database. Needs two arguments. Path '
                           'to library1.db and path to new library folder.')
    parser.add_option('--add-simple-plugin', default=None,
            help='Add a simple plugin (i.e. a plugin that consists of only a '
            '.py file), by specifying the path to the py file containing the '
            'plugin code.')
    return parser
 def update_zipfile(zipfile, mod, path):
@ -115,6 +120,22 @@ def debug_device_driver():
            print 'Total space:', d.total_space()
            break
 def add_simple_plugin(path_to_plugin):
    '''
    Package a single-file plugin into a zip archive and hand it to
    calibre-customize.

    The file at `path_to_plugin` is copied to ``custom_plugin.py`` inside a
    fresh temporary directory, zipped into ``plugin.zip`` there, and
    ``calibre.customize.ui.main`` is invoked with the arguments
    ``['calibre-customize', '-a', 'plugin.zip']`` while the temporary
    directory is the current working directory.

    The original working directory is always restored and the temporary
    directory is always removed, even if zipping or the customize call
    raises. All file handles are closed explicitly.

    :param path_to_plugin: Path to the ``.py`` file containing the plugin
                           code.
    '''
    import tempfile, zipfile, shutil
    tdir = tempfile.mkdtemp()
    odir = os.getcwd()
    try:
        # Read the source before changing directory so that a relative
        # path_to_plugin is still resolved against the caller's cwd.
        src = open(path_to_plugin, 'rb')
        try:
            raw = src.read()
        finally:
            src.close()
        dest = open(os.path.join(tdir, 'custom_plugin.py'), 'wb')
        try:
            dest.write(raw)
        finally:
            dest.close()
        # Work inside the temp dir so the archive member is stored under
        # the bare name 'custom_plugin.py'.
        os.chdir(tdir)
        zf = zipfile.ZipFile('plugin.zip', 'w')
        try:
            zf.write('custom_plugin.py')
        finally:
            zf.close()
        from calibre.customize.ui import main
        main(['calibre-customize', '-a', 'plugin.zip'])
    finally:
        # Leave the temp dir before deleting it; removing the current
        # working directory can fail on some platforms.
        os.chdir(odir)
        shutil.rmtree(tdir)
 def main(args=sys.argv):
    opts, args = option_parser().parse_args(args)
@ -137,6 +158,8 @@ def main(args=sys.argv):
            print 'You must specify the path to library1.db and the path to the new library folder'
            return 1
        migrate(args[1], args[2])
    elif opts.add_simple_plugin is not None:
        add_simple_plugin(opts.add_simple_plugin)
    else:
        from IPython.Shell import IPShellEmbed
        ipshell = IPShellEmbed()
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -57,13 +57,18 @@ class USBMS(Device):
        prefix = self._card_prefix if oncard else self._main_prefix
        ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN
-        # Get all books in all directories under the root ebook_dir directory
+        # Get all books in the ebook_dir directory
        if self.SUPPORTS_SUB_DIRS:
            for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
-            # Filter out anything that isn't in the list of supported ebook
+                # Filter out anything that isn't in the list of supported ebook types
            # types
                for book_type in self.FORMATS:
                    for filename in fnmatch.filter(files, '*.%s' % (book_type)):
                        bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
        else:
            path = os.path.join(prefix, ebook_dir)
            for filename in os.listdir(path):
                if path_to_ext(filename) in self.FORMATS:
                    bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
        return bl
    def upload_books(self, files, names, on_card=False, end_session=True,
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@ -1,11 +1,10 @@
 '''Read meta information from PDF files'''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''
-import sys, os, re, StringIO
+import sys, os, StringIO
 from calibre.ebooks.metadata import MetaInformation, authors_to_string
 from calibre.ptempfile import TemporaryDirectory
@ -52,18 +51,27 @@ def get_metadata(stream, extract_cover=True):
 def set_metadata(stream, mi):
    stream.seek(0)
-    raw = stream.read()
+
-    if mi.title:
+    # Use a StringIO object for the pdf because we will want to over
-        tit = mi.title.encode('utf-8') if isinstance(mi.title, unicode) else mi.title
+    # write it later and if we are working on the stream directly it
-        raw = re.compile(r'<<.*?/Title\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), tit), raw)
+    # could cause some issues.
-    if mi.authors:
+    raw = StringIO.StringIO(stream.read())
-        au = authors_to_string(mi.authors)
+    orig_pdf = PdfFileReader(raw)
-        if isinstance(au, unicode):
+
-            au = au.encode('utf-8')
+    title = mi.title if mi.title else orig_pdf.documentInfo.title
-        raw = re.compile(r'<<.*?/Author\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), au), raw)
+    author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
    out_pdf = PdfFileWriter(title=title, author=author)
    for page in orig_pdf.pages:
        out_pdf.addPage(page)
    out_str = StringIO.StringIO()
    out_pdf.write(out_str)
    stream.seek(0)
    stream.truncate()
-    stream.write(raw)
+    out_str.seek(0)
    stream.write(out_str.read())
    stream.seek(0)
 def get_cover(stream):
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -15,7 +15,8 @@ except ImportError:
 from lxml import html, etree
-from calibre import entity_to_unicode
+from calibre import entity_to_unicode, sanitize_file_name
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks import DRMError
 from calibre.ebooks.chardet import ENCODING_PATS
 from calibre.ebooks.mobi import MobiError
@ -25,7 +26,6 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
 from calibre.ebooks.metadata.toc import TOC
 from calibre import sanitize_file_name
 class EXTHHeader(object):
@ -157,6 +157,62 @@ class BookHeader(object):
                self.exth.mi.language = self.language
 class MetadataHeader(BookHeader):
    '''
    Reads only the header information of a MOBI book directly from a
    seekable stream, instead of loading every section.

    On construction the book identity and the section count are read from
    fixed positions in the stream. When there are at least two sections, the
    raw bytes of the first section are handed to ``BookHeader.__init__``;
    otherwise ``self.exth`` is set to None and ``BookHeader.__init__`` is not
    called at all.
    '''

    def __init__(self, stream):
        '''
        :param stream: Seekable, readable stream positioned anywhere; it is
                       kept as ``self.stream`` and re-positioned by every
                       method of this class.
        '''
        self.stream = stream
        self.ident = self.identity()
        self.num_sections = self.section_count()
        if self.num_sections >= 2:
            # Two section offsets are needed to know where the first
            # section ends (see header()).
            header = self.header()
            BookHeader.__init__(self, header, self.ident, None)
        else:
            self.exth = None

    def identity(self):
        '''
        Return the 8 byte book type read at offset 60, upper-cased.

        :raises MobiError: if the type is neither BOOKMOBI nor TEXTREAD.
        '''
        self.stream.seek(60)
        ident = self.stream.read(8).upper()
        if ident not in ['BOOKMOBI', 'TEXTREAD']:
            raise MobiError('Unknown book type: %s' % ident)
        return ident

    def section_count(self):
        '''
        Return the number of sections: a big-endian unsigned 16 bit value
        stored at offset 76.
        '''
        self.stream.seek(76)
        return struct.unpack('>H', self.stream.read(2))[0]

    def section_offset(self, number):
        '''
        Return the start offset of section `number`.

        Each section has an 8 byte record starting at offset 78; the first
        four bytes of the record (big-endian unsigned 32 bit) are the
        offset, the remaining four bytes are ignored here.
        '''
        self.stream.seek(78+number*8)
        return struct.unpack('>LBBBB', self.stream.read(8))[0]

    def header(self):
        '''
        Return the raw bytes of the first section (the one holding the
        metadata).
        '''
        # First section with the metadata
        start = self.section_offset(0)
        # Second section, used only to get the length of the first
        end = self.section_offset(1)
        self.stream.seek(start)
        return self.stream.read(end - start)

    def section_data(self, number):
        '''
        Return the raw bytes of section `number`.

        A section ends where the next one starts; the last section ends at
        the end of the stream.
        '''
        start = self.section_offset(number)
        if number == self.num_sections - 1:
            # Measure the stream by seeking to its end (whence=2) rather
            # than stat()-ing self.stream.name, so that streams without an
            # on-disk file name (e.g. StringIO objects) work as well.
            self.stream.seek(0, 2)
            end = self.stream.tell()
        else:
            end = self.section_offset(number + 1)
        self.stream.seek(start)
        return self.stream.read(end - start)
 class MobiReader(object):
    PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
    IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
@ -593,27 +649,34 @@ class MobiReader(object):
            im.convert('RGB').save(open(path, 'wb'), format='JPEG')
 def get_metadata(stream):
-    from calibre.utils.logging import Log
+    mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    log = Log()
    mr = MobiReader(stream, log)
    if mr.book_header.exth is None:
        mi = MetaInformation(mr.name, [_('Unknown')])
    else:
        mi = mr.create_opf('dummy.html')[0]
    try:
-            if hasattr(mr.book_header.exth, 'cover_offset'):
+        mh = MetadataHeader(stream)
-                cover_index = mr.book_header.first_image_index + \
+
-                              mr.book_header.exth.cover_offset
+        if mh.exth is not None:
-                data  = mr.sections[int(cover_index)][0]
+            if mh.exth.mi is not None:
                mi = mh.exth.mi
        else:
-                data  = mr.sections[mr.book_header.first_image_index][0]
+            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                mr = MobiReader(stream)
                mr.extract_content(tdir, {})
                if mr.embedded_mi is not None:
                    mi = mr.embedded_mi
        if hasattr(mh.exth, 'cover_offset'):
            cover_index = mh.first_image_index + mh.exth.cover_offset
            data  = mh.section_data(int(cover_index))
        else:
            data  = mh.section_data(mh.first_image_index)
        buf = cStringIO.StringIO(data)
        im = PILImage.open(buf)
        obuf = cStringIO.StringIO()
        im.convert('RGBA').save(obuf, format='JPEG')
        mi.cover_data = ('jpg', obuf.getvalue())
    except:
-            log.exception()
+        import traceback
        traceback.print_exc()
    return mi
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -67,6 +67,8 @@ def _config():
    c.add_opt('default_send_to_device_action', default=None,
            help=_('Default action to perform when send to device button is '
                'clicked'))
    c.add_opt('show_donate_button', default=True,
            help='Show donation button')
    return ConfigProxy(c)
 config = _config()
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -658,7 +658,9 @@ class DeviceGUI(object):
            bad = '\n'.join('<li>%s</li>'%(i,) for i in bad)
            d = warning_dialog(self, _('No suitable formats'),
                    _('Could not upload the following books to the device, '
-                'as no suitable formats were found:<br><ul>%s</ul>')%(bad,))
+                'as no suitable formats were found. Try changing the output '
                'format in the upper right corner next to the red heart and '
                're-converting. <br><ul>%s</ul>')%(bad,))
            d.exec_()
    def upload_booklists(self):
--- a/src/calibre/gui2/dialogs/epub.py
+++ b/src/calibre/gui2/dialogs/epub.py
@ -176,19 +176,19 @@ class Config(ResizableDialog, Ui_Dialog):
    def get_metadata(self):
        title, authors = self.get_title_and_authors()
        mi = MetaInformation(title, authors)
-        publisher = unicode(self.publisher.text())
+        publisher = unicode(self.publisher.text()).strip()
        if publisher:
            mi.publisher = publisher
-        author_sort = unicode(self.author_sort.text())
+        author_sort = unicode(self.author_sort.text()).strip()
        if author_sort:
            mi.author_sort = author_sort
-        comments = unicode(self.comment.toPlainText())
+        comments = unicode(self.comment.toPlainText()).strip()
        if comments:
            mi.comments = comments
        mi.series_index = int(self.series_index.value())
        if self.series.currentIndex() > -1:
-            mi.series = unicode(self.series.currentText())
+            mi.series = unicode(self.series.currentText()).strip()
-        tags = [t.strip() for t in unicode(self.tags.text()).split(',')]
+        tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')]
        if tags:
            mi.tags = tags
@ -267,6 +267,7 @@ class Config(ResizableDialog, Ui_Dialog):
                                 ).exec_()
                    return
        mi = self.get_metadata()
        self.user_mi = mi
        self.read_settings()
        self.cover_file = None
        if self.row is not None:
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -108,6 +108,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        self.donate_action  = self.system_tray_menu.addAction(
                QIcon(':/images/donate.svg'), _('&Donate to support calibre'))
        self.donate_button.setDefaultAction(self.donate_action)
        if not config['show_donate_button']:
            self.donate_button.setVisible(False)
        self.addAction(self.quit_action)
        self.action_restart = QAction(_('&Restart'), self)
        self.addAction(self.action_restart)
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -225,10 +225,11 @@ def convert_bulk(fmt, parent, db, comics, others):
    if others:
        d = get_dialog(fmt)(parent, db)
        if d.exec_() != QDialog.Accepted:
-            others = []
+            others, user_mi = [], None
        else:
            opts = d.opts
            opts.verbose = 2
            user_mi = d.user_mi
    if comics:
        comic_opts = ComicConf.get_bulk_conversion_options(parent)
        if not comic_opts:
@ -256,6 +257,11 @@ def convert_bulk(fmt, parent, db, comics, others):
                continue
            options = opts.copy()
            mi = db.get_metadata(row)
            if user_mi is not None:
                if user_mi.series_index == 1:
                    user_mi.series_index = None
                mi.smart_update(user_mi)
            db.set_metadata(db.id(row), mi)
            opf = OPFCreator(os.getcwdu(), mi)
            opf_file = PersistentTemporaryFile('.opf')
            opf.render(opf_file)
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -39,6 +39,7 @@ recipe_modules = ['recipe_' + r for r in (
           'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
           'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
           'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
           'moneynews',
          )]
 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_moneynews.py
+++ b/src/calibre/web/feeds/recipes/recipe_moneynews.py
@ -0,0 +1,49 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 moneynews.newsmax.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class MoneyNews(BasicNewsRecipe):
    # Recipe description for the moneynews.newsmax.com feeds. Everything
    # here is declarative: feed metadata, conversion options, the feed list
    # and the tag filters applied to each downloaded article.
    title = 'Moneynews.com'
    __author__ = 'Darko Miletic'
    description = 'Financial news worldwide'
    publisher = 'moneynews.com'
    category = 'news, finances, USA, business'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    # Command line style options, reusing the metadata declared above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    # One "key=value" setting per line, again built from the metadata above.
    html2epub_options = (
        'publisher="%s"\ncomments="%s"\ntags="%s"\nlinearize_tables=True'
        % (publisher, description, category)
    )

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Street Talk', u'http://moneynews.newsmax.com/xml/streettalk.xml'),
        (u'Finance News', u'http://moneynews.newsmax.com/xml/FinanceNews.xml'),
        (u'Economy', u'http://moneynews.newsmax.com/xml/economy.xml'),
        (u'Companies', u'http://moneynews.newsmax.com/xml/companies.xml'),
        (u'Markets', u'http://moneynews.newsmax.com/xml/Markets.xml'),
        (u'Investing & Analysis', u'http://moneynews.newsmax.com/xml/investing.xml'),
    ]

    # Tag filters: the table with class "copy" is listed to keep, and the
    # three elements below are listed to remove.
    keep_only_tags = [dict(name='table', attrs={'class': 'copy'})]

    remove_tags = [
        dict(name='td', attrs={'id': 'article_fontsize'}),
        dict(name='table', attrs={'id': 'toolbox'}),
        dict(name='tr', attrs={'id': 'noprint3'}),
    ]