Pull from trunk

2025-07-09 03:04:10 -04:00 · 2009-04-12 12:16:16 -07:00 · 2009-04-12 12:16:16 -07:00 · 0b59ff9c09
commit 0b59ff9c09
parent a423691dd5 2708065870
14 changed files with 284 additions and 104 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -263,6 +263,17 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.mobi import set_metadata
        set_metadata(stream, mi)
        
+class PDFMetadataWriter(MetadataWriterPlugin):
+
+    name        = 'Set PDF metadata'
+    file_types  = set(['pdf'])
+    description = _('Set metadata in %s files') % 'PDF'
+    author      = 'John Schember'
+    
+    def set_metadata(self, stream, mi, type):
+        from calibre.ebooks.metadata.pdf import set_metadata
+        set_metadata(stream, mi)
+

 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -2,7 +2,7 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import os, shutil, traceback, functools, sys
+import os, shutil, traceback, functools, sys, re

 from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
                              MetadataWriterPlugin
@ -55,7 +55,14 @@ def load_plugin(path_to_zip_file):
    for name in zf.namelist():
        if name.lower().endswith('plugin.py'):
            locals = {}
-            exec zf.read(name) in locals
+            raw = zf.read(name)
+            match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300])
+            encoding = 'utf-8'
+            if match is not None:
+                encoding = match.group(1)
+            raw = raw.decode(encoding)
+            raw = re.sub('\r\n', '\n', raw)
+            exec raw in locals
            for x in locals.values():
                if isinstance(x, type) and issubclass(x, Plugin):
                    if x.minimum_calibre_version > version or \
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -31,6 +31,11 @@ Run an embedded python interpreter.
    parser.add_option('--migrate', action='store_true', default=False,
                      help='Migrate old database. Needs two arguments. Path '
                           'to library1.db and path to new library folder.')
+    parser.add_option('--add-simple-plugin', default=None,
+            help='Add a simple plugin (i.e. a plugin that consists of only a '
+            '.py file), by specifying the path to the py file containing the '
+            'plugin code.')
+
    return parser

 def update_zipfile(zipfile, mod, path):
@ -115,6 +120,22 @@ def debug_device_driver():
            print 'Total space:', d.total_space()
            break

+def add_simple_plugin(path_to_plugin):
+    import tempfile, zipfile, shutil
+    tdir = tempfile.mkdtemp()
+    open(os.path.join(tdir, 'custom_plugin.py'),
+            'wb').write(open(path_to_plugin, 'rb').read())
+    odir = os.getcwd()
+    os.chdir(tdir)
+    zf = zipfile.ZipFile('plugin.zip', 'w')
+    zf.write('custom_plugin.py')
+    zf.close()
+    from calibre.customize.ui import main
+    main(['calibre-customize', '-a', 'plugin.zip'])
+    os.chdir(odir)
+    shutil.rmtree(tdir)
+
+

 def main(args=sys.argv):
    opts, args = option_parser().parse_args(args)
@ -137,6 +158,8 @@ def main(args=sys.argv):
            print 'You must specify the path to library1.db and the path to the new library folder'
            return 1
        migrate(args[1], args[2])
+    elif opts.add_simple_plugin is not None:
+        add_simple_plugin(opts.add_simple_plugin)
    else:
        from IPython.Shell import IPShellEmbed
        ipshell = IPShellEmbed()
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -57,13 +57,18 @@ class USBMS(Device):
        prefix = self._card_prefix if oncard else self._main_prefix
        ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN

-        # Get all books in all directories under the root ebook_dir directory
+        # Get all books in the ebook_dir directory
+        if self.SUPPORTS_SUB_DIRS:
            for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
-            # Filter out anything that isn't in the list of supported ebook
-            # types
+                # Filter out anything that isn't in the list of supported ebook types
                for book_type in self.FORMATS:
                    for filename in fnmatch.filter(files, '*.%s' % (book_type)):
                        bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
+        else:
+            path = os.path.join(prefix, ebook_dir)
+            for filename in os.listdir(path):
+                if path_to_ext(filename) in self.FORMATS:
+                    bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
        return bl

    def upload_books(self, files, names, on_card=False, end_session=True,
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@ -1,11 +1,10 @@
-'''Read meta information from PDF files'''
-
 from __future__ import with_statement

 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+'''Read meta information from PDF files'''

-import sys, os, re, StringIO
+import sys, os, StringIO

 from calibre.ebooks.metadata import MetaInformation, authors_to_string
 from calibre.ptempfile import TemporaryDirectory
@ -52,18 +51,27 @@ def get_metadata(stream, extract_cover=True):

 def set_metadata(stream, mi):
    stream.seek(0)
-    raw = stream.read()
-    if mi.title:
-        tit = mi.title.encode('utf-8') if isinstance(mi.title, unicode) else mi.title
-        raw = re.compile(r'<<.*?/Title\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), tit), raw)
-    if mi.authors:
-        au = authors_to_string(mi.authors)
-        if isinstance(au, unicode):
-            au = au.encode('utf-8')
-        raw = re.compile(r'<<.*?/Author\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), au), raw)
+
+    # Use a StringIO object for the pdf because we will want to
+    # overwrite it later and if we are working on the stream directly it
+    # could cause some issues.
+    raw = StringIO.StringIO(stream.read())
+    orig_pdf = PdfFileReader(raw)
+
+    title = mi.title if mi.title else orig_pdf.documentInfo.title
+    author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
+
+    out_pdf = PdfFileWriter(title=title, author=author)
+    for page in orig_pdf.pages:
+        out_pdf.addPage(page)
+
+    out_str = StringIO.StringIO()
+    out_pdf.write(out_str)
+
    stream.seek(0)
    stream.truncate()
-    stream.write(raw)
+    out_str.seek(0)
+    stream.write(out_str.read())
    stream.seek(0)

 def get_cover(stream):
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -15,7 +15,8 @@ except ImportError:

 from lxml import html, etree

-from calibre import entity_to_unicode
+from calibre import entity_to_unicode, sanitize_file_name
+from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks import DRMError
 from calibre.ebooks.chardet import ENCODING_PATS
 from calibre.ebooks.mobi import MobiError
@ -25,7 +26,6 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
 from calibre.ebooks.metadata.toc import TOC
-from calibre import sanitize_file_name

 class EXTHHeader(object):

@ -157,6 +157,62 @@ class BookHeader(object):
                self.exth.mi.language = self.language


+class MetadataHeader(BookHeader):
+    def __init__(self, stream):
+        self.stream = stream
+
+        self.ident = self.identity()
+        self.num_sections = self.section_count()
+
+        if self.num_sections >= 2:
+            header = self.header()
+            BookHeader.__init__(self, header, self.ident, None)
+        else:
+            self.exth = None
+
+    def identity(self):
+        self.stream.seek(60)
+        ident = self.stream.read(8).upper()
+
+        if ident not in ['BOOKMOBI', 'TEXTREAD']:
+            raise MobiError('Unknown book type: %s' % ident)
+        return ident
+
+    def section_count(self):
+        self.stream.seek(76)
+        return struct.unpack('>H', self.stream.read(2))[0]
+
+    def section_offset(self, number):
+        self.stream.seek(78+number*8)
+        return struct.unpack('>LBBBB', self.stream.read(8))[0]
+
+    def header(self):
+        section_headers = []
+
+        # First section with the metadata
+        section_headers.append(self.section_offset(0))
+        # Second section used to get the length of the first
+        section_headers.append(self.section_offset(1))
+
+        end_off = section_headers[1]
+        off = section_headers[0]
+
+        self.stream.seek(off)
+        return self.stream.read(end_off - off)
+
+    def section_data(self, number):
+        start = self.section_offset(number)
+
+        if number == self.num_sections -1:
+            end = os.stat(self.stream.name).st_size
+        else:
+            end = self.section_offset(number + 1)
+
+        self.stream.seek(start)
+
+        return self.stream.read(end - start)
+
+
 class MobiReader(object):
    PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
    IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
@ -593,27 +649,34 @@ class MobiReader(object):
            im.convert('RGB').save(open(path, 'wb'), format='JPEG')

 def get_metadata(stream):
-    from calibre.utils.logging import Log
-    log = Log()
-    mr = MobiReader(stream, log)
-    if mr.book_header.exth is None:
-        mi = MetaInformation(mr.name, [_('Unknown')])
-    else:
-        mi = mr.create_opf('dummy.html')[0]
+    mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    try:
-            if hasattr(mr.book_header.exth, 'cover_offset'):
-                cover_index = mr.book_header.first_image_index + \
-                              mr.book_header.exth.cover_offset
-                data  = mr.sections[int(cover_index)][0]
+        mh = MetadataHeader(stream)
+
+        if mh.exth is not None:
+            if mh.exth.mi is not None:
+                mi = mh.exth.mi
        else:
-                data  = mr.sections[mr.book_header.first_image_index][0]
+            with TemporaryDirectory('_mobi_meta_reader') as tdir:
+                mr = MobiReader(stream)
+                mr.extract_content(tdir, {})
+                if mr.embedded_mi is not None:
+                    mi = mr.embedded_mi
+
+        if hasattr(mh.exth, 'cover_offset'):
+            cover_index = mh.first_image_index + mh.exth.cover_offset
+            data  = mh.section_data(int(cover_index))
+        else:
+            data  = mh.section_data(mh.first_image_index)
        buf = cStringIO.StringIO(data)
        im = PILImage.open(buf)
        obuf = cStringIO.StringIO()
        im.convert('RGBA').save(obuf, format='JPEG')
        mi.cover_data = ('jpg', obuf.getvalue())
    except:
-            log.exception()
+        import traceback
+        traceback.print_exc()
+
    return mi


--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -67,6 +67,8 @@ def _config():
    c.add_opt('default_send_to_device_action', default=None,
            help=_('Default action to perform when send to device button is '
                'clicked'))
+    c.add_opt('show_donate_button', default=True,
+            help='Show donation button')
    return ConfigProxy(c)

 config = _config()
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -658,7 +658,9 @@ class DeviceGUI(object):
            bad = '\n'.join('<li>%s</li>'%(i,) for i in bad)
            d = warning_dialog(self, _('No suitable formats'),
                    _('Could not upload the following books to the device, '
-                'as no suitable formats were found:<br><ul>%s</ul>')%(bad,))
+                'as no suitable formats were found. Try changing the output '
+                'format in the upper right corner next to the red heart and '
+                're-converting. <br><ul>%s</ul>')%(bad,))
            d.exec_()

    def upload_booklists(self):
--- a/src/calibre/gui2/dialogs/epub.py
+++ b/src/calibre/gui2/dialogs/epub.py
@ -176,19 +176,19 @@ class Config(ResizableDialog, Ui_Dialog):
    def get_metadata(self):
        title, authors = self.get_title_and_authors()
        mi = MetaInformation(title, authors)
-        publisher = unicode(self.publisher.text())
+        publisher = unicode(self.publisher.text()).strip()
        if publisher:
            mi.publisher = publisher
-        author_sort = unicode(self.author_sort.text())
+        author_sort = unicode(self.author_sort.text()).strip()
        if author_sort:
            mi.author_sort = author_sort
-        comments = unicode(self.comment.toPlainText())
+        comments = unicode(self.comment.toPlainText()).strip()
        if comments:
            mi.comments = comments
        mi.series_index = int(self.series_index.value())
        if self.series.currentIndex() > -1:
-            mi.series = unicode(self.series.currentText())
-        tags = [t.strip() for t in unicode(self.tags.text()).split(',')]
+            mi.series = unicode(self.series.currentText()).strip()
+        tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')]
        if tags:
            mi.tags = tags

@ -267,6 +267,7 @@ class Config(ResizableDialog, Ui_Dialog):
                                 ).exec_()
                    return
        mi = self.get_metadata()
+        self.user_mi = mi
        self.read_settings()
        self.cover_file = None
        if self.row is not None:
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -108,6 +108,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        self.donate_action  = self.system_tray_menu.addAction(
                QIcon(':/images/donate.svg'), _('&Donate to support calibre'))
        self.donate_button.setDefaultAction(self.donate_action)
+        if not config['show_donate_button']:
+            self.donate_button.setVisible(False)
        self.addAction(self.quit_action)
        self.action_restart = QAction(_('&Restart'), self)
        self.addAction(self.action_restart)
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -225,10 +225,11 @@ def convert_bulk(fmt, parent, db, comics, others):
    if others:
        d = get_dialog(fmt)(parent, db)
        if d.exec_() != QDialog.Accepted:
-            others = []
+            others, user_mi = [], None
        else:
            opts = d.opts
            opts.verbose = 2
+            user_mi = d.user_mi
    if comics:
        comic_opts = ComicConf.get_bulk_conversion_options(parent)
        if not comic_opts:
@ -256,6 +257,11 @@ def convert_bulk(fmt, parent, db, comics, others):
                continue
            options = opts.copy()
            mi = db.get_metadata(row)
+            if user_mi is not None:
+                if user_mi.series_index == 1:
+                    user_mi.series_index = None
+                mi.smart_update(user_mi)
+            db.set_metadata(db.id(row), mi)
            opf = OPFCreator(os.getcwdu(), mi)
            opf_file = PersistentTemporaryFile('.opf')
            opf.render(opf_file)
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -39,6 +39,7 @@ recipe_modules = ['recipe_' + r for r in (
           'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
           'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
           'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
+           'moneynews',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_moneynews.py
+++ b/src/calibre/web/feeds/recipes/recipe_moneynews.py
@ -0,0 +1,49 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+moneynews.newsmax.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MoneyNews(BasicNewsRecipe):
+    title                 = 'Moneynews.com'
+    __author__            = 'Darko Miletic'
+    description           = 'Financial news worldwide'  
+    publisher             = 'moneynews.com'
+    category              = 'news, finances, USA, business'    
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'cp1252'
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        , '--ignore-tables'
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+                            
+    feeds = [ 
+              (u'Street Talk'          , u'http://moneynews.newsmax.com/xml/streettalk.xml'  )
+             ,(u'Finance News'         , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
+             ,(u'Economy'              , u'http://moneynews.newsmax.com/xml/economy.xml'     )
+             ,(u'Companies'            , u'http://moneynews.newsmax.com/xml/companies.xml'   )
+             ,(u'Markets'              , u'http://moneynews.newsmax.com/xml/Markets.xml'     )
+             ,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml'   )
+            ]
+
+            
+    keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
+    
+    remove_tags = [
+                     dict(name='td'   , attrs={'id':'article_fontsize'})
+                    ,dict(name='table', attrs={'id':'toolbox'         })
+                    ,dict(name='tr'   , attrs={'id':'noprint3'        })
+                  ]
+