Speed up BibTeX catalog generation. Only show the option to convert downloaded comments to plain text if the metadata download plugin declares that it returns HTML comments

2025-08-11 09:13:57 -04:00 · 2010-11-22 12:40:56 -07:00 · 2010-11-22 12:40:56 -07:00 · 7ff09a5d0c
commit 7ff09a5d0c
parent 0503a2f652
5 changed files with 128 additions and 102 deletions
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@ -146,6 +146,7 @@ class MetadataSource(Plugin): # {{{
            cb.setChecked(c.get(x, True))
            w._layout.addWidget(cb)
        
+        if self.has_html_comments:
            cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
            setattr(w, '_textcomments', cb)
            cb.setChecked(c.get('textcomments', False))
@ -155,8 +156,10 @@ class MetadataSource(Plugin): # {{{

    def save_settings(self, w):
        dl_settings = {}
-        for x in ('rating', 'tags', 'comments', 'textcomments'):
+        for x in ('rating', 'tags', 'comments'):
            dl_settings[x] = getattr(w, '_'+x).isChecked()
+        if self.has_html_comments:
+            dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked()
        c = self.config_store()
        c.set(self.name, dl_settings)
        if hasattr(w, '_sc'):
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@ -90,10 +90,8 @@ def build_isbn(base_url, opts):
    return base_url + 'index1=isbn&value1='+opts.isbn

 def build_combined(base_url, opts):
-    query = ''
-    for e in (opts.title, opts.author, opts.publisher):
-        if e is not None:
-            query += ' ' + e
+    query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
+        if e is not None ])
    query = query.strip()
    if len(query) == 0:
        raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
@ -141,15 +139,8 @@ def create_books(opts, args, timeout=5.):
        print ('ISBNDB query: '+url)

    tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
-    ans = []
-    for x in tans:
-        add = True
-        for y in ans:
-            if y.isbn == x.isbn:
-                add = False
-        if add:
-            ans.append(x)
-    return ans
+    #remove books with duplicate ISBNs
+    return list(dict((book.isbn, book) for book in tans).values())

 def main(args=sys.argv):
    parser = option_parser()
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -278,10 +278,10 @@ class BIBTEX(CatalogPlugin):

        from calibre.library.save_to_disk import preprocess_template
        #Bibtex functions
-        from calibre.utils.bibtex import bibtex_author_format, utf8ToBibtex, ValidateCitationKey
+        from calibre.utils.bibtex import BibTeX

        def create_bibtex_entry(entry, fields, mode, template_citation,
-            asccii_bibtex = True, citation_bibtex = True):
+            bibtexdict, citation_bibtex = True):

            #Bibtex doesn't like UTF-8 but keep unicode until writing
            #Define starting chain or if book valid strict and not book return a Fail string
@ -297,7 +297,8 @@ class BIBTEX(CatalogPlugin):

            if citation_bibtex :
                # Citation tag
-                bibtex_entry.append(make_bibtex_citation(entry, template_citation, asccii_bibtex))
+                bibtex_entry.append(make_bibtex_citation(entry, template_citation,
+                    bibtexdict))
                bibtex_entry = [u' '.join(bibtex_entry)]

            for field in fields:
@ -312,11 +313,11 @@ class BIBTEX(CatalogPlugin):
                    pass

                if field == 'authors' :
-                    bibtex_entry.append(u'author = "%s"' % bibtex_author_format(item))
+                    bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))

                elif field in ['title', 'publisher', 'cover', 'uuid',
                        'author_sort', 'series'] :
-                    bibtex_entry.append(u'%s = "%s"' % (field, utf8ToBibtex(item, asccii_bibtex)))
+                    bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

                elif field == 'id' :
                    bibtex_entry.append(u'calibreid = "%s"' % int(item))
@ -329,13 +330,13 @@ class BIBTEX(CatalogPlugin):

                elif field == 'tags' :
                    #A list to flatten
-                    bibtex_entry.append(u'tags = "%s"' % utf8ToBibtex(u', '.join(item), asccii_bibtex))
+                    bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item)))

                elif field == 'comments' :
                    #\n removal
                    item = item.replace(u'\r\n',u' ')
                    item = item.replace(u'\n',u' ')
-                    bibtex_entry.append(u'note = "%s"' % utf8ToBibtex(item, asccii_bibtex))
+                    bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item))

                elif field == 'isbn' :
                    # Could be 9, 10 or 13 digits
@ -353,8 +354,7 @@ class BIBTEX(CatalogPlugin):

                elif field == 'pubdate' :
                    bibtex_entry.append(u'year = "%s"' % item.year)
-                    bibtex_entry.append(u'month = "%s"' % utf8ToBibtex(strftime("%b", item),
-                        asccii_bibtex))
+                    bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))

            bibtex_entry = u',\n    '.join(bibtex_entry)
            bibtex_entry += u' }\n\n'
@ -371,7 +371,7 @@ class BIBTEX(CatalogPlugin):
            else :
                return True

-        def make_bibtex_citation(entry, template_citation, asccii_bibtex):
+        def make_bibtex_citation(entry, template_citation, bibtexclass):

            #define a function to replace the template entry by its value
            def tpl_replace(objtplname) :
@ -392,8 +392,9 @@ class BIBTEX(CatalogPlugin):
                    return u''

            if len(template_citation) >0 :
-                tpl_citation = utf8ToBibtex(ValidateCitationKey(re.sub(u'\{[^{}]*\}',
-                    tpl_replace, template_citation)), asccii_bibtex)
+                tpl_citation = bibtexclass.utf8ToBibtex(
+                    bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}',
+                        tpl_replace, template_citation)))

                if len(tpl_citation) >0 :
                    return tpl_citation
@ -404,10 +405,7 @@ class BIBTEX(CatalogPlugin):
            else :
                template_citation = u'%s' % str(entry["id"])

-            if asccii_bibtex :
-                return ValidateCitationKey(template_citation.encode('ascii', 'replace'))
-            else :
-                return ValidateCitationKey(template_citation)
+            return bibtexclass.ValidateCitationKey(template_citation)

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification
@ -475,13 +473,16 @@ class BIBTEX(CatalogPlugin):
        if not len(data):
            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)

+        #Initialize BibTeX class
+        bibtexc = BibTeX()
+
        #Entries writing after Bibtex formating (or not)
        if bibfile_enc != 'ascii' :
-            asccii_bibtex = False
+            bibtexc.ascii_bibtex = False
        else :
-            asccii_bibtex = True
+            bibtexc.ascii_bibtex = True

-        #Check and go to default in case of bad CLI
+        #Check citation choice and go to default in case of bad CLI
        if isinstance(opts.impcit, (StringType, UnicodeType)) :
            if opts.impcit == 'False' :
                citation_bibtex= False
@ -493,6 +494,7 @@ class BIBTEX(CatalogPlugin):
        else :
            citation_bibtex= opts.impcit

+        #Preprocess for error and light correction
        template_citation = preprocess_template(opts.bib_cit)

        #Open output and write entries
@ -514,7 +516,7 @@ class BIBTEX(CatalogPlugin):

        for entry in data:
            outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
-                asccii_bibtex, citation_bibtex))
+                bibtexc, citation_bibtex))

        outfile.close()

--- a/src/calibre/utils/bibtex.py
+++ b/src/calibre/utils/bibtex.py
@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """ Collection of python utility-methodes commonly used by other
    bibliograph packages.
    From http://pypi.python.org/pypi/bibliograph.core/
@ -62,11 +60,14 @@
      DAMAGE.
    """

-__docformat__ = 'reStructuredText'
 __author__  = 'sengian <sengian1 at gmail.com>'
+__docformat__ = 'restructuredtext en'

 import re, string

+from calibre.constants import preferred_encoding
+from calibre.utils.mreplace import MReplace
+
 utf8enc2latex_mapping = {
    # This is a mapping of Unicode characters to LaTeX equivalents.
    # The information has been extracted from
@ -2842,69 +2843,66 @@ entity_mapping = {
                '"':'{"}',
                }

-def ValidateCitationKey(text):
-    """
-    removes characters not allowed in BibTeX keys
-
-    >>> from bibliograph.core.utils import _validKey
-    >>> _validKey(DummyEntry('Foo Bar'))
-    'FooBar'
-
-    >>> _validKey(DummyEntry('my@id'))
-    'myid'
-
-    """
+class BibTeX:
+    def __init__(self):
+        self.rep_utf8 = MReplace(utf8enc2latex_mapping)
+        self.rep_ent = MReplace(entity_mapping)
+        #Set default conversion to ASCII BibTeX
+        self.ascii_bibtex = True
        # This substitution is based on the description of cite key restrictions at
        # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html
-    return re.sub(u'[ "@\',\\#}{~%&$^]', u'', text)
+        self.invalid_cit = re.compile(u'[ "@\',\\#}{~%&$^]')
+        self.upper = re.compile(u'[' +
+            string.uppercase.decode(preferred_encoding) + u']')
+        self.escape = re.compile(u'[~#&%_]')

-def BraceUppercase(text):
+    def ValidateCitationKey(self, text):
+        """
+        removes characters not allowed in BibTeX keys
+        >>> ValidateCitationKey(DummyEntry('my@id'))
+        'myid'
+        """
+        return self.invalid_cit.sub(u'', text)
+
+    def braceUppercase(self, text):
        """ Convert uppercase letters to bibtex encoded uppercase
-
-        >>> from bibliograph.core.utils import _braceUppercase
-        >>> _braceUppercase('foo bar')
-        'foo bar'
-
-        >>> _braceUppercase('Foo Bar')
+            >>> braceUppercase('Foo Bar')
            '{F}oo {B}ar'
        """
-    for uc in string.uppercase:
-        text = text.replace(uc, u'{%s}' % uc)
-    return text
+        return self.upper.sub(lambda m: u'{%s}' % m.group(), text)

-def resolveEntities(text):
-    for entity, entity_map in entity_mapping.iteritems():
-        text = text.replace(entity, entity_map)
-    return text
+    def resolveEntities(self, text):
+        #for entity, entity_map in entity_mapping.iteritems():
+        #   text = text.replace(entity, entity_map)
+        #return text
+        return self.rep_ent.mreplace(text)

-def resolveUnicode(text):
+    def resolveUnicode(self, text):
        #UTF-8 text as entry
-    for unichar, latexenc in utf8enc2latex_mapping.iteritems() :
-        text = text.replace(unichar, latexenc)
+        #for unichar, latexenc in utf8enc2latex_mapping.iteritems() :
+        #    text = text.replace(unichar, latexenc)
+        text = self.rep_utf8.mreplace(text)
        return text.replace(u'$}{$', u'')

-def escapeSpecialCharacters(text):
+    def escapeSpecialCharacters(self, text):
        """
        latex escaping some (not all) special characters
        """
        text.replace('\\', '\\\\')
-    escape = ['~', '#', '&', '%', '_']
-    for c in escape:
-        text = text.replace(c, '\\' + c )
-    return text
+        return self.escape.sub(lambda m: u'\\%s' % m.group(), text)

-#Calibre functions
-#Go from an unicode entry to ASCII Bibtex format without encoding
-#Option to go to official ASCII Bibtex or unofficial UTF-8
-def utf8ToBibtex(text, asccii_bibtex = True):
+    #Calibre functions
+    #Option to go to official ASCII Bibtex or unofficial UTF-8
+    #Go from an unicode entry to ASCII Bibtex format without encoding
+    def utf8ToBibtex(self, text):
        if len(text) == 0:
            return ''
        text.replace('\\', '\\\\')
-    text = resolveEntities(text)
-    if asccii_bibtex :
-        text = resolveUnicode(text)
-    return escapeSpecialCharacters(text)
+        text = self.resolveEntities(text)
+        if self.ascii_bibtex :
+            text = self.resolveUnicode(text)
+        return self.escapeSpecialCharacters(text)

-def bibtex_author_format(item):
+    def bibtex_author_format(self, item):
        #Format authors for Bibtex compliance (get a list as input)
-    return utf8ToBibtex(u' and'.join([author for author in item]))
+        return self.utf8ToBibtex(u' and'.join([author for author in item]))
--- a/src/calibre/utils/mreplace.py
+++ b/src/calibre/utils/mreplace.py
@ -0,0 +1,32 @@
+#multiple replace from dictionary: http://code.activestate.com/recipes/81330/
+__license__   = 'GPL v3'
+__copyright__ = '2010, sengian <sengian1 @ gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+import re
+from UserDict import UserDict
+
+class MReplace(UserDict):
+    def __init__(self, dict = None):
+        UserDict.__init__(self, dict)
+        self.re = None
+        self.regex = None
+        self.compile_regex()
+
+    def compile_regex(self): 
+        if len(self.data) > 0:
+            keys = sorted(self.data.keys(), key=len)
+            keys.reverse()
+            tmp = "(%s)" % "|".join(map(re.escape, keys))
+            if self.re != tmp:
+                self.re = tmp
+                self.regex = re.compile(self.re)
+
+    def __call__(self, mo): 
+        return self[mo.string[mo.start():mo.end()]]
+
+    def mreplace(self, text): 
+        #Replace without regex compile
+        if len(self.data) < 1 or self.re is None:
+            return text
+        return self.regex.sub(self, text)