Improve string_to_authors and use it in more places

2025-08-30 23:00:21 -04:00 · 2009-06-20 09:46:22 -07:00 · 2009-06-20 09:46:22 -07:00 · e6309590fc
commit e6309590fc
parent 50a1346611
11 changed files with 24 additions and 45 deletions
--- a/src/calibre/devices/jetbook/driver.py
+++ b/src/calibre/devices/jetbook/driver.py
@@ -9,6 +9,7 @@ from itertools import cycle

 from calibre.devices.usbms.driver import USBMS
 from calibre import sanitize_file_name as sanitize
+from calibre.ebooks.metadata import string_to_authors

 class JETBOOK(USBMS):
    name           = 'Ectaco JetBook Device Interface'
@@ -118,7 +119,7 @@ class JETBOOK(USBMS):
            match = cls.JETBOOK_FILE_NAME_PATTERN.match(fn)
            if match is not None:
                mi.title = check_unicode(match.group('title'))
-                authors = match.group('authors').split('&')
+                authors = string_to_authors(match.group('authors'))
                mi.authors = map(check_unicode, authors)

        return mi
--- a/src/calibre/ebooks/lrf/meta.py
+++ b/src/calibre/ebooks/lrf/meta.py
@@ -19,7 +19,7 @@ import xml.dom.minidom as dom
 from functools import wraps

 from calibre.devices.prs500.prstypes import field
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, string_to_authors

 BYTE      = "<B"  #: Unsigned char little endian encoded in 1 byte 
 WORD      = "<H"  #: Unsigned short little endian encoded in 2 bytes 
@@ -221,10 +221,7 @@ def get_metadata(stream):
    @param stream: A file like object or an instance of L{LRFMetaFile}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
-    au = lrf.author.strip().split(',')
-    authors = []
-    for i in au:
-        authors.extend(i.split('&'))
+    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -13,8 +13,22 @@ from urlparse import urlparse

 from calibre import relpath

+_author_pat = re.compile(r',?\s+and\s+', re.IGNORECASE)
 def string_to_authors(raw):
+    '''
+    Split a free-form author string into a list of author names.
+
+    Authors may be separated by ``&``, a comma, or the word "and" (any
+    case, optionally preceded by a comma). A doubled ``&&`` stands for a
+    literal ampersand inside a single name.
+
+    :param raw: The author string to split.
+    :return: List of stripped author names, with empty names dropped.
+    '''
+    # Protect escaped ampersands before inserting separators, so that a
+    # separator placed next to an existing '&' is never read as '&&'.
    raw = raw.replace('&&', u'\uffff')
+    raw = _author_pat.sub('&', raw).replace(',', '&')
    authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
-    return authors
+    return [a for a in authors if a]

--- a/src/calibre/ebooks/metadata/imp.py
+++ b/src/calibre/ebooks/metadata/imp.py
@@ -4,7 +4,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'

 import sys, os

-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, string_to_authors

 MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']

@@ -34,11 +34,7 @@ def get_metadata(stream):
        if title:
            mi.title = title
        if author:
-            src = author.split('&')
-            authors = []
-            for au in src:
-                authors += au.split(',')
-            mi.authors = authors
+            mi.authors = string_to_authors(author)
            mi.author = author
        if category:
            mi.category = category
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -9,7 +9,7 @@ from calibre.utils.config import prefs
 from calibre.ebooks.metadata.opf2 import OPF

 from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, string_to_authors

 _METADATA_PRIORITIES = [
                       'html', 'htm', 'xhtml', 'xhtm',
@@ -132,10 +132,7 @@ def metadata_from_filename(name, pat=None):
            pass
        try:
            au = match.group('authors')
-            aus = au.split(',')
-            authors = []
-            for a in aus:
-                authors.extend(a.split('&'))
+            authors = string_to_authors(au)
            mi.authors = authors
        except IndexError:
            pass
--- a/src/calibre/ebooks/metadata/opf.py
+++ b/src/calibre/ebooks/metadata/opf.py
@@ -7,7 +7,7 @@ import uuid
 from urllib import unquote, quote

 from calibre.constants import __appname__, __version__
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
 from calibre.ebooks.lrf import entity_to_unicode
 from calibre.ebooks.metadata import Resource, ResourceCollection
@@ -270,11 +270,7 @@ class OPF(MetaInformation):
                role = 'aut'
            if role == 'aut' and elem.string:
                raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string)
-                au = raw.split(',')
-                ans = []
-                for i in au:
-                    ans.extend(i.split('&'))
-                return [a.strip() for a in ans]
+                return string_to_authors(raw)
        return []

    def get_author_sort(self):
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@@ -15,7 +15,7 @@ try:
    _imagemagick_loaded = True
 except:
    _imagemagick_loaded = False
-from calibre.ebooks.metadata import MetaInformation, authors_to_string
+from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_to_string
 from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
 from calibre.utils.podofo import get_metadata as podofo_get_metadata, \
    set_metadata as podofo_set_metadata, Unavailable, get_metadata_quick
@@ -69,12 +69,8 @@ def get_metadata_pypdf(stream):
            if info.title:
                mi.title = info.title
            if info.author:
-                src = info.author.split('&')
-                authors = []
-                for au in src:
-                    authors += au.split(',')
-                mi.authors = authors
                mi.author = info.author
+                mi.authors = string_to_authors(info.author)
            if info.subject:
                mi.category = info.subject
    except Exception, err:
--- a/src/calibre/ebooks/metadata/rb.py
+++ b/src/calibre/ebooks/metadata/rb.py
@@ -4,7 +4,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'

 import sys, struct

-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, string_to_authors

 MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'

@@ -41,12 +41,8 @@ def get_metadata(stream):
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
-                src = value.split('&')
-                authors = []
-                for au in src:
-                    authors += au.split(',')
-                mi.authors = authors
                mi.author = value
+                mi.authors = string_to_authors(value)
    except Exception, err:
        msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
        print >>sys.stderr, msg.encode('utf8')
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -5,7 +5,7 @@ Edit metadata in RTF files.
 """
 import re, cStringIO, sys

-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, string_to_authors

 title_pat    = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
 author_pat   = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@@ -76,10 +76,7 @@ def get_metadata(stream):
        category = category_match.group(1).strip()
    mi = MetaInformation(title, author)
    if author:
-        au = author.split(',')
-        mi.authors = []
-        for i in au:
-            mi.authors.extend(i.split('&'))
+        mi.authors = string_to_authors(author)
    mi.comments = comment
    mi.category = category
    return mi
--- a/src/calibre/library/database.py
+++ b/src/calibre/library/database.py
@@ -9,6 +9,7 @@ from zlib import compress, decompress

 from calibre.ebooks.metadata import MetaInformation
 from calibre.web.feeds.recipes import migrate_automatic_profile_to_automatic_recipe
+from calibre.ebooks.metadata import string_to_authors

 class Concatenate(object):
    '''String concatenation aggregator for sqlite'''
@@ -97,7 +98,7 @@ class LibraryDatabase(object):
            obj = conn.execute('INSERT INTO books(title, timestamp, author_sort) VALUES (?,?,?)',
                               (book['title'], book['timestamp'], authors))
            id = obj.lastrowid
-            authors = authors.split('&')
+            authors = string_to_authors(authors)
            for a in authors:
                author = conn.execute('SELECT id from authors WHERE name=?', (a,)).fetchone()
                if author:
@@ -1103,7 +1104,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                item[col] = val
                break
        if column == 'authors':
-            val = val.split('&,')
+            val = string_to_authors(val)
            self.set_authors(id, val)
        elif column == 'title':
            self.set_title(id, val)
@@ -1266,7 +1267,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                mi.authors = ['Unknown']
        authors = []
        for a in mi.authors:
-            authors += a.split('&')
+            authors += string_to_authors(a)
        self.set_authors(id, authors)
        if mi.author_sort:
            self.set_author_sort(id, mi.author_sort)
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -993,7 +993,7 @@ class LibraryDatabase2(LibraryDatabase):
                mi.authors = [_('Unknown')]
        authors = []
        for a in mi.authors:
-            authors += a.split('&')
+            authors += string_to_authors(a)
        self.set_authors(id, authors, notify=False)
        if mi.author_sort:
            self.set_author_sort(id, mi.author_sort, notify=False)