Improve string_to_authors and use it in more places

This commit is contained in:
Kovid Goyal 2009-06-20 09:46:22 -07:00
parent 50a1346611
commit e6309590fc
11 changed files with 24 additions and 45 deletions

View File

@ -9,6 +9,7 @@ from itertools import cycle
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
from calibre import sanitize_file_name as sanitize from calibre import sanitize_file_name as sanitize
from calibre.ebooks.metadata import string_to_authors
class JETBOOK(USBMS): class JETBOOK(USBMS):
name = 'Ectaco JetBook Device Interface' name = 'Ectaco JetBook Device Interface'
@ -118,7 +119,7 @@ class JETBOOK(USBMS):
match = cls.JETBOOK_FILE_NAME_PATTERN.match(fn) match = cls.JETBOOK_FILE_NAME_PATTERN.match(fn)
if match is not None: if match is not None:
mi.title = check_unicode(match.group('title')) mi.title = check_unicode(match.group('title'))
authors = match.group('authors').split('&') authors = string_to_authors(match.group('authors'))
mi.authors = map(check_unicode, authors) mi.authors = map(check_unicode, authors)
return mi return mi

View File

@ -19,7 +19,7 @@ import xml.dom.minidom as dom
from functools import wraps from functools import wraps
from calibre.devices.prs500.prstypes import field from calibre.devices.prs500.prstypes import field
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, string_to_authors
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
@ -221,10 +221,7 @@ def get_metadata(stream):
@param stream: A file like object or an instance of L{LRFMetaFile} @param stream: A file like object or an instance of L{LRFMetaFile}
""" """
lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream) lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
au = lrf.author.strip().split(',') authors = string_to_authors(lrf.author)
authors = []
for i in au:
authors.extend(i.split('&'))
mi = MetaInformation(lrf.title.strip(), authors) mi = MetaInformation(lrf.title.strip(), authors)
mi.author = lrf.author.strip() mi.author = lrf.author.strip()
mi.comments = lrf.free_text.strip() mi.comments = lrf.free_text.strip()

View File

@ -13,8 +13,10 @@ from urlparse import urlparse
from calibre import relpath from calibre import relpath
# Matches the word "and" used as a separator between author names, together
# with an optional preceding comma (e.g. "A and B" or "A, B, and C").
# Whitespace is required on both sides so names such as "Anderson" are safe.
_author_pat = re.compile(r',?\s+and\s+', re.IGNORECASE)


def string_to_authors(raw):
    '''
    Split a free-form author string into a list of author names.

    Authors may be separated by ``&``, by ``,`` or by the word ``and``
    (case-insensitive). A doubled ampersand (``&&``) is treated as an
    escaped, literal ``&`` inside a single name.

    :param raw: The author string. May be empty or ``None``.
    :return: A list of whitespace-stripped, non-empty author names. An empty
             list is returned when ``raw`` is empty/``None`` or contains only
             separators.
    '''
    if not raw:
        return []
    raw = _author_pat.sub('&', raw)
    # Hide escaped ampersands behind a placeholder so they survive the split,
    # then normalise commas to the single separator character.
    raw = raw.replace('&&', u'\uffff').replace(',', '&')
    authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
    # Trailing or doubled separators ("Smith, " / "A &, B") would otherwise
    # yield empty author names.
    return [a for a in authors if a]

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
import sys, os import sys, os
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, string_to_authors
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG'] MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
@ -34,11 +34,7 @@ def get_metadata(stream):
if title: if title:
mi.title = title mi.title = title
if author: if author:
src = author.split('&') mi.authors = string_to_authors(author)
authors = []
for au in src:
authors += au.split(',')
mi.authors = authors
mi.author = author mi.author = author
if category: if category:
mi.category = category mi.category = category

View File

@ -9,7 +9,7 @@ from calibre.utils.config import prefs
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, string_to_authors
_METADATA_PRIORITIES = [ _METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm', 'html', 'htm', 'xhtml', 'xhtm',
@ -132,10 +132,7 @@ def metadata_from_filename(name, pat=None):
pass pass
try: try:
au = match.group('authors') au = match.group('authors')
aus = au.split(',') aus = string_to_authors(au)
authors = []
for a in aus:
authors.extend(a.split('&'))
mi.authors = authors mi.authors = authors
except IndexError: except IndexError:
pass pass

View File

@ -7,7 +7,7 @@ import uuid
from urllib import unquote, quote from urllib import unquote, quote
from calibre.constants import __appname__, __version__ from calibre.constants import __appname__, __version__
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
from calibre.ebooks.lrf import entity_to_unicode from calibre.ebooks.lrf import entity_to_unicode
from calibre.ebooks.metadata import Resource, ResourceCollection from calibre.ebooks.metadata import Resource, ResourceCollection
@ -270,11 +270,7 @@ class OPF(MetaInformation):
role = 'aut' role = 'aut'
if role == 'aut' and elem.string: if role == 'aut' and elem.string:
raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string) raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string)
au = raw.split(',') return string_to_authors(raw)
ans = []
for i in au:
ans.extend(i.split('&'))
return [a.strip() for a in ans]
return [] return []
def get_author_sort(self): def get_author_sort(self):

View File

@ -15,7 +15,7 @@ try:
_imagemagick_loaded = True _imagemagick_loaded = True
except: except:
_imagemagick_loaded = False _imagemagick_loaded = False
from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_to_string
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
from calibre.utils.podofo import get_metadata as podofo_get_metadata, \ from calibre.utils.podofo import get_metadata as podofo_get_metadata, \
set_metadata as podofo_set_metadata, Unavailable, get_metadata_quick set_metadata as podofo_set_metadata, Unavailable, get_metadata_quick
@ -69,12 +69,8 @@ def get_metadata_pypdf(stream):
if info.title: if info.title:
mi.title = info.title mi.title = info.title
if info.author: if info.author:
src = info.author.split('&')
authors = []
for au in src:
authors += au.split(',')
mi.authors = authors
mi.author = info.author mi.author = info.author
mi.authors = string_to_authors(info.author)
if info.subject: if info.subject:
mi.category = info.subject mi.category = info.subject
except Exception, err: except Exception, err:

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
import sys, struct import sys, struct
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, string_to_authors
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
@ -41,12 +41,8 @@ def get_metadata(stream):
if key.strip() == 'TITLE': if key.strip() == 'TITLE':
mi.title = value.strip() mi.title = value.strip()
elif key.strip() == 'AUTHOR': elif key.strip() == 'AUTHOR':
src = value.split('&')
authors = []
for au in src:
authors += au.split(',')
mi.authors = authors
mi.author = value mi.author = value
mi.authors = string_to_authors(value)
except Exception, err: except Exception, err:
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err)) msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8') print >>sys.stderr, msg.encode('utf8')

View File

@ -5,7 +5,7 @@ Edit metadata in RTF files.
""" """
import re, cStringIO, sys import re, cStringIO, sys
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, string_to_authors
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL) title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL) author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@ -76,10 +76,7 @@ def get_metadata(stream):
category = category_match.group(1).strip() category = category_match.group(1).strip()
mi = MetaInformation(title, author) mi = MetaInformation(title, author)
if author: if author:
au = author.split(',') mi.authors = string_to_authors(author)
mi.authors = []
for i in au:
mi.authors.extend(i.split('&'))
mi.comments = comment mi.comments = comment
mi.category = category mi.category = category
return mi return mi

View File

@ -9,6 +9,7 @@ from zlib import compress, decompress
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.web.feeds.recipes import migrate_automatic_profile_to_automatic_recipe from calibre.web.feeds.recipes import migrate_automatic_profile_to_automatic_recipe
from calibre.ebooks.metadata import string_to_authors
class Concatenate(object): class Concatenate(object):
'''String concatenation aggregator for sqlite''' '''String concatenation aggregator for sqlite'''
@ -97,7 +98,7 @@ class LibraryDatabase(object):
obj = conn.execute('INSERT INTO books(title, timestamp, author_sort) VALUES (?,?,?)', obj = conn.execute('INSERT INTO books(title, timestamp, author_sort) VALUES (?,?,?)',
(book['title'], book['timestamp'], authors)) (book['title'], book['timestamp'], authors))
id = obj.lastrowid id = obj.lastrowid
authors = authors.split('&') authors = string_to_authors(authors)
for a in authors: for a in authors:
author = conn.execute('SELECT id from authors WHERE name=?', (a,)).fetchone() author = conn.execute('SELECT id from authors WHERE name=?', (a,)).fetchone()
if author: if author:
@ -1103,7 +1104,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
item[col] = val item[col] = val
break break
if column == 'authors': if column == 'authors':
val = val.split('&,') val = string_to_authors(val)
self.set_authors(id, val) self.set_authors(id, val)
elif column == 'title': elif column == 'title':
self.set_title(id, val) self.set_title(id, val)
@ -1266,7 +1267,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
mi.authors = ['Unknown'] mi.authors = ['Unknown']
authors = [] authors = []
for a in mi.authors: for a in mi.authors:
authors += a.split('&') authors += string_to_authors(a)
self.set_authors(id, authors) self.set_authors(id, authors)
if mi.author_sort: if mi.author_sort:
self.set_author_sort(id, mi.author_sort) self.set_author_sort(id, mi.author_sort)

View File

@ -993,7 +993,7 @@ class LibraryDatabase2(LibraryDatabase):
mi.authors = [_('Unknown')] mi.authors = [_('Unknown')]
authors = [] authors = []
for a in mi.authors: for a in mi.authors:
authors += a.split('&') authors += string_to_authors(a)
self.set_authors(id, authors, notify=False) self.set_authors(id, authors, notify=False)
if mi.author_sort: if mi.author_sort:
self.set_author_sort(id, mi.author_sort, notify=False) self.set_author_sort(id, mi.author_sort, notify=False)