mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
fcf3dd83f9
@ -22,8 +22,19 @@ class NewYorker(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
|
||||
extra_css = """
|
||||
body {font-family: "Times New Roman",Times,serif}
|
||||
.articleauthor{color: #9F9F9F; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
|
||||
.rubric{color: #CD0021; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
|
||||
.articleauthor{color: #9F9F9F;
|
||||
font-family: Arial, sans-serif;
|
||||
font-size: small;
|
||||
text-transform: uppercase}
|
||||
.rubric,.dd,h6#credit{color: #CD0021;
|
||||
font-family: Arial, sans-serif;
|
||||
font-size: small;
|
||||
text-transform: uppercase}
|
||||
.descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
|
||||
.dd,h6#credit{color: gray}
|
||||
.c{display: block}
|
||||
.caption,h2#articleintro{font-style: italic}
|
||||
.caption{font-size: small}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
@ -39,7 +50,7 @@ class NewYorker(BasicNewsRecipe):
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['meta','iframe','base','link','embed','object'])
|
||||
,dict(attrs={'class':['utils','articleRailLinks','icons'] })
|
||||
,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] })
|
||||
,dict(attrs={'id':['show-header','show-footer'] })
|
||||
]
|
||||
remove_attributes = ['lang']
|
||||
@ -59,3 +70,13 @@ class NewYorker(BasicNewsRecipe):
|
||||
cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
|
||||
return cover_url
|
||||
|
||||
def preprocess_html(self, soup):
    """Clean up a parsed article before conversion.

    Removes the ``style`` attribute from every element that carries one.
    If the element with id ``articleauthor`` contains an ``<a>`` whose
    ``.string`` is not None, that link is replaced by its string.

    :param soup: parsed document tree of the article
    :return: the same ``soup`` object, modified in place
    """
    for styled in soup.findAll(style=True):
        del styled['style']

    author_box = soup.find(attrs={'id':'articleauthor'})
    link = author_box.find('a') if author_box else None
    # Only unlink when the anchor has a single string child; otherwise
    # .string is None and there is nothing sensible to substitute.
    if link and link.string is not None:
        link.replaceWith(link.string)
    return soup
|
||||
|
@ -1,5 +1,5 @@
|
||||
" Project wide builtins
|
||||
let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen"]
|
||||
let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title"]
|
||||
|
||||
python << EOFPY
|
||||
import os
|
||||
|
@ -63,7 +63,8 @@ class Check(Command):
|
||||
|
||||
description = 'Check for errors in the calibre source code'
|
||||
|
||||
BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen']
|
||||
BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen', 'icu_lower',
|
||||
'icu_upper', 'icu_title']
|
||||
CACHE = '.check-cache.pickle'
|
||||
|
||||
def get_files(self, cache):
|
||||
|
@ -2637,7 +2637,7 @@ class ITUNES(DriverBase):
|
||||
lb_added.composer.set(metadata_x.uuid)
|
||||
lb_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
lb_added.enabled.set(True)
|
||||
lb_added.sort_artist.set(metadata_x.author_sort.title())
|
||||
lb_added.sort_artist.set(icu_title(metadata_x.author_sort))
|
||||
lb_added.sort_name.set(metadata.title_sort)
|
||||
|
||||
|
||||
@ -2648,7 +2648,7 @@ class ITUNES(DriverBase):
|
||||
db_added.composer.set(metadata_x.uuid)
|
||||
db_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
db_added.enabled.set(True)
|
||||
db_added.sort_artist.set(metadata_x.author_sort.title())
|
||||
db_added.sort_artist.set(icu_title(metadata_x.author_sort))
|
||||
db_added.sort_name.set(metadata.title_sort)
|
||||
|
||||
if metadata_x.comments:
|
||||
@ -2729,7 +2729,7 @@ class ITUNES(DriverBase):
|
||||
lb_added.Composer = metadata_x.uuid
|
||||
lb_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
lb_added.Enabled = True
|
||||
lb_added.SortArtist = metadata_x.author_sort.title()
|
||||
lb_added.SortArtist = icu_title(metadata_x.author_sort)
|
||||
lb_added.SortName = metadata.title_sort
|
||||
|
||||
if db_added:
|
||||
@ -2739,7 +2739,7 @@ class ITUNES(DriverBase):
|
||||
db_added.Composer = metadata_x.uuid
|
||||
db_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
db_added.Enabled = True
|
||||
db_added.SortArtist = metadata_x.author_sort.title()
|
||||
db_added.SortArtist = icu_title(metadata_x.author_sort)
|
||||
db_added.SortName = metadata.title_sort
|
||||
|
||||
if metadata_x.comments:
|
||||
|
@ -13,7 +13,7 @@ from calibre.devices.interface import BookList as _BookList
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre import isbytestring
|
||||
from calibre.utils.config import prefs, tweaks
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.utils.icu import sort_key, strcmp as icu_strcmp
|
||||
|
||||
class Book(Metadata):
|
||||
def __init__(self, prefix, lpath, size=None, other=None):
|
||||
@ -241,7 +241,7 @@ class CollectionsBookList(BookList):
|
||||
if y is None:
|
||||
return -1
|
||||
if isinstance(x, (unicode, str)):
|
||||
c = cmp(sort_key(x), sort_key(y))
|
||||
c = strcmp(x, y)
|
||||
else:
|
||||
c = cmp(x, y)
|
||||
if c != 0:
|
||||
|
@ -20,7 +20,7 @@ from calibre import prepare_string_for_xml
|
||||
from calibre.constants import __appname__, __version__
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
class FB2MLizer(object):
|
||||
@ -85,8 +85,8 @@ class FB2MLizer(object):
|
||||
metadata['version'] = __version__
|
||||
metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
|
||||
metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
|
||||
metadata['id'] = '%s' % uuid.uuid4()
|
||||
|
||||
metadata['id'] = None
|
||||
|
||||
author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
|
||||
if len(author_parts) == 1:
|
||||
metadata['author_last'] = author_parts[0]
|
||||
@ -98,6 +98,15 @@ class FB2MLizer(object):
|
||||
metadata['author_middle'] = ' '.join(author_parts[1:-2])
|
||||
metadata['author_last'] = author_parts[-1]
|
||||
|
||||
identifiers = self.oeb_book.metadata['identifier']
|
||||
for x in identifiers:
|
||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
||||
metadata['id'] = unicode(x).split(':')[-1]
|
||||
break
|
||||
if metadata['id'] is None:
|
||||
self.log.warn('No UUID identifier found')
|
||||
metadata['id'] = str(uuid.uuid4())
|
||||
|
||||
for key, value in metadata.items():
|
||||
metadata[key] = prepare_string_for_xml(value)
|
||||
|
||||
|
@ -18,9 +18,10 @@ def extract_alphanumeric(in_str=None):
|
||||
"""
|
||||
# I'm sure this is really inefficient and
|
||||
# could be done with a lambda/map()
|
||||
#x.strip().title().replace(' ', "")
|
||||
#x.strip(). title().replace(' ', "")
|
||||
out_str=[]
|
||||
for x in in_str.title():
|
||||
for x in in_str:
|
||||
x = icu_title(x)
|
||||
if x.isalnum(): out_str.append(x)
|
||||
return ''.join(out_str)
|
||||
|
||||
|
@ -607,7 +607,7 @@ class Metadata(object):
|
||||
key = barename(key)
|
||||
attrib[key] = prefixname(value, nsrmap)
|
||||
if namespace(self.term) == DC11_NS:
|
||||
name = DC(barename(self.term).title())
|
||||
name = DC(icu_title(barename(self.term)))
|
||||
elem = element(dcmeta, name, attrib=attrib)
|
||||
elem.text = self.value
|
||||
else:
|
||||
|
@ -50,11 +50,11 @@ class CaseMangler(object):
|
||||
|
||||
def text_transform(self, transform, text):
|
||||
if transform == 'capitalize':
|
||||
return text.title()
|
||||
return icu_title(text)
|
||||
elif transform == 'uppercase':
|
||||
return text.upper()
|
||||
return icu_upper(text)
|
||||
elif transform == 'lowercase':
|
||||
return text.lower()
|
||||
return icu_lower(text)
|
||||
return text
|
||||
|
||||
def split_text(self, text):
|
||||
|
@ -147,8 +147,13 @@ class EditMetadataAction(InterfaceAction):
|
||||
|
||||
d = MetadataSingleDialog(self.gui, row_list[current_row], db,
|
||||
prev=prev, next_=next_)
|
||||
d.view_format.connect(lambda
|
||||
fmt:self.gui.iactions['View'].view_format(row_list[current_row],
|
||||
fmt))
|
||||
if d.exec_() != d.Accepted:
|
||||
d.view_format.disconnect()
|
||||
break
|
||||
d.view_format.disconnect()
|
||||
changed.add(d.id)
|
||||
if d.row_delta == 0:
|
||||
break
|
||||
|
@ -26,7 +26,6 @@ class ViewAction(InterfaceAction):
|
||||
|
||||
def genesis(self):
|
||||
self.persistent_files = []
|
||||
self.metadata_view_id = None
|
||||
self.qaction.triggered.connect(self.view_book)
|
||||
self.view_menu = QMenu()
|
||||
self.view_menu.addAction(_('View'), partial(self.view_book, False))
|
||||
@ -51,14 +50,6 @@ class ViewAction(InterfaceAction):
|
||||
if fmt_path:
|
||||
self._view_file(fmt_path)
|
||||
|
||||
def metadata_view_format(self, fmt):
|
||||
fmt_path = self.gui.library_view.model().db.\
|
||||
format_abspath(self.metadata_view_id,
|
||||
fmt, index_is_id=True)
|
||||
if fmt_path:
|
||||
self._view_file(fmt_path)
|
||||
|
||||
|
||||
def book_downloaded_for_viewing(self, job):
|
||||
if job.failed:
|
||||
self.gui.device_job_exception(job)
|
||||
|
@ -184,8 +184,8 @@ class MyBlockingBusy(QDialog):
|
||||
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
|
||||
|
||||
s_r_functions = { '' : lambda x: x,
|
||||
_('Lower Case') : lambda x: x.lower(),
|
||||
_('Upper Case') : lambda x: x.upper(),
|
||||
_('Lower Case') : lambda x: icu_lower(x),
|
||||
_('Upper Case') : lambda x: icu_upper(x),
|
||||
_('Title Case') : lambda x: titlecase(x),
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,15 @@ class Item:
|
||||
return 'name=%s, label=%s, index=%s, exists='%(self.name, self.label, self.index, self.exists)
|
||||
|
||||
class TagCategories(QDialog, Ui_TagCategories):
|
||||
'''
|
||||
The structure of user_categories stored in preferences is
|
||||
{cat_name: [[name, category, v], [...], [...]], cat_name: [[name, category, v], ...]}
|
||||
where name is the item name, category is where it came from (series, etc),
|
||||
and v is a scratch area that this editor uses to keep track of categories.
|
||||
|
||||
If you add a category, it is permissible to set v to zero. If you delete
|
||||
a category, ensure that both the name and the category match.
|
||||
'''
|
||||
category_labels_orig = ['', 'authors', 'series', 'publisher', 'tags']
|
||||
|
||||
def __init__(self, window, db, on_category=None):
|
||||
|
@ -18,7 +18,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.config import tweaks, prefs
|
||||
from calibre.utils.date import dt_factory, qt_to_dt, isoformat
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.utils.icu import sort_key, strcmp as icu_strcmp
|
||||
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
|
||||
from calibre.utils.search_query_parser import SearchQueryParser
|
||||
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
|
||||
@ -1023,8 +1023,8 @@ class DeviceBooksModel(BooksModel): # {{{
|
||||
x = ''
|
||||
if y == None:
|
||||
y = ''
|
||||
x, y = x.strip().lower(), y.strip().lower()
|
||||
return cmp(x, y)
|
||||
x, y = icu_lower(x.strip()), icu_lower(y.strip())
|
||||
return icu_strcmp(x, y)
|
||||
return _strcmp
|
||||
def datecmp(x, y):
|
||||
x = self.db[x].datetime
|
||||
|
@ -223,7 +223,7 @@ EQUALS_MATCH = 1
|
||||
REGEXP_MATCH = 2
|
||||
def _match(query, value, matchkind):
|
||||
for t in value:
|
||||
t = t.lower()
|
||||
t = icu_lower(t)
|
||||
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
|
||||
if ((matchkind == EQUALS_MATCH and query == t) or
|
||||
(matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
|
||||
@ -505,7 +505,7 @@ class ResultCache(SearchQueryParser): # {{{
|
||||
query = query[1:]
|
||||
if matchkind != REGEXP_MATCH:
|
||||
# leave case in regexps because it can be significant e.g. \S \W \D
|
||||
query = query.lower()
|
||||
query = icu_lower(query)
|
||||
|
||||
if not isinstance(query, unicode):
|
||||
query = query.decode('utf-8')
|
||||
|
@ -1476,20 +1476,20 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
self.opts.log.warn(" '%s' != '%s'" % (author[1], current_author[1]))
|
||||
|
||||
# New author, save the previous author/sort/count
|
||||
unique_authors.append((current_author[0], current_author[1].title(),
|
||||
unique_authors.append((current_author[0], icu_title(current_author[1]),
|
||||
books_by_current_author))
|
||||
current_author = author
|
||||
books_by_current_author = 1
|
||||
elif i==0 and len(authors) == 1:
|
||||
# Allow for single-book lists
|
||||
unique_authors.append((current_author[0], current_author[1].title(),
|
||||
unique_authors.append((current_author[0], icu_title(current_author[1]),
|
||||
books_by_current_author))
|
||||
else:
|
||||
books_by_current_author += 1
|
||||
else:
|
||||
# Add final author to list or single-author dataset
|
||||
if (current_author == author and len(authors) > 1) or not multiple_authors:
|
||||
unique_authors.append((current_author[0], current_author[1].title(),
|
||||
unique_authors.append((current_author[0], icu_title(current_author[1]),
|
||||
books_by_current_author))
|
||||
|
||||
if False and self.verbose:
|
||||
|
@ -18,8 +18,9 @@ from functools import partial
|
||||
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
||||
from calibre.utils.config import tweaks
|
||||
from calibre.utils.date import parse_date, isoformat
|
||||
from calibre import isbytestring
|
||||
from calibre import isbytestring, force_unicode
|
||||
from calibre.constants import iswindows, DEBUG
|
||||
from calibre.utils.icu import strcmp
|
||||
|
||||
global_lock = RLock()
|
||||
|
||||
@ -115,8 +116,8 @@ def pynocase(one, two, encoding='utf-8'):
|
||||
pass
|
||||
return cmp(one.lower(), two.lower())
|
||||
|
||||
def icu_collator(s1, s2, func=None):
|
||||
return cmp(func(unicode(s1)), func(unicode(s2)))
|
||||
def icu_collator(s1, s2):
    """Compare two values as text and return the result of ``strcmp``.

    Both arguments are first passed through ``force_unicode`` with
    ``'utf-8'`` as the encoding, then handed to ``strcmp``.
    """
    left = force_unicode(s1, 'utf-8')
    right = force_unicode(s2, 'utf-8')
    return strcmp(left, right)
|
||||
|
||||
def load_c_extensions(conn, debug=DEBUG):
|
||||
try:
|
||||
@ -169,8 +170,7 @@ class DBThread(Thread):
|
||||
self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4()))
|
||||
# Dummy functions for dynamically created filters
|
||||
self.conn.create_function('books_list_filter', 1, lambda x: 1)
|
||||
from calibre.utils.icu import sort_key
|
||||
self.conn.create_collation('icucollate', partial(icu_collator, func=sort_key))
|
||||
self.conn.create_collation('icucollate', icu_collator)
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
|
@ -199,6 +199,10 @@ if not _run_once:
|
||||
|
||||
__builtin__.__dict__['lopen'] = local_open
|
||||
|
||||
from calibre.utils.icu import title_case, lower as icu_lower, upper as icu_upper
|
||||
__builtin__.__dict__['icu_lower'] = icu_lower
|
||||
__builtin__.__dict__['icu_upper'] = icu_upper
|
||||
__builtin__.__dict__['icu_title'] = title_case
|
||||
|
||||
import mimetypes
|
||||
mimetypes.init([P('mime.types')])
|
||||
|
@ -40,6 +40,11 @@ def titlecase(text):
|
||||
|
||||
"""
|
||||
|
||||
def capitalize(w):
    """Return *w* lower-cased, with only its first character upper-cased.

    Case conversion goes through the ``icu_lower`` / ``icu_upper``
    helpers rather than the ``str`` methods. An empty string is returned
    unchanged.

    :param w: the word to capitalize
    :return: the capitalized word
    """
    if not w:
        # Nothing to capitalize; indexing w[0] below would raise IndexError.
        return w
    w = icu_lower(w)
    # Build the result by slicing instead of str.replace(): replace() would
    # upper-case *every* occurrence of the first character ("dad" -> "DaD").
    return icu_upper(w[0]) + w[1:]
|
||||
|
||||
all_caps = ALL_CAPS.match(text)
|
||||
|
||||
words = re.split('\s', text)
|
||||
@ -50,29 +55,29 @@ def titlecase(text):
|
||||
line.append(word)
|
||||
continue
|
||||
else:
|
||||
word = word.lower()
|
||||
word = icu_lower(word)
|
||||
|
||||
if APOS_SECOND.match(word):
|
||||
word = word.replace(word[0], word[0].upper())
|
||||
word = word.replace(word[2], word[2].upper())
|
||||
word = word.replace(word[0], icu_upper(word[0]))
|
||||
word = word.replace(word[2], icu_upper(word[2]))
|
||||
line.append(word)
|
||||
continue
|
||||
if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
|
||||
line.append(word)
|
||||
continue
|
||||
if SMALL_WORDS.match(word):
|
||||
line.append(word.lower())
|
||||
line.append(icu_lower(word))
|
||||
continue
|
||||
|
||||
match = MAC_MC.match(word)
|
||||
if match:
|
||||
line.append("%s%s" % (match.group(1).capitalize(),
|
||||
match.group(2).capitalize()))
|
||||
line.append("%s%s" % (capitalize(match.group(1)),
|
||||
capitalize(match.group(2))))
|
||||
continue
|
||||
|
||||
hyphenated = []
|
||||
for item in word.split('-'):
|
||||
hyphenated.append(CAPFIRST.sub(lambda m: m.group(0).upper(), item))
|
||||
hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
|
||||
line.append("-".join(hyphenated))
|
||||
|
||||
|
||||
@ -80,14 +85,14 @@ def titlecase(text):
|
||||
|
||||
result = SMALL_FIRST.sub(lambda m: '%s%s' % (
|
||||
m.group(1),
|
||||
m.group(2).capitalize()
|
||||
capitalize(m.group(2))
|
||||
), result)
|
||||
|
||||
result = SMALL_LAST.sub(lambda m: m.group(0).capitalize(), result)
|
||||
result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result)
|
||||
|
||||
result = SUBPHRASE.sub(lambda m: '%s%s' % (
|
||||
m.group(1),
|
||||
m.group(2).capitalize()
|
||||
capitalize(m.group(2))
|
||||
), result)
|
||||
|
||||
return result
|
||||
|
Loading…
x
Reference in New Issue
Block a user