mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use the more powerful regex engine for book list and metadata from filename
Fixes #1893371 [regex [[\w--[A-Z]] does not work](https://bugs.launchpad.net/calibre/+bug/1893371)
This commit is contained in:
parent
1b5bfd9078
commit
9f1e1e5a18
@ -5,6 +5,14 @@
|
||||
# for important features/bug fixes.
|
||||
# Also, each release can have new and improved recipes.
|
||||
|
||||
# changes for 5
|
||||
# viewer supports annotations
|
||||
# viewer works with RTL and vertical text
|
||||
# python upgraded to python 3; link to list of not ported plugins
|
||||
# regex engine used for searching the book list and reading metadata from file names made more powerful
|
||||
# dark mode support in the content server and viewer UIs
|
||||
# content server viewer can now browse and create bookmarks
|
||||
|
||||
- version: 4.23.0
|
||||
date: 2020-08-21
|
||||
|
||||
|
@ -22,7 +22,9 @@ There are a few places calibre uses regular expressions. There's the
|
||||
:guilabel:`Search & replace` in conversion options, metadata detection from filenames in the import
|
||||
settings and Search & replace when editing the metadata of books in bulk. The
|
||||
calibre book editor can also use regular expressions in its search and replace
|
||||
feature.
|
||||
feature. Finally, you can use regular expressions when searching the calibre
|
||||
book list and when searching inside the calibre viewer.
|
||||
|
||||
|
||||
What on earth *is* a regular expression?
|
||||
------------------------------------------------
|
||||
|
@ -2,9 +2,7 @@ Quick reference for regexp syntax
|
||||
=================================================
|
||||
|
||||
This checklist summarizes the most commonly used/hard to remember parts of the
|
||||
regexp engine available in the calibre edit and conversion search/replace
|
||||
features. Note that this engine is more powerful than the basic regexp engine
|
||||
used throughout the rest of calibre.
|
||||
regexp engine available in most parts of calibre.
|
||||
|
||||
.. contents:: Contents
|
||||
:depth: 2
|
||||
@ -334,4 +332,3 @@ Modes
|
||||
``(?m)``
|
||||
Makes the ``^`` and ``$`` anchors match the start and end of lines
|
||||
instead of the start and end of the entire string.
|
||||
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, weakref, operator
|
||||
import regex, weakref, operator
|
||||
from functools import partial
|
||||
from datetime import timedelta
|
||||
from collections import deque, OrderedDict
|
||||
@ -72,7 +72,8 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit
|
||||
elif query == t:
|
||||
return True
|
||||
elif matchkind == REGEXP_MATCH:
|
||||
if re.search(query, t, re.UNICODE if case_sensitive else re.I|re.UNICODE):
|
||||
flags = regex.UNICODE | regex.VERSION1 | (0 if case_sensitive else regex.IGNORECASE)
|
||||
if regex.search(query, t, flags) is not None:
|
||||
return True
|
||||
elif matchkind == CONTAINS_MATCH:
|
||||
if not case_sensitive and use_primary_find_in_search:
|
||||
@ -80,7 +81,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit
|
||||
return True
|
||||
elif query in t:
|
||||
return True
|
||||
except re.error:
|
||||
except regex.error:
|
||||
pass
|
||||
return False
|
||||
# }}}
|
||||
@ -100,7 +101,7 @@ class DateSearch(object): # {{{
|
||||
self.local_today = {'_today', 'today', icu_lower(_('today'))}
|
||||
self.local_yesterday = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
|
||||
self.local_thismonth = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
|
||||
self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))
|
||||
self.daysago_pat = regex.compile(r'(%s|daysago|_daysago)$'%_('daysago'), flags=regex.UNICODE | regex.VERSION1)
|
||||
|
||||
def eq(self, dbdate, query, field_count):
|
||||
if dbdate.year == query.year:
|
||||
|
@ -3,7 +3,7 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, re, collections
|
||||
import os, regex, collections
|
||||
|
||||
from calibre.utils.config import prefs
|
||||
from calibre.constants import filesystem_encoding
|
||||
@ -105,8 +105,8 @@ def _get_metadata(stream, stream_type, use_libprs_metadata,
|
||||
|
||||
name = os.path.basename(getattr(stream, 'name', ''))
|
||||
# The fallback pattern matches the default filename format produced by calibre
|
||||
base = metadata_from_filename(name, pat=pattern, fallback_pat=re.compile(
|
||||
r'^(?P<title>.+) - (?P<author>[^-]+)$'))
|
||||
base = metadata_from_filename(name, pat=pattern, fallback_pat=regex.compile(
|
||||
r'^(?P<title>.+) - (?P<author>[^-]+)$', flags=regex.UNICODE | regex.VERSION1))
|
||||
if not base.authors:
|
||||
base.authors = [_('Unknown')]
|
||||
if not base.title:
|
||||
@ -133,7 +133,7 @@ def metadata_from_filename(name, pat=None, fallback_pat=None):
|
||||
name = name.rpartition('.')[0]
|
||||
mi = MetaInformation(None, None)
|
||||
if pat is None:
|
||||
pat = re.compile(prefs.get('filename_pattern'))
|
||||
pat = regex.compile(prefs.get('filename_pattern'), flags=regex.UNICODE | regex.VERSION1)
|
||||
name = name.replace('_', ' ')
|
||||
match = pat.search(name)
|
||||
if match is None and fallback_pat is not None:
|
||||
|
Loading…
x
Reference in New Issue
Block a user