mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use the more powerful regex engine for book list and metadata from filename
Fixes #1893371 [regex [[\w--[A-Z]] does not work](https://bugs.launchpad.net/calibre/+bug/1893371)
This commit is contained in:
parent
1b5bfd9078
commit
9f1e1e5a18
@ -5,6 +5,14 @@
|
|||||||
# for important features/bug fixes.
|
# for important features/bug fixes.
|
||||||
# Also, each release can have new and improved recipes.
|
# Also, each release can have new and improved recipes.
|
||||||
|
|
||||||
|
# changes for 5
|
||||||
|
# viewer supports annotations
|
||||||
|
# viewer works with RTL and vertical text
|
||||||
|
# python upgraded to python 3; link to list of not ported plugins
|
||||||
|
# regex engine used for searching book list and metadata from file names made more powerful
|
||||||
|
# dark mode support in the content server and viewer UIs
|
||||||
|
# content server viewer can now browse and create bookmarks
|
||||||
|
|
||||||
- version: 4.23.0
|
- version: 4.23.0
|
||||||
date: 2020-08-21
|
date: 2020-08-21
|
||||||
|
|
||||||
|
@ -22,7 +22,9 @@ There are a few places calibre uses regular expressions. There's the
|
|||||||
:guilabel:`Search & replace` in conversion options, metadata detection from filenames in the import
|
:guilabel:`Search & replace` in conversion options, metadata detection from filenames in the import
|
||||||
settings and Search & replace when editing the metadata of books in bulk. The
|
settings and Search & replace when editing the metadata of books in bulk. The
|
||||||
calibre book editor can also use regular expressions in its search and replace
|
calibre book editor can also use regular expressions in its search and replace
|
||||||
feature.
|
feature. Finally, you can use regular expressions when searching the calibre
|
||||||
|
book list and when searching inside the calibre viewer.
|
||||||
|
|
||||||
|
|
||||||
What on earth *is* a regular expression?
|
What on earth *is* a regular expression?
|
||||||
------------------------------------------------
|
------------------------------------------------
|
||||||
|
@ -2,9 +2,7 @@ Quick reference for regexp syntax
|
|||||||
=================================================
|
=================================================
|
||||||
|
|
||||||
This checklist summarizes the most commonly used/hard to remember parts of the
|
This checklist summarizes the most commonly used/hard to remember parts of the
|
||||||
regexp engine available in the calibre edit and conversion search/replace
|
regexp engine available in most parts of calibre.
|
||||||
features. Note that this engine is more powerful than the basic regexp engine
|
|
||||||
used throughout the rest of calibre.
|
|
||||||
|
|
||||||
.. contents:: Contents
|
.. contents:: Contents
|
||||||
:depth: 2
|
:depth: 2
|
||||||
@ -334,4 +332,3 @@ Modes
|
|||||||
``(?m)``
|
``(?m)``
|
||||||
Makes the ``^`` and ``$`` anchors match the start and end of lines
|
Makes the ``^`` and ``$`` anchors match the start and end of lines
|
||||||
instead of the start and end of the entire string.
|
instead of the start and end of the entire string.
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, weakref, operator
|
import regex, weakref, operator
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from collections import deque, OrderedDict
|
from collections import deque, OrderedDict
|
||||||
@ -72,7 +72,8 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit
|
|||||||
elif query == t:
|
elif query == t:
|
||||||
return True
|
return True
|
||||||
elif matchkind == REGEXP_MATCH:
|
elif matchkind == REGEXP_MATCH:
|
||||||
if re.search(query, t, re.UNICODE if case_sensitive else re.I|re.UNICODE):
|
flags = regex.UNICODE | regex.VERSION1 | (0 if case_sensitive else regex.IGNORECASE)
|
||||||
|
if regex.search(query, t, flags) is not None:
|
||||||
return True
|
return True
|
||||||
elif matchkind == CONTAINS_MATCH:
|
elif matchkind == CONTAINS_MATCH:
|
||||||
if not case_sensitive and use_primary_find_in_search:
|
if not case_sensitive and use_primary_find_in_search:
|
||||||
@ -80,7 +81,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit
|
|||||||
return True
|
return True
|
||||||
elif query in t:
|
elif query in t:
|
||||||
return True
|
return True
|
||||||
except re.error:
|
except regex.error:
|
||||||
pass
|
pass
|
||||||
return False
|
return False
|
||||||
# }}}
|
# }}}
|
||||||
@ -100,7 +101,7 @@ class DateSearch(object): # {{{
|
|||||||
self.local_today = {'_today', 'today', icu_lower(_('today'))}
|
self.local_today = {'_today', 'today', icu_lower(_('today'))}
|
||||||
self.local_yesterday = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
|
self.local_yesterday = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
|
||||||
self.local_thismonth = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
|
self.local_thismonth = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
|
||||||
self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))
|
self.daysago_pat = regex.compile(r'(%s|daysago|_daysago)$'%_('daysago'), flags=regex.UNICODE | regex.VERSION1)
|
||||||
|
|
||||||
def eq(self, dbdate, query, field_count):
|
def eq(self, dbdate, query, field_count):
|
||||||
if dbdate.year == query.year:
|
if dbdate.year == query.year:
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import os, re, collections
|
import os, regex, collections
|
||||||
|
|
||||||
from calibre.utils.config import prefs
|
from calibre.utils.config import prefs
|
||||||
from calibre.constants import filesystem_encoding
|
from calibre.constants import filesystem_encoding
|
||||||
@ -105,8 +105,8 @@ def _get_metadata(stream, stream_type, use_libprs_metadata,
|
|||||||
|
|
||||||
name = os.path.basename(getattr(stream, 'name', ''))
|
name = os.path.basename(getattr(stream, 'name', ''))
|
||||||
# The fallback pattern matches the default filename format produced by calibre
|
# The fallback pattern matches the default filename format produced by calibre
|
||||||
base = metadata_from_filename(name, pat=pattern, fallback_pat=re.compile(
|
base = metadata_from_filename(name, pat=pattern, fallback_pat=regex.compile(
|
||||||
r'^(?P<title>.+) - (?P<author>[^-]+)$'))
|
r'^(?P<title>.+) - (?P<author>[^-]+)$', flags=regex.UNICODE | regex.VERSION1))
|
||||||
if not base.authors:
|
if not base.authors:
|
||||||
base.authors = [_('Unknown')]
|
base.authors = [_('Unknown')]
|
||||||
if not base.title:
|
if not base.title:
|
||||||
@ -133,7 +133,7 @@ def metadata_from_filename(name, pat=None, fallback_pat=None):
|
|||||||
name = name.rpartition('.')[0]
|
name = name.rpartition('.')[0]
|
||||||
mi = MetaInformation(None, None)
|
mi = MetaInformation(None, None)
|
||||||
if pat is None:
|
if pat is None:
|
||||||
pat = re.compile(prefs.get('filename_pattern'))
|
pat = regex.compile(prefs.get('filename_pattern'), flags=regex.UNICODE | regex.VERSION1)
|
||||||
name = name.replace('_', ' ')
|
name = name.replace('_', ' ')
|
||||||
match = pat.search(name)
|
match = pat.search(name)
|
||||||
if match is None and fallback_pat is not None:
|
if match is None and fallback_pat is not None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user