mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
pep8
This commit is contained in:
parent
70a131f04c
commit
f91b9c8e51
@ -162,7 +162,7 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
def scrape_article_date(self, soup):
|
def scrape_article_date(self, soup):
|
||||||
for span in soup.findAll('span'):
|
for span in soup.findAll('span'):
|
||||||
txt = self.text(span)
|
txt = self.text(span)
|
||||||
rgx = re.compile('Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*')
|
rgx = re.compile(unicode(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*'))
|
||||||
hit = rgx.match(txt)
|
hit = rgx.match(txt)
|
||||||
if hit:
|
if hit:
|
||||||
return self.date_from_string(txt)
|
return self.date_from_string(txt)
|
||||||
|
@ -106,5 +106,5 @@ class AppledailyTW(BasicNewsRecipe):
|
|||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
raw_html = re.sub(unicode(r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
|
raw_html = re.sub(unicode(r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
|
||||||
raw_html = re.sub(
|
raw_html = re.sub(
|
||||||
unicode(r'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>'), raw_html)
|
unicode(r'<title>(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html)
|
||||||
return raw_html
|
return raw_html
|
||||||
|
@ -78,7 +78,7 @@ class AdvancedUserRecipe1390132023(BasicNewsRecipe):
|
|||||||
'http://cdn.images.express.co.uk/img/covers/')})
|
'http://cdn.images.express.co.uk/img/covers/')})
|
||||||
cov = str(cov)
|
cov = str(cov)
|
||||||
cov2 = re.findall(
|
cov2 = re.findall(
|
||||||
'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
||||||
|
|
||||||
cov = str(cov2)
|
cov = str(cov2)
|
||||||
cov = cov[2:len(cov) - 2]
|
cov = cov[2:len(cov) - 2]
|
||||||
|
@ -22,7 +22,7 @@ class FilmWebPl(BasicNewsRecipe):
|
|||||||
'ul.sep-line > li + li::before {content: " | "} '
|
'ul.sep-line > li + li::before {content: " | "} '
|
||||||
'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
|
'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
|
||||||
preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags...
|
preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags...
|
||||||
(re.compile(u'(?:<sup>)?\(kliknij\,\ aby powiększyć\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
|
(re.compile(u'(?:<sup>)?\\(kliknij\\,\\ aby powiększyć\\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
|
||||||
(re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
|
(re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
|
||||||
]
|
]
|
||||||
remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
|
remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
|
||||||
|
@ -62,7 +62,7 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = []
|
remove_tags = []
|
||||||
remove_tags.append(dict(name='a', attrs={'href': re.compile(
|
remove_tags.append(dict(name='a', attrs={'href': re.compile(
|
||||||
'http://feedads\.g\.doubleclick.net.*')}))
|
'http://feedads\\.g\\.doubleclick.net.*')}))
|
||||||
remove_tags.append(dict(name='div', attrs={'class': 'feedflare'}))
|
remove_tags.append(dict(name='div', attrs={'class': 'feedflare'}))
|
||||||
remove_tags.append(dict(name='a', attrs={'class': 'home_pixie'}))
|
remove_tags.append(dict(name='a', attrs={'class': 'home_pixie'}))
|
||||||
remove_tags.append(dict(name='div', attrs={'id': [
|
remove_tags.append(dict(name='div', attrs={'id': [
|
||||||
|
@ -144,7 +144,7 @@ class LentaRURecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
# Place article date after header
|
# Place article date after header
|
||||||
dates = soup.findAll(text=re.compile(
|
dates = soup.findAll(text=re.compile(
|
||||||
'\d{2}\.\d{2}\.\d{4}, \d{2}:\d{2}:\d{2}'))
|
r'\d{2}\.\d{2}\.\d{4}, \d{2}:\d{2}:\d{2}'))
|
||||||
if dates:
|
if dates:
|
||||||
for date in dates:
|
for date in dates:
|
||||||
for string in date:
|
for string in date:
|
||||||
|
@ -121,7 +121,7 @@ class WeeklyLWN(BasicNewsRecipe):
|
|||||||
if article_anchor:
|
if article_anchor:
|
||||||
article_url = article_anchor.get('href')
|
article_url = article_anchor.get('href')
|
||||||
if not article_url:
|
if not article_url:
|
||||||
print('article_url is None for article_anchor "%s": "%s"' \
|
print('article_url is None for article_anchor "%s": "%s"'
|
||||||
% (str(article_anchor), article_title), file=sys.stderr)
|
% (str(article_anchor), article_title), file=sys.stderr)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -84,11 +84,11 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['iframe', 'script', 'noscript', 'style']),
|
dict(name=['iframe', 'script', 'noscript', 'style']),
|
||||||
dict(name='div', attrs={'class': ['fact-related-box', 'aside clearfix', 'aside clearfix middle-col-line', 'comments', 'share-tools', 'article-right-column', 'column-4-5', 'column-1-5', 'ad-msg', 'col-179 ', 'col-373 ', 'clear', 'ad', 'navigation', re.compile('share-tools(-top)?'), 'tools', 'metroCommentFormWrap', 'article-tools-below-title', 'related-links', 'padding-top-15', re.compile('^promo.*?$'), 'teaser-component', re.compile('fb(-comments|_iframe_widget)'), 'promos', 'header-links', 'promo-2']}), # noqa
|
dict(name='div', attrs={'class': ['fact-related-box', 'aside clearfix', 'aside clearfix middle-col-line', 'comments', 'share-tools', 'article-right-column', 'column-4-5', 'column-1-5', 'ad-msg', 'col-179 ', 'col-373 ', 'clear', 'ad', 'navigation', re.compile('share-tools(-top)?'), 'tools', 'metroCommentFormWrap', 'article-tools-below-title', 'related-links', 'padding-top-15', re.compile('^promo.*?$'), 'teaser-component', re.compile('fb(-comments|_iframe_widget)'), 'promos', 'header-links', 'promo-2']}), # noqa
|
||||||
dict(id=['super-carousel', 'article-2', 'googleads', 'column-1-5-bottom', 'column-4-5', re.compile('^ad(\d+|adcomp.*?)?$'), 'adadcomp-4', 'margin-5', 'sidebar', re.compile('^article-\d'), 'comments', 'gallery-1', 'sharez_container', 'ts-container', 'topshares', 'ts-title']), # noqa
|
dict(id=['super-carousel', 'article-2', 'googleads', 'column-1-5-bottom', 'column-4-5', re.compile('^ad(\\d+|adcomp.*?)?$'), 'adadcomp-4', 'margin-5', 'sidebar', re.compile('^article-\\d'), 'comments', 'gallery-1', 'sharez_container', 'ts-container', 'topshares', 'ts-title']), # noqa
|
||||||
dict(name='a', attrs={'name': 'comments'}),
|
dict(name='a', attrs={'name': 'comments'}),
|
||||||
dict(name='img', attrs={'class': 'top-line',
|
dict(name='img', attrs={'class': 'top-line',
|
||||||
'title': 'volledig scherm'}),
|
'title': 'volledig scherm'}),
|
||||||
dict(attrs={'style': re.compile('^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$'), 'title': 'volledig scherm'})]
|
dict(attrs={'style': re.compile('^(.*(display\\s?:\\s?none|img-mask|white)\\s?;?.*)$'), 'title': 'volledig scherm'})]
|
||||||
|
|
||||||
'''removed by before/after:
|
'''removed by before/after:
|
||||||
id:
|
id:
|
||||||
@ -223,7 +223,7 @@ class MerryProcess(BeautifulSoup):
|
|||||||
return self.myKiller.safeRemovePart(souparray, True)
|
return self.myKiller.safeRemovePart(souparray, True)
|
||||||
|
|
||||||
def removeEmptyTags(self, soup, run=0):
|
def removeEmptyTags(self, soup, run=0):
|
||||||
emptymatches = re.compile('^[ \s\n\r\t ]*$')
|
emptymatches = re.compile('^[ \\s\n\r\t ]*$')
|
||||||
emptytags = soup.findAll(lambda tag: tag.find(True) is None and (
|
emptytags = soup.findAll(lambda tag: tag.find(True) is None and (
|
||||||
tag.string is None or tag.string.strip() == "" or tag.string.strip() == emptymatches) and not tag.isSelfClosing)
|
tag.string is None or tag.string.strip() == "" or tag.string.strip() == emptymatches) and not tag.isSelfClosing)
|
||||||
if emptytags and not (emptytags is None or emptytags == []):
|
if emptytags and not (emptytags is None or emptytags == []):
|
||||||
|
@ -29,7 +29,7 @@ class ObservatorulCultural(BasicNewsRecipe):
|
|||||||
soup = self.index_to_soup(
|
soup = self.index_to_soup(
|
||||||
'http://www.observatorcultural.ro/Arhiva*-archive.html')
|
'http://www.observatorcultural.ro/Arhiva*-archive.html')
|
||||||
issueTag = soup.find('a', href=re.compile(
|
issueTag = soup.find('a', href=re.compile(
|
||||||
"observatorcultural.ro\/Numarul"))
|
"observatorcultural.ro\\/Numarul"))
|
||||||
issueURL = issueTag['href']
|
issueURL = issueTag['href']
|
||||||
print(issueURL)
|
print(issueURL)
|
||||||
issueSoup = self.index_to_soup(issueURL)
|
issueSoup = self.index_to_soup(issueURL)
|
||||||
|
@ -83,6 +83,7 @@ def get_icons(zfp, name_or_list_of_names):
|
|||||||
ians = ians.pop(names[0])
|
ians = ians.pop(names[0])
|
||||||
return ians
|
return ians
|
||||||
|
|
||||||
|
|
||||||
_translations_cache = {}
|
_translations_cache = {}
|
||||||
|
|
||||||
|
|
||||||
@ -316,4 +317,3 @@ if __name__ == '__main__':
|
|||||||
zf.write(os.path.join(x, y))
|
zf.write(os.path.join(x, y))
|
||||||
add_plugin(f.name)
|
add_plugin(f.name)
|
||||||
print('Added plugin from', sys.argv[-1])
|
print('Added plugin from', sys.argv[-1])
|
||||||
|
|
||||||
|
@ -40,5 +40,6 @@ def main():
|
|||||||
show_stats(stats)
|
show_stats(stats)
|
||||||
print('Stats saved to', stats)
|
print('Stats saved to', stats)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -395,5 +395,6 @@ def main():
|
|||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -95,6 +95,6 @@ def main():
|
|||||||
|
|
||||||
print('Device connection shutdown')
|
print('Device connection shutdown')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
@ -211,7 +211,6 @@ def test_udisks(ver=None):
|
|||||||
print('Ejecting:')
|
print('Ejecting:')
|
||||||
u.eject(dev)
|
u.eject(dev)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test_udisks()
|
test_udisks()
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,6 +53,7 @@ class GUID(Structure):
|
|||||||
''.join(["%02x" % d for d in self.data4[2:]]),
|
''.join(["%02x" % d for d in self.data4[2:]]),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
CONFIGRET = DWORD
|
CONFIGRET = DWORD
|
||||||
DEVINST = DWORD
|
DEVINST = DWORD
|
||||||
LPDWORD = POINTER(DWORD)
|
LPDWORD = POINTER(DWORD)
|
||||||
@ -70,6 +71,8 @@ def CTL_CODE(DeviceType, Function, Method, Access):
|
|||||||
def USB_CTL(id):
|
def USB_CTL(id):
|
||||||
# CTL_CODE(FILE_DEVICE_USB, (id), METHOD_BUFFERED, FILE_ANY_ACCESS)
|
# CTL_CODE(FILE_DEVICE_USB, (id), METHOD_BUFFERED, FILE_ANY_ACCESS)
|
||||||
return CTL_CODE(0x22, id, 0, 0)
|
return CTL_CODE(0x22, id, 0, 0)
|
||||||
|
|
||||||
|
|
||||||
IOCTL_USB_GET_ROOT_HUB_NAME = USB_CTL(258)
|
IOCTL_USB_GET_ROOT_HUB_NAME = USB_CTL(258)
|
||||||
IOCTL_USB_GET_NODE_INFORMATION = USB_CTL(258)
|
IOCTL_USB_GET_NODE_INFORMATION = USB_CTL(258)
|
||||||
IOCTL_USB_GET_NODE_CONNECTION_INFORMATION = USB_CTL(259)
|
IOCTL_USB_GET_NODE_CONNECTION_INFORMATION = USB_CTL(259)
|
||||||
@ -108,6 +111,7 @@ class SP_DEVINFO_DATA(Structure):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "ClassGuid:%s DevInst:%s" % (self.ClassGuid, self.DevInst)
|
return "ClassGuid:%s DevInst:%s" % (self.ClassGuid, self.DevInst)
|
||||||
|
|
||||||
|
|
||||||
PSP_DEVINFO_DATA = POINTER(SP_DEVINFO_DATA)
|
PSP_DEVINFO_DATA = POINTER(SP_DEVINFO_DATA)
|
||||||
|
|
||||||
|
|
||||||
@ -122,6 +126,7 @@ class SP_DEVICE_INTERFACE_DATA(Structure):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "InterfaceClassGuid:%s Flags:%s" % (self.InterfaceClassGuid, self.Flags)
|
return "InterfaceClassGuid:%s Flags:%s" % (self.InterfaceClassGuid, self.Flags)
|
||||||
|
|
||||||
|
|
||||||
ANYSIZE_ARRAY = 1
|
ANYSIZE_ARRAY = 1
|
||||||
|
|
||||||
|
|
||||||
@ -131,6 +136,7 @@ class SP_DEVICE_INTERFACE_DETAIL_DATA(Structure):
|
|||||||
("DevicePath", c_wchar*ANYSIZE_ARRAY)
|
("DevicePath", c_wchar*ANYSIZE_ARRAY)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
UCHAR = c_ubyte
|
UCHAR = c_ubyte
|
||||||
|
|
||||||
|
|
||||||
@ -216,6 +222,7 @@ class USB_DESCRIPTOR_REQUEST(Structure):
|
|||||||
('Data', USB_STRING_DESCRIPTOR),
|
('Data', USB_STRING_DESCRIPTOR),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
PUSB_DESCRIPTOR_REQUEST = POINTER(USB_DESCRIPTOR_REQUEST)
|
PUSB_DESCRIPTOR_REQUEST = POINTER(USB_DESCRIPTOR_REQUEST)
|
||||||
PSP_DEVICE_INTERFACE_DETAIL_DATA = POINTER(SP_DEVICE_INTERFACE_DETAIL_DATA)
|
PSP_DEVICE_INTERFACE_DETAIL_DATA = POINTER(SP_DEVICE_INTERFACE_DETAIL_DATA)
|
||||||
PSP_DEVICE_INTERFACE_DATA = POINTER(SP_DEVICE_INTERFACE_DATA)
|
PSP_DEVICE_INTERFACE_DATA = POINTER(SP_DEVICE_INTERFACE_DATA)
|
||||||
@ -390,6 +397,7 @@ def config_err_check(result, func, args):
|
|||||||
raise WindowsError(result, 'The cfgmgr32 function failed with err: %s' % CR_CODE_NAMES.get(result, result))
|
raise WindowsError(result, 'The cfgmgr32 function failed with err: %s' % CR_CODE_NAMES.get(result, result))
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
GetLogicalDrives = cwrap('GetLogicalDrives', DWORD, errcheck=bool_err_check, lib=kernel32)
|
GetLogicalDrives = cwrap('GetLogicalDrives', DWORD, errcheck=bool_err_check, lib=kernel32)
|
||||||
GetDriveType = cwrap('GetDriveTypeW', UINT, LPCWSTR, lib=kernel32)
|
GetDriveType = cwrap('GetDriveTypeW', UINT, LPCWSTR, lib=kernel32)
|
||||||
GetVolumeNameForVolumeMountPoint = cwrap('GetVolumeNameForVolumeMountPointW', BOOL, LPCWSTR, LPWSTR, DWORD, errcheck=bool_err_check, lib=kernel32)
|
GetVolumeNameForVolumeMountPoint = cwrap('GetVolumeNameForVolumeMountPointW', BOOL, LPCWSTR, LPWSTR, DWORD, errcheck=bool_err_check, lib=kernel32)
|
||||||
@ -675,6 +683,7 @@ def get_volume_pathnames(volume_id, buf=None):
|
|||||||
|
|
||||||
# def scan_usb_devices(): {{{
|
# def scan_usb_devices(): {{{
|
||||||
|
|
||||||
|
|
||||||
_USBDevice = namedtuple('USBDevice', 'vendor_id product_id bcd devid devinst')
|
_USBDevice = namedtuple('USBDevice', 'vendor_id product_id bcd devid devinst')
|
||||||
|
|
||||||
|
|
||||||
@ -1017,6 +1026,7 @@ def drives_for(vendor_id, product_id=None):
|
|||||||
pprint(get_drive_letters_for_device(usbdev, debug=True))
|
pprint(get_drive_letters_for_device(usbdev, debug=True))
|
||||||
print('USB info:', get_usb_info(usbdev, debug=True))
|
print('USB info:', get_usb_info(usbdev, debug=True))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
develop()
|
develop()
|
||||||
# }}}
|
# }}}
|
||||||
|
@ -32,11 +32,11 @@ class HeuristicProcessor(object):
|
|||||||
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}(?!\s*<h\d)', re.IGNORECASE)
|
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}(?!\s*<h\d)', re.IGNORECASE)
|
||||||
self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}', re.IGNORECASE)
|
self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}', re.IGNORECASE)
|
||||||
self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*" # noqa
|
self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*" # noqa
|
||||||
self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
|
self.line_close = "(</(?P=inner3)>)?\\s*(</(?P=inner2)>)?\\s*(</(?P=inner1)>)?\\s*</(?P=outer)>"
|
||||||
self.single_blank = re.compile(r'(\s*<(p|div)[^>]*>\s*</(p|div)>)', re.IGNORECASE)
|
self.single_blank = re.compile(r'(\s*<(p|div)[^>]*>\s*</(p|div)>)', re.IGNORECASE)
|
||||||
self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
|
self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
|
||||||
self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]'
|
self.common_in_text_endings = u'[\"\'—’”,\\.!\\?\\…\\)„\\w]'
|
||||||
self.common_in_text_beginnings = u'[\w\'\"“‘‛]'
|
self.common_in_text_beginnings = u'[\\w\'\"“‘‛]'
|
||||||
|
|
||||||
def is_pdftohtml(self, src):
|
def is_pdftohtml(self, src):
|
||||||
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
||||||
@ -54,10 +54,10 @@ class HeuristicProcessor(object):
|
|||||||
" chapters. - " + unicode(chap))
|
" chapters. - " + unicode(chap))
|
||||||
return '<h2>'+chap+'</h2>\n'
|
return '<h2>'+chap+'</h2>\n'
|
||||||
else:
|
else:
|
||||||
delete_whitespace = re.compile('^\s*(?P<c>.*?)\s*$')
|
delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
|
||||||
delete_quotes = re.compile('\'\"')
|
delete_quotes = re.compile('\'\"')
|
||||||
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(chap)))
|
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
|
||||||
txt_title = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(title)))
|
txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
|
||||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
||||||
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
|
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
|
||||||
@ -216,24 +216,24 @@ class HeuristicProcessor(object):
|
|||||||
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*" # noqa
|
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*" # noqa
|
||||||
chapter_header_open = r"(?P<chap>"
|
chapter_header_open = r"(?P<chap>"
|
||||||
title_header_open = r"(?P<title>"
|
title_header_open = r"(?P<title>"
|
||||||
chapter_header_close = ")\s*"
|
chapter_header_close = ")\\s*"
|
||||||
title_header_close = ")"
|
title_header_close = ")"
|
||||||
chapter_line_close = self.line_close
|
chapter_line_close = self.line_close
|
||||||
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
|
title_line_close = "(</(?P=inner6)>)?\\s*(</(?P=inner5)>)?\\s*(</(?P=inner4)>)?\\s*</(?P=outer2)>"
|
||||||
|
|
||||||
is_pdftohtml = self.is_pdftohtml(html)
|
is_pdftohtml = self.is_pdftohtml(html)
|
||||||
if is_pdftohtml:
|
if is_pdftohtml:
|
||||||
title_line_open = "<(?P<outer2>p)[^>]*>\s*"
|
title_line_open = "<(?P<outer2>p)[^>]*>\\s*"
|
||||||
title_line_close = "\s*</(?P=outer2)>"
|
title_line_close = "\\s*</(?P=outer2)>"
|
||||||
|
|
||||||
if blanks_between_paragraphs:
|
if blanks_between_paragraphs:
|
||||||
blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
|
blank_lines = "(\\s*<p[^>]*>\\s*</p>){0,2}\\s*"
|
||||||
else:
|
else:
|
||||||
blank_lines = ""
|
blank_lines = ""
|
||||||
opt_title_open = "("
|
opt_title_open = "("
|
||||||
opt_title_close = ")?"
|
opt_title_close = ")?"
|
||||||
n_lookahead_open = "(?!\s*"
|
n_lookahead_open = "(?!\\s*"
|
||||||
n_lookahead_close = ")\s*"
|
n_lookahead_close = ")\\s*"
|
||||||
|
|
||||||
default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'’\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
|
default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'’\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
|
||||||
simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)"
|
simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)"
|
||||||
@ -358,12 +358,12 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
# define the pieces of the regex
|
# define the pieces of the regex
|
||||||
# (?<!\&\w{4});) is a semicolon not part of an entity
|
# (?<!\&\w{4});) is a semicolon not part of an entity
|
||||||
lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))"
|
lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
|
||||||
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
|
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
|
||||||
soft_hyphen = u"\xad"
|
soft_hyphen = u"\xad"
|
||||||
line_ending = "\s*(?P<style_close></(span|[iub])>)?\s*(</(p|div)>)?"
|
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
|
||||||
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
|
blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
|
||||||
line_opening = "<(p|div)[^>]*>\s*(?P<style_open><(span|[iub])[^>]*>)?\s*"
|
line_opening = "<(p|div)[^>]*>\\s*(?P<style_open><(span|[iub])[^>]*>)?\\s*"
|
||||||
txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
|
txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
|
||||||
|
|
||||||
if format == 'txt':
|
if format == 'txt':
|
||||||
@ -414,8 +414,8 @@ class HeuristicProcessor(object):
|
|||||||
return html
|
return html
|
||||||
|
|
||||||
def arrange_htm_line_endings(self, html):
|
def arrange_htm_line_endings(self, html):
|
||||||
html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\g<tag>"+">\n", html)
|
html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\\g<tag>"+">\n", html)
|
||||||
html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\g<tag>"+"\g<style>"+">", html)
|
html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\\g<tag>"+"\\g<style>"+">", html)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def fix_nbsp_indents(self, html):
|
def fix_nbsp_indents(self, html):
|
||||||
@ -432,7 +432,7 @@ class HeuristicProcessor(object):
|
|||||||
# Get rid of empty <o:p> tags to simplify other processing
|
# Get rid of empty <o:p> tags to simplify other processing
|
||||||
html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
|
html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
|
||||||
# Delete microsoft 'smart' tags
|
# Delete microsoft 'smart' tags
|
||||||
html = re.sub('(?i)</?st1:\w+>', '', html)
|
html = re.sub('(?i)</?st1:\\w+>', '', html)
|
||||||
# Re-open self closing paragraph tags
|
# Re-open self closing paragraph tags
|
||||||
html = re.sub('<p[^>/]*/>', '<p> </p>', html)
|
html = re.sub('<p[^>/]*/>', '<p> </p>', html)
|
||||||
# Get rid of empty span, bold, font, em, & italics tags
|
# Get rid of empty span, bold, font, em, & italics tags
|
||||||
@ -443,7 +443,7 @@ class HeuristicProcessor(object):
|
|||||||
html = re.sub(
|
html = re.sub(
|
||||||
r"\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}".format(open=open_fmt_pat, close=close_fmt_pat) , " ", html)
|
r"\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}".format(open=open_fmt_pat, close=close_fmt_pat) , " ", html)
|
||||||
# delete surrounding divs from empty paragraphs
|
# delete surrounding divs from empty paragraphs
|
||||||
html = re.sub('<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html)
|
html = re.sub('<div[^>]*>\\s*<p[^>]*>\\s*</p>\\s*</div>', '<p> </p>', html)
|
||||||
# Empty heading tags
|
# Empty heading tags
|
||||||
html = re.sub(r'(?i)<h\d+>\s*</h\d+>', '', html)
|
html = re.sub(r'(?i)<h\d+>\s*</h\d+>', '', html)
|
||||||
self.deleted_nbsps = True
|
self.deleted_nbsps = True
|
||||||
@ -527,7 +527,7 @@ class HeuristicProcessor(object):
|
|||||||
elif content.find('scenebreak') != -1:
|
elif content.find('scenebreak') != -1:
|
||||||
return content
|
return content
|
||||||
else:
|
else:
|
||||||
content = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '\n\n<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', content)
|
content = re.sub('(?i)<h(?P<hnum>\\d+)[^>]*>', '\n\n<h'+'\\g<hnum>'+' style="'+top_margin+bottom_margin+'">', content)
|
||||||
return content
|
return content
|
||||||
|
|
||||||
html = blanks_around_headings.sub(merge_header_whitespace, html)
|
html = blanks_around_headings.sub(merge_header_whitespace, html)
|
||||||
@ -540,15 +540,15 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
html = blanks_n_nopunct.sub(markup_whitespaces, html)
|
html = blanks_n_nopunct.sub(markup_whitespaces, html)
|
||||||
if self.html_preprocess_sections > self.min_chapters:
|
if self.html_preprocess_sections > self.min_chapters:
|
||||||
html = re.sub('(?si)^.*?(?=<h\d)', markup_whitespaces, html)
|
html = re.sub('(?si)^.*?(?=<h\\d)', markup_whitespaces, html)
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def detect_soft_breaks(self, html):
|
def detect_soft_breaks(self, html):
|
||||||
line = '(?P<initline>'+self.line_open+'\s*(?P<init_content>.*?)'+self.line_close+')'
|
line = '(?P<initline>'+self.line_open+'\\s*(?P<init_content>.*?)'+self.line_close+')'
|
||||||
line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+ \
|
line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+ \
|
||||||
'\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
|
'\\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
|
||||||
div_break_candidate_pattern = line+'\s*<div[^>]*>\s*</div>\s*'+line_two
|
div_break_candidate_pattern = line+'\\s*<div[^>]*>\\s*</div>\\s*'+line_two
|
||||||
div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE)
|
div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE)
|
||||||
|
|
||||||
def convert_div_softbreaks(match):
|
def convert_div_softbreaks(match):
|
||||||
@ -571,9 +571,9 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
def detect_scene_breaks(self, html):
|
def detect_scene_breaks(self, html):
|
||||||
scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+ \
|
scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+ \
|
||||||
'<))(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
|
'<))(?P<break>((?P<break_char>((?!\\s)\\W))\\s*(?P=break_char)?)+)\\s*'+self.line_close
|
||||||
scene_breaks = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
|
scene_breaks = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
|
||||||
html = scene_breaks.sub(self.scene_break_open+'\g<break>'+'</p>', html)
|
html = scene_breaks.sub(self.scene_break_open+'\\g<break>'+'</p>', html)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def markup_user_break(self, replacement_break):
|
def markup_user_break(self, replacement_break):
|
||||||
@ -589,13 +589,13 @@ class HeuristicProcessor(object):
|
|||||||
if re.match('^<hr', replacement_break):
|
if re.match('^<hr', replacement_break):
|
||||||
if replacement_break.find('width') != -1:
|
if replacement_break.find('width') != -1:
|
||||||
try:
|
try:
|
||||||
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
|
width = int(re.sub('.*?width(:|=)(?P<wnum>\\d+).*', '\\g<wnum>', replacement_break))
|
||||||
except:
|
except:
|
||||||
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
|
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
|
||||||
self.log.warn('Invalid replacement scene break'
|
self.log.warn('Invalid replacement scene break'
|
||||||
' expression, using default')
|
' expression, using default')
|
||||||
else:
|
else:
|
||||||
replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
|
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
|
||||||
divpercent = (100 - width) / 2
|
divpercent = (100 - width) / 2
|
||||||
hr_open = re.sub('45', unicode(divpercent), hr_open)
|
hr_open = re.sub('45', unicode(divpercent), hr_open)
|
||||||
scene_break = hr_open+replacement_break+'</div>'
|
scene_break = hr_open+replacement_break+'</div>'
|
||||||
@ -606,16 +606,16 @@ class HeuristicProcessor(object):
|
|||||||
else:
|
else:
|
||||||
from calibre.utils.html2text import html2text
|
from calibre.utils.html2text import html2text
|
||||||
replacement_break = html2text(replacement_break)
|
replacement_break = html2text(replacement_break)
|
||||||
replacement_break = re.sub('\s', ' ', replacement_break)
|
replacement_break = re.sub('\\s', ' ', replacement_break)
|
||||||
scene_break = self.scene_break_open+replacement_break+'</p>'
|
scene_break = self.scene_break_open+replacement_break+'</p>'
|
||||||
else:
|
else:
|
||||||
replacement_break = re.sub('\s', ' ', replacement_break)
|
replacement_break = re.sub('\\s', ' ', replacement_break)
|
||||||
scene_break = self.scene_break_open+replacement_break+'</p>'
|
scene_break = self.scene_break_open+replacement_break+'</p>'
|
||||||
|
|
||||||
return scene_break
|
return scene_break
|
||||||
|
|
||||||
def check_paragraph(self, content):
|
def check_paragraph(self, content):
|
||||||
content = re.sub('\s*</?span[^>]*>\s*', '', content)
|
content = re.sub('\\s*</?span[^>]*>\\s*', '', content)
|
||||||
if re.match('.*[\"\'.!?:]$', content):
|
if re.match('.*[\"\'.!?:]$', content):
|
||||||
# print "detected this as a paragraph"
|
# print "detected this as a paragraph"
|
||||||
return True
|
return True
|
||||||
@ -623,7 +623,7 @@ class HeuristicProcessor(object):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def abbyy_processor(self, html):
|
def abbyy_processor(self, html):
|
||||||
abbyy_line = re.compile('((?P<linestart><p\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE)
|
abbyy_line = re.compile('((?P<linestart><p\\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE)
|
||||||
empty_paragraph = '\n<p> </p>\n'
|
empty_paragraph = '\n<p> </p>\n'
|
||||||
self.in_blockquote = False
|
self.in_blockquote = False
|
||||||
self.previous_was_paragraph = False
|
self.previous_was_paragraph = False
|
||||||
@ -669,7 +669,7 @@ class HeuristicProcessor(object):
|
|||||||
if style == 'text-align' and setting != 'left':
|
if style == 'text-align' and setting != 'left':
|
||||||
text_align = style+':'+setting+';'
|
text_align = style+':'+setting+';'
|
||||||
if style == 'text-indent':
|
if style == 'text-indent':
|
||||||
setting = int(re.sub('\s*pt\s*', '', setting))
|
setting = int(re.sub('\\s*pt\\s*', '', setting))
|
||||||
if 9 < setting < 14:
|
if 9 < setting < 14:
|
||||||
text_indent = indented_text
|
text_indent = indented_text
|
||||||
else:
|
else:
|
||||||
@ -757,8 +757,8 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
is_pdftohtml = self.is_pdftohtml(html)
|
is_pdftohtml = self.is_pdftohtml(html)
|
||||||
if is_pdftohtml:
|
if is_pdftohtml:
|
||||||
self.line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
|
self.line_open = "<(?P<outer>p)[^>]*>(\\s*<[ibu][^>]*>)?\\s*"
|
||||||
self.line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
|
self.line_close = "\\s*(</[ibu][^>]*>\\s*)?</(?P=outer)>"
|
||||||
|
|
||||||
# ADE doesn't render <br />, change to empty paragraphs
|
# ADE doesn't render <br />, change to empty paragraphs
|
||||||
# html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
|
# html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
|
||||||
@ -831,7 +831,7 @@ class HeuristicProcessor(object):
|
|||||||
# headings and titles, images, etc
|
# headings and titles, images, etc
|
||||||
doubleheading = re.compile(
|
doubleheading = re.compile(
|
||||||
r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
||||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
html = doubleheading.sub('\\g<firsthead>'+'\n<h3'+'\\g<secondhead>'+'</h3>', html)
|
||||||
|
|
||||||
# If scene break formatting is enabled, find all blank paragraphs that definitely aren't scenebreaks,
|
# If scene break formatting is enabled, find all blank paragraphs that definitely aren't scenebreaks,
|
||||||
# style it with the 'whitespace' class. All remaining blank lines are styled as softbreaks.
|
# style it with the 'whitespace' class. All remaining blank lines are styled as softbreaks.
|
||||||
@ -839,7 +839,7 @@ class HeuristicProcessor(object):
|
|||||||
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
|
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
|
||||||
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
||||||
self.log.debug('Formatting scene breaks')
|
self.log.debug('Formatting scene breaks')
|
||||||
html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
|
html = re.sub('(?i)<div[^>]*>\\s*<br(\\s?/)?>\\s*</div>', '<p></p>', html)
|
||||||
html = self.detect_scene_breaks(html)
|
html = self.detect_scene_breaks(html)
|
||||||
html = self.detect_whitespace(html)
|
html = self.detect_whitespace(html)
|
||||||
html = self.detect_soft_breaks(html)
|
html = self.detect_soft_breaks(html)
|
||||||
@ -856,9 +856,9 @@ class HeuristicProcessor(object):
|
|||||||
replacement_break = self.markup_user_break(replacement_break)
|
replacement_break = self.markup_user_break(replacement_break)
|
||||||
if scene_break_count >= 1:
|
if scene_break_count >= 1:
|
||||||
html = detected_scene_break.sub(replacement_break, html)
|
html = detected_scene_break.sub(replacement_break, html)
|
||||||
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
|
html = re.sub('<p\\s+class="softbreak"[^>]*>\\s*</p>', replacement_break, html)
|
||||||
else:
|
else:
|
||||||
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
|
html = re.sub('<p\\s+class="softbreak"[^>]*>\\s*</p>', replacement_break, html)
|
||||||
|
|
||||||
if self.deleted_nbsps:
|
if self.deleted_nbsps:
|
||||||
# put back non-breaking spaces in empty paragraphs so they render correctly
|
# put back non-breaking spaces in empty paragraphs so they render correctly
|
||||||
|
@ -41,6 +41,6 @@ def dump(path):
|
|||||||
|
|
||||||
print(path, 'dumped to', dest)
|
print(path, 'dumped to', dest)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
dump(sys.argv[-1])
|
dump(sys.argv[-1])
|
||||||
|
|
||||||
|
@ -165,6 +165,7 @@ class Parser(object):
|
|||||||
ans['text_assertion'] = ta
|
ans['text_assertion'] = ta
|
||||||
return raw[1:]
|
return raw[1:]
|
||||||
|
|
||||||
|
|
||||||
_parser = None
|
_parser = None
|
||||||
|
|
||||||
|
|
||||||
@ -203,5 +204,3 @@ def cfi_sort_key(cfi, only_path=True):
|
|||||||
step = steps[-1] if steps else {}
|
step = steps[-1] if steps else {}
|
||||||
offsets = (step.get('temporal_offset', 0), tuple(reversed(step.get('spatial_offset', (0, 0)))), step.get('text_offset', 0), )
|
offsets = (step.get('temporal_offset', 0), tuple(reversed(step.get('spatial_offset', (0, 0)))), step.get('text_offset', 0), )
|
||||||
return (step_nums, offsets)
|
return (step_nums, offsets)
|
||||||
|
|
||||||
|
|
||||||
|
@ -100,5 +100,6 @@ class Tests(unittest.TestCase):
|
|||||||
def find_tests():
|
def find_tests():
|
||||||
return unittest.TestLoader().loadTestsFromTestCase(Tests)
|
return unittest.TestLoader().loadTestsFromTestCase(Tests)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.TextTestRunner(verbosity=2).run(find_tests())
|
unittest.TextTestRunner(verbosity=2).run(find_tests())
|
||||||
|
@ -62,5 +62,6 @@ def main(args=sys.argv):
|
|||||||
any2lit(opts, args[1])
|
any2lit(opts, args[1])
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
@ -104,6 +104,7 @@ def f60_79(B, C, D):
|
|||||||
def f6_42(B, C, D):
|
def f6_42(B, C, D):
|
||||||
return (B + C) ^ C
|
return (B + C) ^ C
|
||||||
|
|
||||||
|
|
||||||
f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
|
f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
|
||||||
|
|
||||||
# ...and delightful changes
|
# ...and delightful changes
|
||||||
@ -321,6 +322,7 @@ def new(arg=None):
|
|||||||
|
|
||||||
return crypto
|
return crypto
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
def main():
|
def main():
|
||||||
import sys
|
import sys
|
||||||
|
@ -57,6 +57,7 @@ def invert_tag_map(tag_map):
|
|||||||
tattrs[0] = dattrs
|
tattrs[0] = dattrs
|
||||||
return tags, tattrs
|
return tags, tattrs
|
||||||
|
|
||||||
|
|
||||||
OPF_MAP = invert_tag_map(maps.OPF_MAP)
|
OPF_MAP = invert_tag_map(maps.OPF_MAP)
|
||||||
HTML_MAP = invert_tag_map(maps.HTML_MAP)
|
HTML_MAP = invert_tag_map(maps.HTML_MAP)
|
||||||
|
|
||||||
@ -76,6 +77,7 @@ def packguid(guid):
|
|||||||
values = [int(value, 16) for value in values]
|
values = [int(value, 16) for value in values]
|
||||||
return pack("<LHHBBBBBBBB", *values)
|
return pack("<LHHBBBBBBBB", *values)
|
||||||
|
|
||||||
|
|
||||||
FLAG_OPENING = (1 << 0)
|
FLAG_OPENING = (1 << 0)
|
||||||
FLAG_CLOSING = (1 << 1)
|
FLAG_CLOSING = (1 << 1)
|
||||||
FLAG_BLOCK = (1 << 2)
|
FLAG_BLOCK = (1 << 2)
|
||||||
|
@ -153,9 +153,9 @@ class HTMLConverter(object):
|
|||||||
(re.compile('<hr>', re.IGNORECASE),
|
(re.compile('<hr>', re.IGNORECASE),
|
||||||
lambda match : '<span style="page-break-after:always"> </span>'),
|
lambda match : '<span style="page-break-after:always"> </span>'),
|
||||||
# Create header tags
|
# Create header tags
|
||||||
(re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
|
(re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
|
||||||
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
||||||
(re.compile('<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
|
(re.compile('<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
|
||||||
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
||||||
(re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
|
lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
|
||||||
@ -409,7 +409,7 @@ class HTMLConverter(object):
|
|||||||
selector name and the value is a dictionary of properties
|
selector name and the value is a dictionary of properties
|
||||||
"""
|
"""
|
||||||
sdict, pdict = {}, {}
|
sdict, pdict = {}, {}
|
||||||
style = re.sub('/\*.*?\*/', '', style) # Remove /*...*/ comments
|
style = re.sub('/\\*.*?\\*/', '', style) # Remove /*...*/ comments
|
||||||
for sel in re.findall(HTMLConverter.SELECTOR_PAT, style):
|
for sel in re.findall(HTMLConverter.SELECTOR_PAT, style):
|
||||||
for key in sel[0].split(','):
|
for key in sel[0].split(','):
|
||||||
val = self.parse_style_properties(sel[1])
|
val = self.parse_style_properties(sel[1])
|
||||||
|
@ -148,7 +148,7 @@ class OverDrive(Source):
|
|||||||
fix_slashes = re.compile(r'\\/')
|
fix_slashes = re.compile(r'\\/')
|
||||||
thumbimage = fix_slashes.sub('/', thumbimage)
|
thumbimage = fix_slashes.sub('/', thumbimage)
|
||||||
worldcatlink = fix_slashes.sub('/', worldcatlink)
|
worldcatlink = fix_slashes.sub('/', worldcatlink)
|
||||||
cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', thumbimage)
|
cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\\g<img>100', thumbimage)
|
||||||
social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
|
social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
|
||||||
series_num = ''
|
series_num = ''
|
||||||
if not series:
|
if not series:
|
||||||
@ -254,7 +254,7 @@ class OverDrive(Source):
|
|||||||
|
|
||||||
def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
||||||
close_matches = []
|
close_matches = []
|
||||||
raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
|
raw = re.sub('.*?\\[\\[(?P<content>.*?)\\]\\].*', '[[\\g<content>]]', raw)
|
||||||
results = json.loads(raw)
|
results = json.loads(raw)
|
||||||
# log.error('raw results are:'+str(results))
|
# log.error('raw results are:'+str(results))
|
||||||
# The search results are either from a keyword search or a multi-format list from a single ID,
|
# The search results are either from a keyword search or a multi-format list from a single ID,
|
||||||
|
@ -149,7 +149,7 @@ class Ozon(Source):
|
|||||||
# Redirect page: trying to extract ozon_id from javascript data
|
# Redirect page: trying to extract ozon_id from javascript data
|
||||||
h = HTMLParser()
|
h = HTMLParser()
|
||||||
entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
|
entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
|
||||||
json_pat = re.compile(u'dataLayer\s*=\s*(.+)?;')
|
json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
|
||||||
json_info = re.search(json_pat, entry_string)
|
json_info = re.search(json_pat, entry_string)
|
||||||
jsondata = json_info.group(1) if json_info else None
|
jsondata = json_info.group(1) if json_info else None
|
||||||
if jsondata:
|
if jsondata:
|
||||||
@ -344,7 +344,7 @@ class Ozon(Source):
|
|||||||
|
|
||||||
pub_year = None
|
pub_year = None
|
||||||
pub_year_block = entry.xpath(u'.//div[@class="bOneTileProperty"]/text()')
|
pub_year_block = entry.xpath(u'.//div[@class="bOneTileProperty"]/text()')
|
||||||
year_pattern = re.compile('\d{4}')
|
year_pattern = re.compile(r'\d{4}')
|
||||||
if pub_year_block:
|
if pub_year_block:
|
||||||
pub_year = re.search(year_pattern, pub_year_block[0])
|
pub_year = re.search(year_pattern, pub_year_block[0])
|
||||||
if pub_year:
|
if pub_year:
|
||||||
@ -625,8 +625,8 @@ def _translageLanguageToCode(displayLang): # {{{
|
|||||||
def _normalizeAuthorNameWithInitials(name): # {{{
|
def _normalizeAuthorNameWithInitials(name): # {{{
|
||||||
res = name
|
res = name
|
||||||
if name:
|
if name:
|
||||||
re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
||||||
re2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
|
re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
|
||||||
matcher = re.match(re1, unicode(name), re.UNICODE)
|
matcher = re.match(re1, unicode(name), re.UNICODE)
|
||||||
if not matcher:
|
if not matcher:
|
||||||
matcher = re.match(re2, unicode(name), re.UNICODE)
|
matcher = re.match(re2, unicode(name), re.UNICODE)
|
||||||
|
@ -370,6 +370,7 @@ def set_metadata(stream, mi):
|
|||||||
mu.update(mi)
|
mu.update(mi)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if False:
|
if False:
|
||||||
# Test get_metadata()
|
# Test get_metadata()
|
||||||
@ -388,4 +389,3 @@ if __name__ == '__main__':
|
|||||||
updated_data = open(tokens[0]+'-updated' + '.' + tokens[2],'wb')
|
updated_data = open(tokens[0]+'-updated' + '.' + tokens[2],'wb')
|
||||||
updated_data.write(stream.getvalue())
|
updated_data.write(stream.getvalue())
|
||||||
updated_data.close()
|
updated_data.close()
|
||||||
|
|
||||||
|
@ -45,6 +45,6 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
|
|||||||
def main():
|
def main():
|
||||||
inspect_mobi(sys.argv[1])
|
inspect_mobi(sys.argv[1])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
@ -350,13 +350,13 @@ class MobiReader(object):
|
|||||||
# Swap inline and block level elements, and order block level elements according to priority
|
# Swap inline and block level elements, and order block level elements according to priority
|
||||||
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
|
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
|
r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\\g<para>'+'\\g<styletags>', self.processed_html)
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
|
r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\\g<styletags>'+'\\g<para>', self.processed_html)
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html)
|
r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\\g<para>'+'\\g<blockquote>', self.processed_html)
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
|
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\\g<blockquote>'+'\\g<para>', self.processed_html)
|
||||||
bods = htmls = 0
|
bods = htmls = 0
|
||||||
for x in re.finditer(u'</body>|</html>', self.processed_html):
|
for x in re.finditer(u'</body>|</html>', self.processed_html):
|
||||||
if x == '</body>':
|
if x == '</body>':
|
||||||
@ -692,7 +692,7 @@ class MobiReader(object):
|
|||||||
continue
|
continue
|
||||||
if reached and x.tag == 'a':
|
if reached and x.tag == 'a':
|
||||||
href = x.get('href', '')
|
href = x.get('href', '')
|
||||||
if href and re.match('\w+://', href) is None:
|
if href and re.match('\\w+://', href) is None:
|
||||||
try:
|
try:
|
||||||
text = u' '.join([t.strip() for t in
|
text = u' '.join([t.strip() for t in
|
||||||
x.xpath('descendant::text()')])
|
x.xpath('descendant::text()')])
|
||||||
|
@ -374,6 +374,7 @@ class NonLinearNCXIndex(NCXIndex):
|
|||||||
EndTagTable
|
EndTagTable
|
||||||
)))
|
)))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# Generate a document with a large number of index entries using both
|
# Generate a document with a large number of index entries using both
|
||||||
# calibre and kindlegen and compare the output
|
# calibre and kindlegen and compare the output
|
||||||
@ -393,4 +394,3 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
from calibre.gui2.tweak_book.diff.main import main
|
from calibre.gui2.tweak_book.diff.main import main
|
||||||
main(['cdiff', 'decompiled_index/mobi8/ncx.record', 'x/ncx.record'])
|
main(['cdiff', 'decompiled_index/mobi8/ncx.record', 'x/ncx.record'])
|
||||||
|
|
||||||
|
@ -114,6 +114,7 @@ def normalize_simple_composition(name, cssvalue, composition, check_inherit=True
|
|||||||
break
|
break
|
||||||
return style
|
return style
|
||||||
|
|
||||||
|
|
||||||
font_composition = ('font-style', 'font-variant', 'font-weight', 'font-size', 'line-height', 'font-family')
|
font_composition = ('font-style', 'font-variant', 'font-weight', 'font-size', 'line-height', 'font-family')
|
||||||
|
|
||||||
|
|
||||||
@ -144,6 +145,7 @@ def normalize_border(name, cssvalue):
|
|||||||
style.update({k.replace(EDGES[0], edge):v for k, v in vals.iteritems()})
|
style.update({k.replace(EDGES[0], edge):v for k, v in vals.iteritems()})
|
||||||
return style
|
return style
|
||||||
|
|
||||||
|
|
||||||
normalizers = {
|
normalizers = {
|
||||||
'list-style': simple_normalizer('list-style', ('type', 'position', 'image')),
|
'list-style': simple_normalizer('list-style', ('type', 'position', 'image')),
|
||||||
'font': lambda prop, v: normalize_font(v),
|
'font': lambda prop, v: normalize_font(v),
|
||||||
@ -243,6 +245,7 @@ def condense_border(style, props):
|
|||||||
style.removeProperty(prop.name)
|
style.removeProperty(prop.name)
|
||||||
style.setProperty('border', edge_vals[0].value)
|
style.setProperty('border', edge_vals[0].value)
|
||||||
|
|
||||||
|
|
||||||
condensers = {'margin': simple_condenser('margin', condense_edge), 'padding': simple_condenser('padding', condense_edge), 'border': condense_border}
|
condensers = {'margin': simple_condenser('margin', condense_edge), 'padding': simple_condenser('padding', condense_edge), 'border': condense_border}
|
||||||
|
|
||||||
|
|
||||||
@ -430,5 +433,6 @@ def test_normalization(return_tests=False): # {{{
|
|||||||
unittest.TextTestRunner(verbosity=4).run(tests)
|
unittest.TextTestRunner(verbosity=4).run(tests)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test_normalization()
|
test_normalization()
|
||||||
|
@ -82,6 +82,7 @@ def iterrules(container, sheet_name, rules=None, media_rule_ok=media_allowed, ru
|
|||||||
|
|
||||||
importing.discard(sheet_name)
|
importing.discard(sheet_name)
|
||||||
|
|
||||||
|
|
||||||
StyleDeclaration = namedtuple('StyleDeclaration', 'index declaration pseudo_element')
|
StyleDeclaration = namedtuple('StyleDeclaration', 'index declaration pseudo_element')
|
||||||
Specificity = namedtuple('Specificity', 'is_style num_id num_class num_elem rule_index')
|
Specificity = namedtuple('Specificity', 'is_style num_id num_class num_elem rule_index')
|
||||||
|
|
||||||
@ -224,6 +225,7 @@ def resolve_styles(container, name, select=None, sheet_callback=None):
|
|||||||
|
|
||||||
return partial(resolve_property, style_map), partial(resolve_pseudo_property, style_map, pseudo_style_map), select
|
return partial(resolve_property, style_map), partial(resolve_pseudo_property, style_map, pseudo_style_map), select
|
||||||
|
|
||||||
|
|
||||||
_defvals = None
|
_defvals = None
|
||||||
|
|
||||||
|
|
||||||
|
@ -115,6 +115,7 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.
|
|||||||
zf.writestr(html_name, HTML)
|
zf.writestr(html_name, HTML)
|
||||||
zf.writestr(toc_name, ncx)
|
zf.writestr(toc_name, ncx)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
mi = Metadata('Test book', authors=('Kovid Goyal',))
|
mi = Metadata('Test book', authors=('Kovid Goyal',))
|
||||||
|
@ -140,5 +140,6 @@ def main(args=sys.argv):
|
|||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
@ -134,7 +134,6 @@ def main():
|
|||||||
print('PDF written to:', pdf)
|
print('PDF written to:', pdf)
|
||||||
print('Image written to:', path)
|
print('Image written to:', path)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|
||||||
|
@ -188,7 +188,7 @@ class PMLMLizer(object):
|
|||||||
text = text.replace('\\Q="%s"' % unused, '')
|
text = text.replace('\\Q="%s"' % unused, '')
|
||||||
|
|
||||||
# Remove \Cn tags that are within \x and \Xn tags
|
# Remove \Cn tags that are within \x and \Xn tags
|
||||||
text = re.sub(unicode(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\g<t>\g<a>\g<b>\g<t>', text)
|
text = re.sub(unicode(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\\g<t>\\g<a>\\g<b>\\g<t>', text)
|
||||||
|
|
||||||
# Replace bad characters.
|
# Replace bad characters.
|
||||||
text = text.replace(u'\xc2', '')
|
text = text.replace(u'\xc2', '')
|
||||||
@ -206,7 +206,7 @@ class PMLMLizer(object):
|
|||||||
text = re.sub('[ ]{2,}', ' ', text)
|
text = re.sub('[ ]{2,}', ' ', text)
|
||||||
|
|
||||||
# Condense excessive \c empty line sequences.
|
# Condense excessive \c empty line sequences.
|
||||||
text = re.sub('(\\c\s*\\c\s*){2,}', '\\c \n\\c\n', text)
|
text = re.sub('(\\c\\s*\\c\\s*){2,}', '\\c \n\\c\n', text)
|
||||||
|
|
||||||
# Remove excessive newlines.
|
# Remove excessive newlines.
|
||||||
text = re.sub('\n[ ]+\n', '\n\n', text)
|
text = re.sub('\n[ ]+\n', '\n\n', text)
|
||||||
|
@ -142,7 +142,7 @@ class RTFMLizer(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def remove_tabs(self, text):
|
def remove_tabs(self, text):
|
||||||
self.log.debug('\Replace tabs with space for processing...')
|
self.log.debug('Replace tabs with space for processing...')
|
||||||
text = text.replace('\t', ' ')
|
text = text.replace('\t', ' ')
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
@ -32,6 +32,7 @@ class CheckEncoding:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
check_encoding_obj = CheckEncoding()
|
check_encoding_obj = CheckEncoding()
|
||||||
check_encoding_obj.check_encoding(sys.argv[1])
|
check_encoding_obj.check_encoding(sys.argv[1])
|
||||||
|
@ -175,6 +175,7 @@ class DefaultEncoding:
|
|||||||
elif enc == 'pca':
|
elif enc == 'pca':
|
||||||
self.__code_page = '850'
|
self.__code_page = '850'
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
encode_obj = DefaultEncoding(
|
encode_obj = DefaultEncoding(
|
||||||
|
@ -411,8 +411,6 @@ class FieldStrings:
|
|||||||
line -- the string to be parse
|
line -- the string to be parse
|
||||||
Retuns:
|
Retuns:
|
||||||
The name of the field
|
The name of the field
|
||||||
Logic:
|
|
||||||
self.__link_switch = re.compile(r'\\l\s{1,}(.*?)\s')
|
|
||||||
"""
|
"""
|
||||||
self.__link_switch = re.compile(r'\\l\s{1,}"{0,1}(.*?)"{0,1}\s')
|
self.__link_switch = re.compile(r'\\l\s{1,}"{0,1}(.*?)"{0,1}\s')
|
||||||
the_string = name
|
the_string = name
|
||||||
|
@ -562,6 +562,8 @@ class Hex2Utf8:
|
|||||||
self.__convert_preamble()
|
self.__convert_preamble()
|
||||||
else:
|
else:
|
||||||
self.__convert_body()
|
self.__convert_body()
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
how to swap case for non-capitals
|
how to swap case for non-capitals
|
||||||
my_string.swapcase()
|
my_string.swapcase()
|
||||||
|
@ -120,7 +120,7 @@ class ListTable:
|
|||||||
Requires: line -- line to process
|
Requires: line -- line to process
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
Logic:
|
Logic:
|
||||||
I have found \list.
|
I have found \\list.
|
||||||
Change the state to list
|
Change the state to list
|
||||||
Get the open bracket count so you know when this state ends.
|
Get the open bracket count so you know when this state ends.
|
||||||
Append an empty list to all lists.
|
Append an empty list to all lists.
|
||||||
@ -162,7 +162,7 @@ class ListTable:
|
|||||||
Requires: line -- line to process
|
Requires: line -- line to process
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
Logic:
|
Logic:
|
||||||
I have found \listlevel.
|
I have found \\listlevel.
|
||||||
Change the state to level
|
Change the state to level
|
||||||
Get the open bracket count so you know when this state ends.
|
Get the open bracket count so you know when this state ends.
|
||||||
Append an empty list to the last list inside all lists.
|
Append an empty list to the last list inside all lists.
|
||||||
@ -285,7 +285,7 @@ class ListTable:
|
|||||||
Returns:
|
Returns:
|
||||||
nothing
|
nothing
|
||||||
Logic:
|
Logic:
|
||||||
Method is used for to parse text in the \leveltext group.
|
Method is used for to parse text in the \\leveltext group.
|
||||||
"""
|
"""
|
||||||
num = line[18:]
|
num = line[18:]
|
||||||
the_num = int(num, 16)
|
the_num = int(num, 16)
|
||||||
|
@ -270,6 +270,8 @@ class ParseOptions:
|
|||||||
return options_dict, arguments
|
return options_dict, arguments
|
||||||
else:
|
else:
|
||||||
return 0,0
|
return 0,0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
this_dict = {
|
this_dict = {
|
||||||
'indents': [0, 'i'],
|
'indents': [0, 'i'],
|
||||||
|
@ -52,7 +52,7 @@ class OverrideTable:
|
|||||||
Returns:
|
Returns:
|
||||||
nothing
|
nothing
|
||||||
Logic:
|
Logic:
|
||||||
The group {\override has been found.
|
The group {\\override has been found.
|
||||||
Check for the end of the group.
|
Check for the end of the group.
|
||||||
Otherwise, add appropriate tokens to the override dictionary.
|
Otherwise, add appropriate tokens to the override dictionary.
|
||||||
"""
|
"""
|
||||||
|
@ -128,7 +128,7 @@ if another paragraph_def is found, the state changes to collect_tokens.
|
|||||||
'list-conti' : 'list-continue',
|
'list-conti' : 'list-continue',
|
||||||
'list-hang_' : 'list-hang',
|
'list-hang_' : 'list-hang',
|
||||||
# 'list-tebef' : 'list-text-before',
|
# 'list-tebef' : 'list-text-before',
|
||||||
'list-level' : 'level',
|
# 'list-level' : 'level',
|
||||||
'list-id___' : 'list-id',
|
'list-id___' : 'list-id',
|
||||||
'list-start' : 'list-start',
|
'list-start' : 'list-start',
|
||||||
'nest-level' : 'nest-level',
|
'nest-level' : 'nest-level',
|
||||||
@ -198,7 +198,7 @@ if another paragraph_def is found, the state changes to collect_tokens.
|
|||||||
'bor-cel-to' : 'border-cell-top',
|
'bor-cel-to' : 'border-cell-top',
|
||||||
'bor-cel-le' : 'border-cell-left',
|
'bor-cel-le' : 'border-cell-left',
|
||||||
'bor-cel-ri' : 'border-cell-right',
|
'bor-cel-ri' : 'border-cell-right',
|
||||||
'bor-par-bo' : 'border-paragraph-bottom',
|
# 'bor-par-bo' : 'border-paragraph-bottom',
|
||||||
'bor-par-to' : 'border-paragraph-top',
|
'bor-par-to' : 'border-paragraph-top',
|
||||||
'bor-par-le' : 'border-paragraph-left',
|
'bor-par-le' : 'border-paragraph-left',
|
||||||
'bor-par-ri' : 'border-paragraph-right',
|
'bor-par-ri' : 'border-paragraph-right',
|
||||||
@ -413,7 +413,7 @@ if another paragraph_def is found, the state changes to collect_tokens.
|
|||||||
Returns:
|
Returns:
|
||||||
nothing
|
nothing
|
||||||
Logic:
|
Logic:
|
||||||
I have found a \pard while I am collecting tokens. I want to reset
|
I have found a \\pard while I am collecting tokens. I want to reset
|
||||||
the dectionary and do nothing else.
|
the dectionary and do nothing else.
|
||||||
"""
|
"""
|
||||||
# Change this
|
# Change this
|
||||||
|
@ -584,7 +584,7 @@ class ProcessTokens:
|
|||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
# unknown
|
# unknown
|
||||||
# These must get passed on because they occure after \*
|
# These must get passed on because they occure after \\*
|
||||||
'do' : ('un', 'unknown___', self.default_func),
|
'do' : ('un', 'unknown___', self.default_func),
|
||||||
'company' : ('un', 'company___', self.default_func),
|
'company' : ('un', 'company___', self.default_func),
|
||||||
'shpinst' : ('un', 'unknown___', self.default_func),
|
'shpinst' : ('un', 'unknown___', self.default_func),
|
||||||
@ -716,10 +716,10 @@ class ProcessTokens:
|
|||||||
def divide_num(self, numerator, denominator):
|
def divide_num(self, numerator, denominator):
|
||||||
try:
|
try:
|
||||||
# calibre why ignore negative number? Wrong in case of \fi
|
# calibre why ignore negative number? Wrong in case of \fi
|
||||||
numerator = float(re.search('[0-9.\-]+', numerator).group())
|
numerator = float(re.search('[0-9.\\-]+', numerator).group())
|
||||||
except TypeError as msg:
|
except TypeError as msg:
|
||||||
if self.__run_level > 3:
|
if self.__run_level > 3:
|
||||||
msg = ('No number to process?\nthis indicates that the token \(\\li\) \
|
msg = ('No number to process?\nthis indicates that the token \\(\\li\\) \
|
||||||
should have a number and does not\nnumerator is \
|
should have a number and does not\nnumerator is \
|
||||||
"%s"\ndenominator is "%s"\n') % (numerator, denominator)
|
"%s"\ndenominator is "%s"\n') % (numerator, denominator)
|
||||||
raise self.__bug_handler(msg)
|
raise self.__bug_handler(msg)
|
||||||
|
@ -27,19 +27,19 @@ class Sections:
|
|||||||
logic
|
logic
|
||||||
---------------
|
---------------
|
||||||
The tags for the first section breaks have already been written.
|
The tags for the first section breaks have already been written.
|
||||||
RTF stores section breaks with the \sect tag. Each time this tag is
|
RTF stores section breaks with the \\sect tag. Each time this tag is
|
||||||
encountered, add one to the counter.
|
encountered, add one to the counter.
|
||||||
When I encounter the \sectd tag, I want to collect all the appropriate tokens
|
When I encounter the \\sectd tag, I want to collect all the appropriate tokens
|
||||||
that describe the section. When I reach a \pard, I know I an stop collecting
|
that describe the section. When I reach a \\pard, I know I an stop collecting
|
||||||
tokens and write the section tags.
|
tokens and write the section tags.
|
||||||
The exception to this method occurs when sections occur in field blocks, such
|
The exception to this method occurs when sections occur in field blocks, such
|
||||||
as the index. Normally, two section break occur within the index and other
|
as the index. Normally, two section break occur within the index and other
|
||||||
field-blocks. (If less or more section breaks occurr, this code may not work.)
|
field-blocks. (If less or more section breaks occur, this code may not work.)
|
||||||
I want the sections to occur outside of the index. That is, the index
|
I want the sections to occur outside of the index. That is, the index
|
||||||
should be nested inside one section tag. After the index is complete, a new
|
should be nested inside one section tag. After the index is complete, a new
|
||||||
section should begin.
|
section should begin.
|
||||||
In order to write the sections outside of the field blocks, I have to store
|
In order to write the sections outside of the field blocks, I have to store
|
||||||
all of the field block as a string. When I ecounter the \sect tag, add one to
|
all of the field block as a string. When I ecounter the \\sect tag, add one to
|
||||||
the section counter, but store this number in a list. Likewise, store the
|
the section counter, but store this number in a list. Likewise, store the
|
||||||
information describing the section in another list.
|
information describing the section in another list.
|
||||||
When I reach the end of the field block, choose the first item from the
|
When I reach the end of the field block, choose the first item from the
|
||||||
@ -243,7 +243,7 @@ class Sections:
|
|||||||
nothing
|
nothing
|
||||||
Logic:
|
Logic:
|
||||||
Text or control words indicating text have been found
|
Text or control words indicating text have been found
|
||||||
before \pard. This should indicate older RTF. Reset the state
|
before \\pard. This should indicate older RTF. Reset the state
|
||||||
Write the section definition. Insert a paragraph definition.
|
Write the section definition. Insert a paragraph definition.
|
||||||
Insert {} to mark the end of a paragraph definition
|
Insert {} to mark the end of a paragraph definition
|
||||||
"""
|
"""
|
||||||
|
@ -121,7 +121,7 @@ class Styles:
|
|||||||
'list-conti' : 'list-continue',
|
'list-conti' : 'list-continue',
|
||||||
'list-hang_' : 'list-hang',
|
'list-hang_' : 'list-hang',
|
||||||
# 'list-tebef' : 'list-text-before',
|
# 'list-tebef' : 'list-text-before',
|
||||||
'list-level' : 'level',
|
# 'list-level' : 'level',
|
||||||
'list-id___' : 'list-id',
|
'list-id___' : 'list-id',
|
||||||
'list-start' : 'list-start',
|
'list-start' : 'list-start',
|
||||||
'nest-level' : 'nest-level',
|
'nest-level' : 'nest-level',
|
||||||
@ -192,7 +192,7 @@ class Styles:
|
|||||||
'bor-cel-to' : 'border-cell-top',
|
'bor-cel-to' : 'border-cell-top',
|
||||||
'bor-cel-le' : 'border-cell-left',
|
'bor-cel-le' : 'border-cell-left',
|
||||||
'bor-cel-ri' : 'border-cell-right',
|
'bor-cel-ri' : 'border-cell-right',
|
||||||
'bor-par-bo' : 'border-paragraph-bottom',
|
# 'bor-par-bo' : 'border-paragraph-bottom',
|
||||||
'bor-par-to' : 'border-paragraph-top',
|
'bor-par-to' : 'border-paragraph-top',
|
||||||
'bor-par-le' : 'border-paragraph-left',
|
'bor-par-le' : 'border-paragraph-left',
|
||||||
'bor-par-ri' : 'border-paragraph-right',
|
'bor-par-ri' : 'border-paragraph-right',
|
||||||
|
@ -333,6 +333,7 @@ def main():
|
|||||||
return 1
|
return 1
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
"""SNB file unit test"""
|
"""SNB file unit test"""
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
@ -160,6 +160,7 @@ class MainWindow(QMainWindow):
|
|||||||
self.window_unblocked.emit()
|
self.window_unblocked.emit()
|
||||||
return QMainWindow.event(self, ev)
|
return QMainWindow.event(self, ev)
|
||||||
|
|
||||||
|
|
||||||
app=QApplication([])
|
app=QApplication([])
|
||||||
app.setAttribute(Qt.AA_DontUseNativeMenuBar, False)
|
app.setAttribute(Qt.AA_DontUseNativeMenuBar, False)
|
||||||
app.setApplicationName('com.calibre-ebook.DBusExportDemo')
|
app.setApplicationName('com.calibre-ebook.DBusExportDemo')
|
||||||
|
@ -456,6 +456,7 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
|
|||||||
self.rule = ('', txt)
|
self.rule = ('', txt)
|
||||||
QDialog.accept(self)
|
QDialog.accept(self)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app = QApplication([])
|
app = QApplication([])
|
||||||
from calibre.ebooks.metadata.book.base import field_metadata
|
from calibre.ebooks.metadata.book.base import field_metadata
|
||||||
|
@ -581,5 +581,6 @@ class Word(object):
|
|||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
@ -171,8 +171,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
|
|||||||
info_dialog(self, _('Done'),
|
info_dialog(self, _('Done'),
|
||||||
_('Confirmation dialogs have all been reset'), show=True)
|
_('Confirmation dialogs have all been reset'), show=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from PyQt5.Qt import QApplication
|
from PyQt5.Qt import QApplication
|
||||||
app = QApplication([])
|
app = QApplication([])
|
||||||
test_widget('Interface', 'Behavior')
|
test_widget('Interface', 'Behavior')
|
||||||
|
|
||||||
|
@ -190,6 +190,7 @@ class Stores(OrderedDict):
|
|||||||
return cls(builtin.gui, builtin.name, config=builtin.config,
|
return cls(builtin.gui, builtin.name, config=builtin.config,
|
||||||
base_plugin=builtin.base_plugin), ver
|
base_plugin=builtin.base_plugin), ver
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
st = time.time()
|
st = time.time()
|
||||||
count = 0
|
count = 0
|
||||||
@ -199,5 +200,3 @@ if __name__ == '__main__':
|
|||||||
print(code.encode('utf-8'))
|
print(code.encode('utf-8'))
|
||||||
print('\n', '_'*80, '\n', sep='')
|
print('\n', '_'*80, '\n', sep='')
|
||||||
print ('Time to download all %d plugins: %.2f seconds'%(count, time.time() - st))
|
print ('Time to download all %d plugins: %.2f seconds'%(count, time.time() - st))
|
||||||
|
|
||||||
|
|
||||||
|
@ -153,15 +153,15 @@ class Matches(QAbstractItemModel):
|
|||||||
# Remove filter identifiers
|
# Remove filter identifiers
|
||||||
# Remove the prefix.
|
# Remove the prefix.
|
||||||
for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'):
|
for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'):
|
||||||
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
|
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
|
||||||
query = query.replace('%s:' % loc, '')
|
query = query.replace('%s:' % loc, '')
|
||||||
# Remove the prefix and search text.
|
# Remove the prefix and search text.
|
||||||
for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
|
for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
|
||||||
query = re.sub(r'%s:"[^"]"' % loc, '', query)
|
query = re.sub(r'%s:"[^"]"' % loc, '', query)
|
||||||
query = re.sub(r'%s:[^\s]*' % loc, '', query)
|
query = re.sub(r'%s:[^\s]*' % loc, '', query)
|
||||||
# Remove whitespace
|
# Remove whitespace
|
||||||
query = re.sub('\s', '', query)
|
query = re.sub(r'\s', '', query)
|
||||||
mod_query = re.sub('\s', '', mod_query)
|
mod_query = re.sub(r'\s', '', mod_query)
|
||||||
# If mod_query and query are the same then there were no filter modifiers
|
# If mod_query and query are the same then there were no filter modifiers
|
||||||
# so this isn't a filterable query.
|
# so this isn't a filterable query.
|
||||||
if mod_query == query:
|
if mod_query == query:
|
||||||
|
@ -128,6 +128,7 @@ def set_use_primary_find_in_search(toWhat):
|
|||||||
global pref_use_primary_find_in_search
|
global pref_use_primary_find_in_search
|
||||||
pref_use_primary_find_in_search = toWhat
|
pref_use_primary_find_in_search = toWhat
|
||||||
|
|
||||||
|
|
||||||
y, c, n, u = map(icu_lower, (_('yes'), _('checked'), _('no'), _('unchecked')))
|
y, c, n, u = map(icu_lower, (_('yes'), _('checked'), _('no'), _('unchecked')))
|
||||||
yes_vals = {y, c, 'true'}
|
yes_vals = {y, c, 'true'}
|
||||||
no_vals = {n, u, 'false'}
|
no_vals = {n, u, 'false'}
|
||||||
@ -1215,5 +1216,3 @@ class SortKeyGenerator(object):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
@ -241,7 +241,7 @@ class BIBTEX(CatalogPlugin):
|
|||||||
# define a function to replace the template entry by its value
|
# define a function to replace the template entry by its value
|
||||||
def tpl_replace(objtplname) :
|
def tpl_replace(objtplname) :
|
||||||
|
|
||||||
tpl_field = re.sub(u'[\{\}]', u'', objtplname.group())
|
tpl_field = re.sub(u'[\\{\\}]', u'', objtplname.group())
|
||||||
|
|
||||||
if tpl_field in TEMPLATE_ALLOWED_FIELDS :
|
if tpl_field in TEMPLATE_ALLOWED_FIELDS :
|
||||||
if tpl_field in ['pubdate', 'timestamp'] :
|
if tpl_field in ['pubdate', 'timestamp'] :
|
||||||
@ -258,14 +258,14 @@ class BIBTEX(CatalogPlugin):
|
|||||||
|
|
||||||
if len(template_citation) >0 :
|
if len(template_citation) >0 :
|
||||||
tpl_citation = bibtexclass.utf8ToBibtex(
|
tpl_citation = bibtexclass.utf8ToBibtex(
|
||||||
bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}',
|
bibtexclass.ValidateCitationKey(re.sub(u'\\{[^{}]*\\}',
|
||||||
tpl_replace, template_citation)))
|
tpl_replace, template_citation)))
|
||||||
|
|
||||||
if len(tpl_citation) >0 :
|
if len(tpl_citation) >0 :
|
||||||
return tpl_citation
|
return tpl_citation
|
||||||
|
|
||||||
if len(entry["isbn"]) > 0 :
|
if len(entry["isbn"]) > 0 :
|
||||||
template_citation = u'%s' % re.sub(u'[\D]',u'', entry["isbn"])
|
template_citation = u'%s' % re.sub(u'[\\D]',u'', entry["isbn"])
|
||||||
|
|
||||||
else :
|
else :
|
||||||
template_citation = u'%s' % str(entry["id"])
|
template_citation = u'%s' % str(entry["id"])
|
||||||
|
@ -154,9 +154,9 @@ class CSV_XML(CatalogPlugin):
|
|||||||
|
|
||||||
# Convert HTML to markdown text
|
# Convert HTML to markdown text
|
||||||
if type(item) is unicode:
|
if type(item) is unicode:
|
||||||
opening_tag = re.search('<(\w+)(\x20|>)', item)
|
opening_tag = re.search('<(\\w+)(\x20|>)', item)
|
||||||
if opening_tag:
|
if opening_tag:
|
||||||
closing_tag = re.search('<\/%s>$' % opening_tag.group(1), item)
|
closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item)
|
||||||
if closing_tag:
|
if closing_tag:
|
||||||
item = html2text(item)
|
item = html2text(item)
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
"Default: '%default'\n"
|
"Default: '%default'\n"
|
||||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||||
Option('--exclude-genre',
|
Option('--exclude-genre',
|
||||||
default='\[.+\]|^\+$',
|
default='\\[.+\\]|^\\+$',
|
||||||
dest='exclude_genre',
|
dest='exclude_genre',
|
||||||
action=None,
|
action=None,
|
||||||
help=_("Regex describing tags to exclude as genres.\n"
|
help=_("Regex describing tags to exclude as genres.\n"
|
||||||
|
@ -1209,11 +1209,11 @@ class CatalogBuilder(object):
|
|||||||
clipped to max_len
|
clipped to max_len
|
||||||
"""
|
"""
|
||||||
|
|
||||||
normalized = massaged = re.sub('\s', '', ascii_text(tag).lower())
|
normalized = massaged = re.sub('\\s', '', ascii_text(tag).lower())
|
||||||
if re.search('\W', normalized):
|
if re.search('\\W', normalized):
|
||||||
normalized = ''
|
normalized = ''
|
||||||
for c in massaged:
|
for c in massaged:
|
||||||
if re.search('\W', c):
|
if re.search('\\W', c):
|
||||||
normalized += self.generate_unicode_name(c)
|
normalized += self.generate_unicode_name(c)
|
||||||
else:
|
else:
|
||||||
normalized += c
|
normalized += c
|
||||||
@ -1376,7 +1376,7 @@ class CatalogBuilder(object):
|
|||||||
Return:
|
Return:
|
||||||
(str): asciized version of author
|
(str): asciized version of author
|
||||||
"""
|
"""
|
||||||
return re.sub("\W", "", ascii_text(author))
|
return re.sub("\\W", "", ascii_text(author))
|
||||||
|
|
||||||
def generate_format_args(self, book):
|
def generate_format_args(self, book):
|
||||||
""" Generate the format args for template substitution.
|
""" Generate the format args for template substitution.
|
||||||
@ -4209,9 +4209,9 @@ class CatalogBuilder(object):
|
|||||||
|
|
||||||
# Generate a legal XHTML id/href string
|
# Generate a legal XHTML id/href string
|
||||||
if self.letter_or_symbol(series) == self.SYMBOLS:
|
if self.letter_or_symbol(series) == self.SYMBOLS:
|
||||||
return "symbol_%s_series" % re.sub('\W', '', series).lower()
|
return "symbol_%s_series" % re.sub('\\W', '', series).lower()
|
||||||
else:
|
else:
|
||||||
return "%s_series" % re.sub('\W', '', ascii_text(series)).lower()
|
return "%s_series" % re.sub('\\W', '', ascii_text(series)).lower()
|
||||||
|
|
||||||
def generate_short_description(self, description, dest=None):
|
def generate_short_description(self, description, dest=None):
|
||||||
""" Generate a truncated version of the supplied string.
|
""" Generate a truncated version of the supplied string.
|
||||||
@ -4292,7 +4292,7 @@ class CatalogBuilder(object):
|
|||||||
else:
|
else:
|
||||||
if re.match('[0-9]+', word[0]):
|
if re.match('[0-9]+', word[0]):
|
||||||
word = word.replace(',', '')
|
word = word.replace(',', '')
|
||||||
suffix = re.search('[\D]', word)
|
suffix = re.search('[\\D]', word)
|
||||||
if suffix:
|
if suffix:
|
||||||
word = '%10.0f%s' % (float(word[:suffix.start()]), word[suffix.start():])
|
word = '%10.0f%s' % (float(word[:suffix.start()]), word[suffix.start():])
|
||||||
else:
|
else:
|
||||||
@ -4308,7 +4308,7 @@ class CatalogBuilder(object):
|
|||||||
else:
|
else:
|
||||||
if re.search('[0-9]+', word[0]):
|
if re.search('[0-9]+', word[0]):
|
||||||
word = word.replace(',', '')
|
word = word.replace(',', '')
|
||||||
suffix = re.search('[\D]', word)
|
suffix = re.search('[\\D]', word)
|
||||||
if suffix:
|
if suffix:
|
||||||
word = '%10.0f%s' % (float(word[:suffix.start()]), word[suffix.start():])
|
word = '%10.0f%s' % (float(word[:suffix.start()]), word[suffix.start():])
|
||||||
else:
|
else:
|
||||||
@ -4638,7 +4638,7 @@ class CatalogBuilder(object):
|
|||||||
# confusion with decimal points.
|
# confusion with decimal points.
|
||||||
|
|
||||||
# Explode lost CRs to \n\n
|
# Explode lost CRs to \n\n
|
||||||
for lost_cr in re.finditer('([a-z])([\.\?!])([A-Z])', comments):
|
for lost_cr in re.finditer('([a-z])([\\.\\?!])([A-Z])', comments):
|
||||||
comments = comments.replace(lost_cr.group(),
|
comments = comments.replace(lost_cr.group(),
|
||||||
'%s%s\n\n%s' % (lost_cr.group(1),
|
'%s%s\n\n%s' % (lost_cr.group(1),
|
||||||
lost_cr.group(2),
|
lost_cr.group(2),
|
||||||
|
@ -90,8 +90,8 @@ class NumberToText(object): # {{{
|
|||||||
# Special case ordinals
|
# Special case ordinals
|
||||||
if re.search('[st|nd|rd|th]',self.number):
|
if re.search('[st|nd|rd|th]',self.number):
|
||||||
self.number = re.sub(',','',self.number)
|
self.number = re.sub(',','',self.number)
|
||||||
ordinal_suffix = re.search('[\D]', self.number)
|
ordinal_suffix = re.search('[\\D]', self.number)
|
||||||
ordinal_number = re.sub('\D','',re.sub(',','',self.number))
|
ordinal_number = re.sub('\\D','',re.sub(',','',self.number))
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.log("Ordinal: %s" % ordinal_number)
|
self.log("Ordinal: %s" % ordinal_number)
|
||||||
self.number_as_float = ordinal_number
|
self.number_as_float = ordinal_number
|
||||||
@ -120,7 +120,7 @@ class NumberToText(object): # {{{
|
|||||||
self.text = NumberToText(self.number.replace('%',' percent')).text
|
self.text = NumberToText(self.number.replace('%',' percent')).text
|
||||||
|
|
||||||
# Test for decimal
|
# Test for decimal
|
||||||
elif re.search('\.',self.number):
|
elif re.search('\\.',self.number):
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.log("Decimal: %s" % self.number)
|
self.log("Decimal: %s" % self.number)
|
||||||
self.number_as_float = self.number
|
self.number_as_float = self.number
|
||||||
@ -151,12 +151,12 @@ class NumberToText(object): # {{{
|
|||||||
self.text = NumberToText(self.number_as_float).text
|
self.text = NumberToText(self.number_as_float).text
|
||||||
|
|
||||||
# Test for hybrid e.g., 'K2, 2nd, 10@10'
|
# Test for hybrid e.g., 'K2, 2nd, 10@10'
|
||||||
elif re.search('[\D]+', self.number):
|
elif re.search('[\\D]+', self.number):
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.log("Hybrid: %s" % self.number)
|
self.log("Hybrid: %s" % self.number)
|
||||||
# Split the token into number/text
|
# Split the token into number/text
|
||||||
number_position = re.search('\d',self.number).start()
|
number_position = re.search('\\d',self.number).start()
|
||||||
text_position = re.search('\D',self.number).start()
|
text_position = re.search('\\D',self.number).start()
|
||||||
if number_position < text_position:
|
if number_position < text_position:
|
||||||
number = self.number[:text_position]
|
number = self.number[:text_position]
|
||||||
text = self.number[text_position:]
|
text = self.number[text_position:]
|
||||||
@ -225,4 +225,3 @@ class NumberToText(object): # {{{
|
|||||||
self.log(u'resultString: %s' % resultString)
|
self.log(u'resultString: %s' % resultString)
|
||||||
self.text = resultString.strip().capitalize()
|
self.text = resultString.strip().capitalize()
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ from calibre.utils.html2text import html2text
|
|||||||
|
|
||||||
# Hackish - ignoring sentences ending or beginning in numbers to avoid
|
# Hackish - ignoring sentences ending or beginning in numbers to avoid
|
||||||
# confusion with decimal points.
|
# confusion with decimal points.
|
||||||
lost_cr_pat = re.compile('([a-z])([\.\?!])([A-Z])')
|
lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
|
||||||
lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
|
lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
|
||||||
sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
|
sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
|
||||||
re.IGNORECASE)
|
re.IGNORECASE)
|
||||||
|
@ -657,7 +657,7 @@ class CustomColumns(object):
|
|||||||
editable=True, display={}):
|
editable=True, display={}):
|
||||||
if not label:
|
if not label:
|
||||||
raise ValueError(_('No label was provided'))
|
raise ValueError(_('No label was provided'))
|
||||||
if re.match('^\w*$', label) is None or not label[0].isalpha() or label.lower() != label:
|
if re.match('^\\w*$', label) is None or not label[0].isalpha() or label.lower() != label:
|
||||||
raise ValueError(_('The label must contain only lower case letters, digits and underscores, and start with a letter'))
|
raise ValueError(_('The label must contain only lower case letters, digits and underscores, and start with a letter'))
|
||||||
if datatype not in self.CUSTOM_DATA_TYPES:
|
if datatype not in self.CUSTOM_DATA_TYPES:
|
||||||
raise ValueError('%r is not a supported data type'%datatype)
|
raise ValueError('%r is not a supported data type'%datatype)
|
||||||
@ -809,5 +809,3 @@ class CustomColumns(object):
|
|||||||
self.conn.executescript(script)
|
self.conn.executescript(script)
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
return num
|
return num
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ def _connect(path):
|
|||||||
conn = sqlite.connect(path, factory=Connection, detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
|
conn = sqlite.connect(path, factory=Connection, detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
|
||||||
conn.row_factory = lambda cursor, row : list(row)
|
conn.row_factory = lambda cursor, row : list(row)
|
||||||
conn.create_aggregate('concat', 1, Concatenate)
|
conn.create_aggregate('concat', 1, Concatenate)
|
||||||
title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
|
title_pat = re.compile('^(A|The|An)\\s+', re.IGNORECASE)
|
||||||
|
|
||||||
def title_sort(title):
|
def title_sort(title):
|
||||||
match = title_pat.search(title)
|
match = title_pat.search(title)
|
||||||
@ -1514,6 +1514,7 @@ def text_to_tokens(text):
|
|||||||
continue
|
continue
|
||||||
return ans, OR
|
return ans, OR
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sqlite.enable_callback_tracebacks(True)
|
sqlite.enable_callback_tracebacks(True)
|
||||||
db = LibraryDatabase('/home/kovid/temp/library1.db.orig')
|
db = LibraryDatabase('/home/kovid/temp/library1.db.orig')
|
||||||
|
@ -68,6 +68,7 @@ def _py_convert_timestamp(val):
|
|||||||
return parse_date(val, as_utc=False)
|
return parse_date(val, as_utc=False)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
convert_timestamp = _py_convert_timestamp if _c_speedup is None else \
|
convert_timestamp = _py_convert_timestamp if _c_speedup is None else \
|
||||||
_c_convert_timestamp
|
_c_convert_timestamp
|
||||||
|
|
||||||
@ -75,6 +76,7 @@ convert_timestamp = _py_convert_timestamp if _c_speedup is None else \
|
|||||||
def adapt_datetime(dt):
|
def adapt_datetime(dt):
|
||||||
return isoformat(dt, sep=' ')
|
return isoformat(dt, sep=' ')
|
||||||
|
|
||||||
|
|
||||||
sqlite.register_adapter(datetime, adapt_datetime)
|
sqlite.register_adapter(datetime, adapt_datetime)
|
||||||
sqlite.register_converter('timestamp', convert_timestamp)
|
sqlite.register_converter('timestamp', convert_timestamp)
|
||||||
|
|
||||||
@ -82,6 +84,7 @@ sqlite.register_converter('timestamp', convert_timestamp)
|
|||||||
def convert_bool(val):
|
def convert_bool(val):
|
||||||
return val != '0'
|
return val != '0'
|
||||||
|
|
||||||
|
|
||||||
sqlite.register_adapter(bool, lambda x : 1 if x else 0)
|
sqlite.register_adapter(bool, lambda x : 1 if x else 0)
|
||||||
sqlite.register_converter('bool', convert_bool)
|
sqlite.register_converter('bool', convert_bool)
|
||||||
sqlite.register_converter('BOOL', convert_bool)
|
sqlite.register_converter('BOOL', convert_bool)
|
||||||
@ -411,4 +414,3 @@ def test():
|
|||||||
c = sqlite.connect(':memory:')
|
c = sqlite.connect(':memory:')
|
||||||
if load_c_extensions(c, True):
|
if load_c_extensions(c, True):
|
||||||
print('Loaded C extension successfully')
|
print('Loaded C extension successfully')
|
||||||
|
|
||||||
|
@ -64,6 +64,7 @@ def extract_member(filename, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)
|
|||||||
if match.search(name):
|
if match.search(name):
|
||||||
return name, zf.read(name)
|
return name, zf.read(name)
|
||||||
|
|
||||||
|
|
||||||
comic_exts = {'png', 'jpg', 'jpeg', 'gif', 'webp'}
|
comic_exts = {'png', 'jpg', 'jpeg', 'gif', 'webp'}
|
||||||
|
|
||||||
|
|
||||||
|
@ -14,6 +14,8 @@ from calibre.constants import islinux
|
|||||||
|
|
||||||
def pre_activated_socket():
|
def pre_activated_socket():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
has_preactivated_support = False
|
has_preactivated_support = False
|
||||||
|
|
||||||
if islinux:
|
if islinux:
|
||||||
|
@ -73,7 +73,7 @@ def check_for_critical_bugs():
|
|||||||
print('WARNING: Translation errors detected')
|
print('WARNING: Translation errors detected')
|
||||||
print('See the .errors directory and http://translate.sourceforge.net/wiki/toolkit/using_pofilter')
|
print('See the .errors directory and http://translate.sourceforge.net/wiki/toolkit/using_pofilter')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
import_from_launchpad(sys.argv[1])
|
import_from_launchpad(sys.argv[1])
|
||||||
|
|
||||||
|
@ -146,7 +146,7 @@ def make(filename, outfile):
|
|||||||
# This is a message with plural forms
|
# This is a message with plural forms
|
||||||
elif l.startswith('msgid_plural'):
|
elif l.startswith('msgid_plural'):
|
||||||
if section != ID:
|
if section != ID:
|
||||||
print('msgid_plural not preceeded by msgid on %s:%d' %\
|
print('msgid_plural not preceeded by msgid on %s:%d' %
|
||||||
(infile, lno), file=sys.stderr)
|
(infile, lno), file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
l = l[12:]
|
l = l[12:]
|
||||||
@ -157,7 +157,7 @@ def make(filename, outfile):
|
|||||||
section = STR
|
section = STR
|
||||||
if l.startswith('msgstr['):
|
if l.startswith('msgstr['):
|
||||||
if not is_plural:
|
if not is_plural:
|
||||||
print('plural without msgid_plural on %s:%d' %\
|
print('plural without msgid_plural on %s:%d' %
|
||||||
(infile, lno), file=sys.stderr)
|
(infile, lno), file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
l = l.split(']', 1)[1]
|
l = l.split(']', 1)[1]
|
||||||
@ -165,7 +165,7 @@ def make(filename, outfile):
|
|||||||
msgstr += '\0' # Separator of the various plural forms
|
msgstr += '\0' # Separator of the various plural forms
|
||||||
else:
|
else:
|
||||||
if is_plural:
|
if is_plural:
|
||||||
print('indexed msgstr required for plural on %s:%d' %\
|
print('indexed msgstr required for plural on %s:%d' %
|
||||||
(infile, lno), file=sys.stderr)
|
(infile, lno), file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
l = l[6:]
|
l = l[6:]
|
||||||
@ -180,7 +180,7 @@ def make(filename, outfile):
|
|||||||
elif section == STR:
|
elif section == STR:
|
||||||
msgstr += l
|
msgstr += l
|
||||||
else:
|
else:
|
||||||
print('Syntax error on %s:%d' % (infile, lno), \
|
print('Syntax error on %s:%d' % (infile, lno),
|
||||||
'before:', file=sys.stderr)
|
'before:', file=sys.stderr)
|
||||||
print(l, file=sys.stderr)
|
print(l, file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
@ -1627,6 +1627,7 @@ class Zeroconf(object):
|
|||||||
# Test a few module features, including service registration, service
|
# Test a few module features, including service registration, service
|
||||||
# query (for Zoe), and service unregistration.
|
# query (for Zoe), and service unregistration.
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print("Multicast DNS Service Discovery for Python, version", __version__)
|
print("Multicast DNS Service Discovery for Python, version", __version__)
|
||||||
r = Zeroconf()
|
r = Zeroconf()
|
||||||
|
@ -49,6 +49,7 @@ def allowed(x):
|
|||||||
def py_clean_xml_chars(unicode_string):
|
def py_clean_xml_chars(unicode_string):
|
||||||
return u''.join(filter(allowed, unicode_string))
|
return u''.join(filter(allowed, unicode_string))
|
||||||
|
|
||||||
|
|
||||||
clean_xml_chars = native_clean_xml_chars or py_clean_xml_chars
|
clean_xml_chars = native_clean_xml_chars or py_clean_xml_chars
|
||||||
|
|
||||||
|
|
||||||
@ -85,5 +86,4 @@ def unescape(text, rm=False, rchar=u''):
|
|||||||
if rm:
|
if rm:
|
||||||
return rchar # replace by char
|
return rchar # replace by char
|
||||||
return text # leave as is
|
return text # leave as is
|
||||||
return re.sub("&#?\w+;", fixup, text)
|
return re.sub("&#?\\w+;", fixup, text)
|
||||||
|
|
||||||
|
@ -30,6 +30,7 @@ def same_thread(func):
|
|||||||
return func(self, *args, **kwargs)
|
return func(self, *args, **kwargs)
|
||||||
return check_thread
|
return check_thread
|
||||||
|
|
||||||
|
|
||||||
FreeTypeError = getattr(plugins['freetype'][0], 'FreeTypeError', Exception)
|
FreeTypeError = getattr(plugins['freetype'][0], 'FreeTypeError', Exception)
|
||||||
|
|
||||||
|
|
||||||
@ -80,5 +81,3 @@ class FreeType(object):
|
|||||||
@same_thread
|
@same_thread
|
||||||
def load_font(self, data):
|
def load_font(self, data):
|
||||||
return Face(self.ft.load_font(data))
|
return Face(self.ft.load_font(data))
|
||||||
|
|
||||||
|
|
||||||
|
@ -113,6 +113,7 @@ class FontMetrics(object):
|
|||||||
'The width of the string at the specified pixel size and stretch, in pixels'
|
'The width of the string at the specified pixel size and stretch, in pixels'
|
||||||
return sum(self.advance_widths(string, pixel_size, stretch))
|
return sum(self.advance_widths(string, pixel_size, stretch))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
from calibre.utils.fonts.sfnt.container import Sfnt
|
from calibre.utils.fonts.sfnt.container import Sfnt
|
||||||
@ -127,4 +128,3 @@ if __name__ == '__main__':
|
|||||||
print('AvgWidth:', m.pdf_avg_width)
|
print('AvgWidth:', m.pdf_avg_width)
|
||||||
print('ItalicAngle', m.post.italic_angle)
|
print('ItalicAngle', m.post.italic_angle)
|
||||||
print('StemV', m.pdf_stemv)
|
print('StemV', m.pdf_stemv)
|
||||||
|
|
||||||
|
@ -640,7 +640,7 @@ class BuiltinReGroup(BuiltinFormatterFunction):
|
|||||||
'the template and the eval functions, you use [[ for { and ]] for }.'
|
'the template and the eval functions, you use [[ for { and ]] for }.'
|
||||||
' The following example in template program mode looks for series '
|
' The following example in template program mode looks for series '
|
||||||
'with more than one word and uppercases the first word: '
|
'with more than one word and uppercases the first word: '
|
||||||
"{series:'re_group($, \"(\S* )(.*)\", \"[[$:uppercase()]]\", \"[[$]]\")'}")
|
"{series:'re_group($, \"(\\S* )(.*)\", \"[[$:uppercase()]]\", \"[[$]]\")'}")
|
||||||
|
|
||||||
def evaluate(self, formatter, kwargs, mi, locals, val, pattern, *args):
|
def evaluate(self, formatter, kwargs, mi, locals, val, pattern, *args):
|
||||||
from formatter import EvalFormatter
|
from formatter import EvalFormatter
|
||||||
@ -924,9 +924,9 @@ class BuiltinSublist(BuiltinFormatterFunction):
|
|||||||
'of zero is assumed to be the length of the list. Examples using '
|
'of zero is assumed to be the length of the list. Examples using '
|
||||||
'basic template mode and assuming that the tags column (which is '
|
'basic template mode and assuming that the tags column (which is '
|
||||||
'comma-separated) contains "A, B, C": '
|
'comma-separated) contains "A, B, C": '
|
||||||
'{tags:sublist(0,1,\,)} returns "A". '
|
'{tags:sublist(0,1,\\,)} returns "A". '
|
||||||
'{tags:sublist(-1,0,\,)} returns "C". '
|
'{tags:sublist(-1,0,\\,)} returns "C". '
|
||||||
'{tags:sublist(0,-1,\,)} returns "A, B".'
|
'{tags:sublist(0,-1,\\,)} returns "A, B".'
|
||||||
)
|
)
|
||||||
|
|
||||||
def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):
|
def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):
|
||||||
|
@ -26,6 +26,7 @@ class DirTooLarge(ValueError):
|
|||||||
def __init__(self, bdir):
|
def __init__(self, bdir):
|
||||||
ValueError.__init__(self, 'The directory {0} is too large to monitor. Try increasing the value in /proc/sys/fs/inotify/max_user_watches'.format(bdir))
|
ValueError.__init__(self, 'The directory {0} is too large to monitor. Try increasing the value in /proc/sys/fs/inotify/max_user_watches'.format(bdir))
|
||||||
|
|
||||||
|
|
||||||
_inotify = None
|
_inotify = None
|
||||||
|
|
||||||
|
|
||||||
@ -320,6 +321,7 @@ class INotifyTreeWatcher(INotify):
|
|||||||
self.modified = set()
|
self.modified = set()
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
w = INotifyTreeWatcher(sys.argv[-1])
|
w = INotifyTreeWatcher(sys.argv[-1])
|
||||||
w()
|
w()
|
||||||
|
@ -87,6 +87,7 @@ class ConnectedWorker(Thread):
|
|||||||
class CriticalError(Exception):
|
class CriticalError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
_name_counter = itertools.count()
|
_name_counter = itertools.count()
|
||||||
|
|
||||||
if islinux:
|
if islinux:
|
||||||
@ -384,4 +385,3 @@ class Server(Thread):
|
|||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
self.close()
|
self.close()
|
||||||
|
|
||||||
|
@ -85,5 +85,6 @@ def main(args=sys.argv):
|
|||||||
f.close()
|
f.close()
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
@ -292,7 +292,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# this will be called when an exception occurs within a thread
|
# this will be called when an exception occurs within a thread
|
||||||
def handle_exception(request, exc_info):
|
def handle_exception(request, exc_info):
|
||||||
print("Exception occured in request #%s: %s" % \
|
print("Exception occured in request #%s: %s" %
|
||||||
(request.requestID, exc_info[1]))
|
(request.requestID, exc_info[1]))
|
||||||
|
|
||||||
# assemble the arguments for each job to a list...
|
# assemble the arguments for each job to a list...
|
||||||
|
@ -15,7 +15,7 @@ from calibre.utils.icu import capitalize, upper
|
|||||||
__all__ = ['titlecase']
|
__all__ = ['titlecase']
|
||||||
__version__ = '0.5'
|
__version__ = '0.5'
|
||||||
|
|
||||||
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
|
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\\.?|via|vs\\.?'
|
||||||
PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
|
PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
|
||||||
|
|
||||||
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
|
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
|
||||||
@ -54,7 +54,7 @@ def titlecase(text):
|
|||||||
|
|
||||||
all_caps = upper(text) == text
|
all_caps = upper(text) == text
|
||||||
|
|
||||||
words = re.split('\s+', text)
|
words = re.split('\\s+', text)
|
||||||
line = []
|
line = []
|
||||||
for word in words:
|
for word in words:
|
||||||
if all_caps:
|
if all_caps:
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement, print_function
|
||||||
from __future__ import print_function
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
@ -110,7 +109,7 @@ def default_is_link_wanted(url, tag):
|
|||||||
|
|
||||||
class RecursiveFetcher(object):
|
class RecursiveFetcher(object):
|
||||||
LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
|
LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
|
||||||
('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$'))
|
('.exe\\s*$', '.mp3\\s*$', '.ogg\\s*$', '^\\s*mailto:', '^\\s*$'))
|
||||||
# ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in
|
# ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in
|
||||||
# (
|
# (
|
||||||
#
|
#
|
||||||
|
Loading…
x
Reference in New Issue
Block a user