calibre (https://github.com/kovidgoyal/calibre.git)
commit f91b9c8e51
parent 70a131f04c

    pep8
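Every hunk below makes the same few mechanical fixes: backslash escapes that are invalid in plain Python string literals (the kind of warning pycodestyle reports as W605) are doubled or moved into raw strings, redundant backslash line continuations are dropped, the space in "print (...)" is removed, and blank lines around top-level definitions are normalized. A minimal sketch of the escape-sequence issue, written for this note rather than taken from the commit:

    import re

    # In a plain literal, '\d' is an unrecognized escape sequence. Python
    # currently passes the backslash through, so the pattern still works,
    # but it triggers a DeprecationWarning and linters flag it (W605).
    # The two equivalent fixes used throughout this commit:
    pat_escaped = re.compile('Posted (\\d\\d?)')   # doubled backslashes
    pat_raw = re.compile(r'Posted (\d\d?)')        # raw string literal
    assert pat_escaped.pattern == pat_raw.pattern

    # The same rule covers '\g<name>' backreferences in re.sub() replacement
    # strings, which is why replacements like '\g<tag>' become '\\g<tag>'.
    assert re.sub(r'(?P<num>\d+)', r'#\g<num>', 'Posted 12') == 'Posted #12'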
@@ -162,7 +162,7 @@ class AlMonitor(BasicNewsRecipe):
    def scrape_article_date(self, soup):
        for span in soup.findAll('span'):
            txt = self.text(span)
-            rgx = re.compile('Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*')
+            rgx = re.compile(unicode(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*'))
            hit = rgx.match(txt)
            if hit:
                return self.date_from_string(txt)

@@ -106,5 +106,5 @@ class AppledailyTW(BasicNewsRecipe):
    def preprocess_raw_html(self, raw_html, url):
        raw_html = re.sub(unicode(r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
        raw_html = re.sub(
-            unicode(r'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>'), raw_html)
+            unicode(r'<title>(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html)
        return raw_html
@@ -78,7 +78,7 @@ class AdvancedUserRecipe1390132023(BasicNewsRecipe):
            'http://cdn.images.express.co.uk/img/covers/')})
        cov = str(cov)
        cov2 = re.findall(
-            'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
+            'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)

        cov = str(cov2)
        cov = cov[2:len(cov) - 2]

@@ -22,7 +22,7 @@ class FilmWebPl(BasicNewsRecipe):
        'ul.sep-line > li + li::before {content: " | "} '
        'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
    preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''),  # fix malformed HTML with 2 body tags...
-                          (re.compile(u'(?:<sup>)?\(kliknij\,\ aby powiększyć\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
+                          (re.compile(u'(?:<sup>)?\\(kliknij\\,\\ aby powiększyć\\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
                          (re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
                          ]
    remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
@@ -62,7 +62,7 @@ class HuffingtonPostRecipe(BasicNewsRecipe):

    remove_tags = []
    remove_tags.append(dict(name='a', attrs={'href': re.compile(
-        'http://feedads\.g\.doubleclick.net.*')}))
+        'http://feedads\\.g\\.doubleclick.net.*')}))
    remove_tags.append(dict(name='div', attrs={'class': 'feedflare'}))
    remove_tags.append(dict(name='a', attrs={'class': 'home_pixie'}))
    remove_tags.append(dict(name='div', attrs={'id': [

@@ -144,7 +144,7 @@ class LentaRURecipe(BasicNewsRecipe):

        # Place article date after header
        dates = soup.findAll(text=re.compile(
-            '\d{2}\.\d{2}\.\d{4}, \d{2}:\d{2}:\d{2}'))
+            r'\d{2}\.\d{2}\.\d{4}, \d{2}:\d{2}:\d{2}'))
        if dates:
            for date in dates:
                for string in date:
@@ -121,7 +121,7 @@ class WeeklyLWN(BasicNewsRecipe):
            if article_anchor:
                article_url = article_anchor.get('href')
                if not article_url:
-                    print('article_url is None for article_anchor "%s": "%s"' \
+                    print('article_url is None for article_anchor "%s": "%s"'
                          % (str(article_anchor), article_title), file=sys.stderr)
                    continue

@@ -84,11 +84,11 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    remove_tags = [
        dict(name=['iframe', 'script', 'noscript', 'style']),
        dict(name='div', attrs={'class': ['fact-related-box', 'aside clearfix', 'aside clearfix middle-col-line', 'comments', 'share-tools', 'article-right-column', 'column-4-5', 'column-1-5', 'ad-msg', 'col-179 ', 'col-373 ', 'clear', 'ad', 'navigation', re.compile('share-tools(-top)?'), 'tools', 'metroCommentFormWrap', 'article-tools-below-title', 'related-links', 'padding-top-15', re.compile('^promo.*?$'), 'teaser-component', re.compile('fb(-comments|_iframe_widget)'), 'promos', 'header-links', 'promo-2']}),  # noqa
-        dict(id=['super-carousel', 'article-2', 'googleads', 'column-1-5-bottom', 'column-4-5', re.compile('^ad(\d+|adcomp.*?)?$'), 'adadcomp-4', 'margin-5', 'sidebar', re.compile('^article-\d'), 'comments', 'gallery-1', 'sharez_container', 'ts-container', 'topshares', 'ts-title']),  # noqa
+        dict(id=['super-carousel', 'article-2', 'googleads', 'column-1-5-bottom', 'column-4-5', re.compile('^ad(\\d+|adcomp.*?)?$'), 'adadcomp-4', 'margin-5', 'sidebar', re.compile('^article-\\d'), 'comments', 'gallery-1', 'sharez_container', 'ts-container', 'topshares', 'ts-title']),  # noqa
        dict(name='a', attrs={'name': 'comments'}),
        dict(name='img', attrs={'class': 'top-line',
                                'title': 'volledig scherm'}),
-        dict(attrs={'style': re.compile('^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$'), 'title': 'volledig scherm'})]
+        dict(attrs={'style': re.compile('^(.*(display\\s?:\\s?none|img-mask|white)\\s?;?.*)$'), 'title': 'volledig scherm'})]

    '''removed by before/after:
    id:
@@ -223,7 +223,7 @@ class MerryProcess(BeautifulSoup):
        return self.myKiller.safeRemovePart(souparray, True)

    def removeEmptyTags(self, soup, run=0):
-        emptymatches = re.compile('^[ \s\n\r\t ]*$')
+        emptymatches = re.compile('^[ \\s\n\r\t ]*$')
        emptytags = soup.findAll(lambda tag: tag.find(True) is None and (
            tag.string is None or tag.string.strip() == "" or tag.string.strip() == emptymatches) and not tag.isSelfClosing)
        if emptytags and not (emptytags is None or emptytags == []):

@@ -29,7 +29,7 @@ class ObservatorulCultural(BasicNewsRecipe):
        soup = self.index_to_soup(
            'http://www.observatorcultural.ro/Arhiva*-archive.html')
        issueTag = soup.find('a', href=re.compile(
-            "observatorcultural.ro\/Numarul"))
+            "observatorcultural.ro\\/Numarul"))
        issueURL = issueTag['href']
        print(issueURL)
        issueSoup = self.index_to_soup(issueURL)
@@ -83,6 +83,7 @@ def get_icons(zfp, name_or_list_of_names):
        ians = ians.pop(names[0])
    return ians

+
_translations_cache = {}

@@ -316,4 +317,3 @@ if __name__ == '__main__':
            zf.write(os.path.join(x, y))
    add_plugin(f.name)
    print('Added plugin from', sys.argv[-1])
-

@@ -40,5 +40,6 @@ def main():
    show_stats(stats)
    print('Stats saved to', stats)

+
if __name__ == '__main__':
    main()
@@ -221,7 +221,7 @@ def main():
    try:
        d.startup()
    except:
-        print ('Startup failed for device plugin: %s'%d)
+        print('Startup failed for device plugin: %s'%d)
    if d.MANAGES_DEVICE_PRESENCE:
        cd = d.detect_managed_devices(scanner.devices)
        if cd is not None:

@@ -395,5 +395,6 @@ def main():

    return 0

+
if __name__ == '__main__':
    main()

@@ -93,8 +93,8 @@ def main():
    finally:
        dev.shutdown()

-    print ('Device connection shutdown')
+    print('Device connection shutdown')


if __name__ == '__main__':
    main()
@@ -211,7 +211,6 @@ def test_udisks(ver=None):
    print('Ejecting:')
    u.eject(dev)

-
if __name__ == '__main__':
    test_udisks()

@@ -53,6 +53,7 @@ class GUID(Structure):
            ''.join(["%02x" % d for d in self.data4[2:]]),
        )

+
CONFIGRET = DWORD
DEVINST = DWORD
LPDWORD = POINTER(DWORD)
@@ -70,6 +71,8 @@ def CTL_CODE(DeviceType, Function, Method, Access):
def USB_CTL(id):
    # CTL_CODE(FILE_DEVICE_USB, (id), METHOD_BUFFERED, FILE_ANY_ACCESS)
    return CTL_CODE(0x22, id, 0, 0)
+
+
IOCTL_USB_GET_ROOT_HUB_NAME = USB_CTL(258)
IOCTL_USB_GET_NODE_INFORMATION = USB_CTL(258)
IOCTL_USB_GET_NODE_CONNECTION_INFORMATION = USB_CTL(259)

@@ -108,6 +111,7 @@ class SP_DEVINFO_DATA(Structure):
    def __str__(self):
        return "ClassGuid:%s DevInst:%s" % (self.ClassGuid, self.DevInst)

+
PSP_DEVINFO_DATA = POINTER(SP_DEVINFO_DATA)

@@ -122,6+126,7 @@ class SP_DEVICE_INTERFACE_DATA(Structure):
    def __str__(self):
        return "InterfaceClassGuid:%s Flags:%s" % (self.InterfaceClassGuid, self.Flags)

+
ANYSIZE_ARRAY = 1
@@ -131,6 +136,7 @@ class SP_DEVICE_INTERFACE_DETAIL_DATA(Structure):
        ("DevicePath", c_wchar*ANYSIZE_ARRAY)
    ]

+
UCHAR = c_ubyte

@@ -216,6 +222,7 @@ class USB_DESCRIPTOR_REQUEST(Structure):
        ('Data', USB_STRING_DESCRIPTOR),
    )

+
PUSB_DESCRIPTOR_REQUEST = POINTER(USB_DESCRIPTOR_REQUEST)
PSP_DEVICE_INTERFACE_DETAIL_DATA = POINTER(SP_DEVICE_INTERFACE_DETAIL_DATA)
PSP_DEVICE_INTERFACE_DATA = POINTER(SP_DEVICE_INTERFACE_DATA)

@@ -390,6 +397,7 @@ def config_err_check(result, func, args):
        raise WindowsError(result, 'The cfgmgr32 function failed with err: %s' % CR_CODE_NAMES.get(result, result))
    return args

+
GetLogicalDrives = cwrap('GetLogicalDrives', DWORD, errcheck=bool_err_check, lib=kernel32)
GetDriveType = cwrap('GetDriveTypeW', UINT, LPCWSTR, lib=kernel32)
GetVolumeNameForVolumeMountPoint = cwrap('GetVolumeNameForVolumeMountPointW', BOOL, LPCWSTR, LPWSTR, DWORD, errcheck=bool_err_check, lib=kernel32)
@@ -675,6 +683,7 @@ def get_volume_pathnames(volume_id, buf=None):

# def scan_usb_devices(): {{{

+
_USBDevice = namedtuple('USBDevice', 'vendor_id product_id bcd devid devinst')

@@ -1017,6 +1026,7 @@ def drives_for(vendor_id, product_id=None):
    pprint(get_drive_letters_for_device(usbdev, debug=True))
    print('USB info:', get_usb_info(usbdev, debug=True))

+
if __name__ == '__main__':
    develop()
# }}}
@@ -32,11 +32,11 @@ class HeuristicProcessor(object):
        self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}(?!\s*<h\d)', re.IGNORECASE)
        self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}', re.IGNORECASE)
        self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"  # noqa
-        self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
+        self.line_close = "(</(?P=inner3)>)?\\s*(</(?P=inner2)>)?\\s*(</(?P=inner1)>)?\\s*</(?P=outer)>"
        self.single_blank = re.compile(r'(\s*<(p|div)[^>]*>\s*</(p|div)>)', re.IGNORECASE)
        self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
-        self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]'
-        self.common_in_text_beginnings = u'[\w\'\"“‘‛]'
+        self.common_in_text_endings = u'[\"\'—’”,\\.!\\?\\…\\)„\\w]'
+        self.common_in_text_beginnings = u'[\\w\'\"“‘‛]'

    def is_pdftohtml(self, src):
        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]

@@ -54,10 +54,10 @@ class HeuristicProcessor(object):
                " chapters. - " + unicode(chap))
            return '<h2>'+chap+'</h2>\n'
        else:
-            delete_whitespace = re.compile('^\s*(?P<c>.*?)\s*$')
+            delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
            delete_quotes = re.compile('\'\"')
-            txt_chap = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(chap)))
-            txt_title = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(title)))
+            txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
+            txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
            self.html_preprocess_sections = self.html_preprocess_sections + 1
            self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                " chapters & titles. - " + unicode(chap) + ", " + unicode(title))
@@ -216,24 +216,24 @@ class HeuristicProcessor(object):
        title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"  # noqa
        chapter_header_open = r"(?P<chap>"
        title_header_open = r"(?P<title>"
-        chapter_header_close = ")\s*"
+        chapter_header_close = ")\\s*"
        title_header_close = ")"
        chapter_line_close = self.line_close
-        title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
+        title_line_close = "(</(?P=inner6)>)?\\s*(</(?P=inner5)>)?\\s*(</(?P=inner4)>)?\\s*</(?P=outer2)>"

        is_pdftohtml = self.is_pdftohtml(html)
        if is_pdftohtml:
-            title_line_open = "<(?P<outer2>p)[^>]*>\s*"
-            title_line_close = "\s*</(?P=outer2)>"
+            title_line_open = "<(?P<outer2>p)[^>]*>\\s*"
+            title_line_close = "\\s*</(?P=outer2)>"

        if blanks_between_paragraphs:
-            blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
+            blank_lines = "(\\s*<p[^>]*>\\s*</p>){0,2}\\s*"
        else:
            blank_lines = ""
        opt_title_open = "("
        opt_title_close = ")?"
-        n_lookahead_open = "(?!\s*"
-        n_lookahead_close = ")\s*"
+        n_lookahead_open = "(?!\\s*"
+        n_lookahead_close = ")\\s*"

        default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'’\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
        simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)"
@@ -358,12 +358,12 @@ class HeuristicProcessor(object):

        # define the pieces of the regex
        # (?<!\&\w{4});) is a semicolon not part of an entity
-        lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))"
+        lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
        em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
        soft_hyphen = u"\xad"
-        line_ending = "\s*(?P<style_close></(span|[iub])>)?\s*(</(p|div)>)?"
-        blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
-        line_opening = "<(p|div)[^>]*>\s*(?P<style_open><(span|[iub])[^>]*>)?\s*"
+        line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
+        blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
+        line_opening = "<(p|div)[^>]*>\\s*(?P<style_open><(span|[iub])[^>]*>)?\\s*"
        txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"

        if format == 'txt':

@@ -414,8 +414,8 @@ class HeuristicProcessor(object):
        return html

    def arrange_htm_line_endings(self, html):
-        html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\g<tag>"+">\n", html)
-        html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\g<tag>"+"\g<style>"+">", html)
+        html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\\g<tag>"+">\n", html)
+        html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\\g<tag>"+"\\g<style>"+">", html)
        return html

    def fix_nbsp_indents(self, html):
@@ -432,7 +432,7 @@ class HeuristicProcessor(object):
        # Get rid of empty <o:p> tags to simplify other processing
        html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
        # Delete microsoft 'smart' tags
-        html = re.sub('(?i)</?st1:\w+>', '', html)
+        html = re.sub('(?i)</?st1:\\w+>', '', html)
        # Re-open self closing paragraph tags
        html = re.sub('<p[^>/]*/>', '<p> </p>', html)
        # Get rid of empty span, bold, font, em, & italics tags

@@ -443,7 +443,7 @@ class HeuristicProcessor(object):
        html = re.sub(
            r"\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}".format(open=open_fmt_pat, close=close_fmt_pat) , " ", html)
        # delete surrounding divs from empty paragraphs
-        html = re.sub('<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html)
+        html = re.sub('<div[^>]*>\\s*<p[^>]*>\\s*</p>\\s*</div>', '<p> </p>', html)
        # Empty heading tags
        html = re.sub(r'(?i)<h\d+>\s*</h\d+>', '', html)
        self.deleted_nbsps = True

@@ -527,7 +527,7 @@ class HeuristicProcessor(object):
            elif content.find('scenebreak') != -1:
                return content
            else:
-                content = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '\n\n<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', content)
+                content = re.sub('(?i)<h(?P<hnum>\\d+)[^>]*>', '\n\n<h'+'\\g<hnum>'+' style="'+top_margin+bottom_margin+'">', content)
            return content

        html = blanks_around_headings.sub(merge_header_whitespace, html)
@@ -540,15 +540,15 @@ class HeuristicProcessor(object):

        html = blanks_n_nopunct.sub(markup_whitespaces, html)
        if self.html_preprocess_sections > self.min_chapters:
-            html = re.sub('(?si)^.*?(?=<h\d)', markup_whitespaces, html)
+            html = re.sub('(?si)^.*?(?=<h\\d)', markup_whitespaces, html)

        return html

    def detect_soft_breaks(self, html):
-        line = '(?P<initline>'+self.line_open+'\s*(?P<init_content>.*?)'+self.line_close+')'
+        line = '(?P<initline>'+self.line_open+'\\s*(?P<init_content>.*?)'+self.line_close+')'
        line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+ \
-            '\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
-        div_break_candidate_pattern = line+'\s*<div[^>]*>\s*</div>\s*'+line_two
+            '\\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
+        div_break_candidate_pattern = line+'\\s*<div[^>]*>\\s*</div>\\s*'+line_two
        div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE)

        def convert_div_softbreaks(match):

@@ -571,9 +571,9 @@ class HeuristicProcessor(object):

    def detect_scene_breaks(self, html):
        scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+ \
-            '<))(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
+            '<))(?P<break>((?P<break_char>((?!\\s)\\W))\\s*(?P=break_char)?)+)\\s*'+self.line_close
        scene_breaks = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
-        html = scene_breaks.sub(self.scene_break_open+'\g<break>'+'</p>', html)
+        html = scene_breaks.sub(self.scene_break_open+'\\g<break>'+'</p>', html)
        return html

    def markup_user_break(self, replacement_break):
@@ -589,13 +589,13 @@ class HeuristicProcessor(object):
        if re.match('^<hr', replacement_break):
            if replacement_break.find('width') != -1:
                try:
-                    width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
+                    width = int(re.sub('.*?width(:|=)(?P<wnum>\\d+).*', '\\g<wnum>', replacement_break))
                except:
                    scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
                    self.log.warn('Invalid replacement scene break'
                                  ' expression, using default')
                else:
-                    replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
+                    replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
                    divpercent = (100 - width) / 2
                    hr_open = re.sub('45', unicode(divpercent), hr_open)
                    scene_break = hr_open+replacement_break+'</div>'

@@ -606,16 +606,16 @@ class HeuristicProcessor(object):
            else:
                from calibre.utils.html2text import html2text
                replacement_break = html2text(replacement_break)
-                replacement_break = re.sub('\s', ' ', replacement_break)
+                replacement_break = re.sub('\\s', ' ', replacement_break)
                scene_break = self.scene_break_open+replacement_break+'</p>'
        else:
-            replacement_break = re.sub('\s', ' ', replacement_break)
+            replacement_break = re.sub('\\s', ' ', replacement_break)
            scene_break = self.scene_break_open+replacement_break+'</p>'

        return scene_break

    def check_paragraph(self, content):
-        content = re.sub('\s*</?span[^>]*>\s*', '', content)
+        content = re.sub('\\s*</?span[^>]*>\\s*', '', content)
        if re.match('.*[\"\'.!?:]$', content):
            # print "detected this as a paragraph"
            return True
@@ -623,7 +623,7 @@ class HeuristicProcessor(object):
        return False

    def abbyy_processor(self, html):
-        abbyy_line = re.compile('((?P<linestart><p\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE)
+        abbyy_line = re.compile('((?P<linestart><p\\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE)
        empty_paragraph = '\n<p> </p>\n'
        self.in_blockquote = False
        self.previous_was_paragraph = False

@@ -669,7 +669,7 @@ class HeuristicProcessor(object):
                    if style == 'text-align' and setting != 'left':
                        text_align = style+':'+setting+';'
                    if style == 'text-indent':
-                        setting = int(re.sub('\s*pt\s*', '', setting))
+                        setting = int(re.sub('\\s*pt\\s*', '', setting))
                        if 9 < setting < 14:
                            text_indent = indented_text
                        else:

@@ -757,8 +757,8 @@ class HeuristicProcessor(object):

        is_pdftohtml = self.is_pdftohtml(html)
        if is_pdftohtml:
-            self.line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
-            self.line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
+            self.line_open = "<(?P<outer>p)[^>]*>(\\s*<[ibu][^>]*>)?\\s*"
+            self.line_close = "\\s*(</[ibu][^>]*>\\s*)?</(?P=outer)>"

        # ADE doesn't render <br />, change to empty paragraphs
        # html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
@@ -831,7 +831,7 @@ class HeuristicProcessor(object):
        # headings and titles, images, etc
        doubleheading = re.compile(
            r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
-        html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
+        html = doubleheading.sub('\\g<firsthead>'+'\n<h3'+'\\g<secondhead>'+'</h3>', html)

        # If scene break formatting is enabled, find all blank paragraphs that definitely aren't scenebreaks,
        # style it with the 'whitespace' class. All remaining blank lines are styled as softbreaks.

@@ -839,7 +839,7 @@ class HeuristicProcessor(object):
        # If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
        if getattr(self.extra_opts, 'format_scene_breaks', False):
            self.log.debug('Formatting scene breaks')
-            html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
+            html = re.sub('(?i)<div[^>]*>\\s*<br(\\s?/)?>\\s*</div>', '<p></p>', html)
            html = self.detect_scene_breaks(html)
            html = self.detect_whitespace(html)
            html = self.detect_soft_breaks(html)

@@ -856,9 +856,9 @@ class HeuristicProcessor(object):
                replacement_break = self.markup_user_break(replacement_break)
                if scene_break_count >= 1:
                    html = detected_scene_break.sub(replacement_break, html)
-                    html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
+                    html = re.sub('<p\\s+class="softbreak"[^>]*>\\s*</p>', replacement_break, html)
                else:
-                    html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
+                    html = re.sub('<p\\s+class="softbreak"[^>]*>\\s*</p>', replacement_break, html)

        if self.deleted_nbsps:
            # put back non-breaking spaces in empty paragraphs so they render correctly
@@ -41,6 +41,6 @@ def dump(path):

    print(path, 'dumped to', dest)

+
if __name__ == '__main__':
    dump(sys.argv[-1])
-

@@ -165,6 +165,7 @@ class Parser(object):
            ans['text_assertion'] = ta
        return raw[1:]

+
_parser = None

@@ -203,5 +204,3 @@ def cfi_sort_key(cfi, only_path=True):
    step = steps[-1] if steps else {}
    offsets = (step.get('temporal_offset', 0), tuple(reversed(step.get('spatial_offset', (0, 0)))), step.get('text_offset', 0), )
    return (step_nums, offsets)
-
-
@@ -100,5 +100,6 @@ class Tests(unittest.TestCase):
def find_tests():
    return unittest.TestLoader().loadTestsFromTestCase(Tests)

+
if __name__ == '__main__':
    unittest.TextTestRunner(verbosity=2).run(find_tests())

@@ -62,5 +62,6 @@ def main(args=sys.argv):
    any2lit(opts, args[1])
    return 0

+
if __name__ == '__main__':
    sys.exit(main())

@@ -104,6 +104,7 @@ def f60_79(B, C, D):
def f6_42(B, C, D):
    return (B + C) ^ C

+
f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20

# ...and delightful changes
@@ -321,6 +322,7 @@ def new(arg=None):

    return crypto

+
if __name__ == '__main__':
    def main():
        import sys

@@ -57,6 +57,7 @@ def invert_tag_map(tag_map):
    tattrs[0] = dattrs
    return tags, tattrs

+
OPF_MAP = invert_tag_map(maps.OPF_MAP)
HTML_MAP = invert_tag_map(maps.HTML_MAP)

@@ -76,6 +77,7 @@ def packguid(guid):
    values = [int(value, 16) for value in values]
    return pack("<LHHBBBBBBBB", *values)

+
FLAG_OPENING = (1 << 0)
FLAG_CLOSING = (1 << 1)
FLAG_BLOCK = (1 << 2)
@@ -153,9 +153,9 @@ class HTMLConverter(object):
                  (re.compile('<hr>', re.IGNORECASE),
                   lambda match : '<span style="page-break-after:always"> </span>'),
                  # Create header tags
-                  (re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
+                  (re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
                   lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
-                  (re.compile('<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
+                  (re.compile('<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
                   lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
                  (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                   lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),

@@ -409,7 +409,7 @@ class HTMLConverter(object):
        selector name and the value is a dictionary of properties
        """
        sdict, pdict = {}, {}
-        style = re.sub('/\*.*?\*/', '', style)  # Remove /*...*/ comments
+        style = re.sub('/\\*.*?\\*/', '', style)  # Remove /*...*/ comments
        for sel in re.findall(HTMLConverter.SELECTOR_PAT, style):
            for key in sel[0].split(','):
                val = self.parse_style_properties(sel[1])
@@ -148,7 +148,7 @@ class OverDrive(Source):
            fix_slashes = re.compile(r'\\/')
            thumbimage = fix_slashes.sub('/', thumbimage)
            worldcatlink = fix_slashes.sub('/', worldcatlink)
-            cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', thumbimage)
+            cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\\g<img>100', thumbimage)
            social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
            series_num = ''
            if not series:

@@ -254,7 +254,7 @@ class OverDrive(Source):

    def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
        close_matches = []
-        raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
+        raw = re.sub('.*?\\[\\[(?P<content>.*?)\\]\\].*', '[[\\g<content>]]', raw)
        results = json.loads(raw)
        # log.error('raw results are:'+str(results))
        # The search results are either from a keyword search or a multi-format list from a single ID,

@@ -149,7 +149,7 @@ class Ozon(Source):
            # Redirect page: trying to extract ozon_id from javascript data
            h = HTMLParser()
            entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
-            json_pat = re.compile(u'dataLayer\s*=\s*(.+)?;')
+            json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
            json_info = re.search(json_pat, entry_string)
            jsondata = json_info.group(1) if json_info else None
            if jsondata:
@@ -344,7 +344,7 @@ class Ozon(Source):

        pub_year = None
        pub_year_block = entry.xpath(u'.//div[@class="bOneTileProperty"]/text()')
-        year_pattern = re.compile('\d{4}')
+        year_pattern = re.compile(r'\d{4}')
        if pub_year_block:
            pub_year = re.search(year_pattern, pub_year_block[0])
            if pub_year:

@@ -625,8 +625,8 @@ def _translageLanguageToCode(displayLang):  # {{{
def _normalizeAuthorNameWithInitials(name):  # {{{
    res = name
    if name:
-        re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
-        re2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
+        re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
+        re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
        matcher = re.match(re1, unicode(name), re.UNICODE)
        if not matcher:
            matcher = re.match(re2, unicode(name), re.UNICODE)
@@ -370,6 +370,7 @@ def set_metadata(stream, mi):
    mu.update(mi)
    return

+
if __name__ == '__main__':
    if False:
        # Test get_metadata()

@@ -388,4 +389,3 @@ if __name__ == '__main__':
        updated_data = open(tokens[0]+'-updated' + '.' + tokens[2],'wb')
        updated_data.write(stream.getvalue())
        updated_data.close()
-

@@ -45,6 +45,6 @@ def inspect_mobi(path_or_stream, ddir=None):  # {{{
def main():
    inspect_mobi(sys.argv[1])

+
if __name__ == '__main__':
    main()
-
@@ -350,13 +350,13 @@ class MobiReader(object):
        # Swap inline and block level elements, and order block level elements according to priority
        # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
        self.processed_html = re.sub(
-            r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
+            r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\\g<para>'+'\\g<styletags>', self.processed_html)
        self.processed_html = re.sub(
-            r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
+            r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\\g<styletags>'+'\\g<para>', self.processed_html)
        self.processed_html = re.sub(
-            r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html)
+            r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\\g<para>'+'\\g<blockquote>', self.processed_html)
        self.processed_html = re.sub(
-            r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
+            r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\\g<blockquote>'+'\\g<para>', self.processed_html)
        bods = htmls = 0
        for x in re.finditer(u'</body>|</html>', self.processed_html):
            if x == '</body>':

@@ -692,7 +692,7 @@ class MobiReader(object):
                continue
            if reached and x.tag == 'a':
                href = x.get('href', '')
-                if href and re.match('\w+://', href) is None:
+                if href and re.match('\\w+://', href) is None:
                    try:
                        text = u' '.join([t.strip() for t in
                            x.xpath('descendant::text()')])
@@ -374,6 +374,7 @@ class NonLinearNCXIndex(NCXIndex):
        EndTagTable
    )))

+
if __name__ == '__main__':
    # Generate a document with a large number of index entries using both
    # calibre and kindlegen and compare the output

@@ -393,4 +394,3 @@ if __name__ == '__main__':

    from calibre.gui2.tweak_book.diff.main import main
    main(['cdiff', 'decompiled_index/mobi8/ncx.record', 'x/ncx.record'])
-

@@ -114,6 +114,7 @@ def normalize_simple_composition(name, cssvalue, composition, check_inherit=True
            break
    return style

+
font_composition = ('font-style', 'font-variant', 'font-weight', 'font-size', 'line-height', 'font-family')
@@ -144,6 +145,7 @@ def normalize_border(name, cssvalue):
        style.update({k.replace(EDGES[0], edge):v for k, v in vals.iteritems()})
    return style

+
normalizers = {
    'list-style': simple_normalizer('list-style', ('type', 'position', 'image')),
    'font': lambda prop, v: normalize_font(v),

@@ -243,6 +245,7 @@ def condense_border(style, props):
        style.removeProperty(prop.name)
    style.setProperty('border', edge_vals[0].value)

+
condensers = {'margin': simple_condenser('margin', condense_edge), 'padding': simple_condenser('padding', condense_edge), 'border': condense_border}

@@ -430,5 +433,6 @@ def test_normalization(return_tests=False):  # {{{
    unittest.TextTestRunner(verbosity=4).run(tests)
# }}}

+
if __name__ == '__main__':
    test_normalization()
@@ -82,6 +82,7 @@ def iterrules(container, sheet_name, rules=None, media_rule_ok=media_allowed, ru

    importing.discard(sheet_name)

+
StyleDeclaration = namedtuple('StyleDeclaration', 'index declaration pseudo_element')
Specificity = namedtuple('Specificity', 'is_style num_id num_class num_elem rule_index')

@@ -224,6 +225,7 @@ def resolve_styles(container, name, select=None, sheet_callback=None):

    return partial(resolve_property, style_map), partial(resolve_pseudo_property, style_map, pseudo_style_map), select

+
_defvals = None

@@ -115,6 +115,7 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.
    zf.writestr(html_name, HTML)
    zf.writestr(toc_name, ncx)

+
if __name__ == '__main__':
    from calibre.ebooks.metadata.book.base import Metadata
    mi = Metadata('Test book', authors=('Kovid Goyal',))
@@ -140,5 +140,6 @@ def main(args=sys.argv):

    return 0

+
if __name__ == '__main__':
    sys.exit(main())

@@ -134,7 +134,6 @@ def main():
    print('PDF written to:', pdf)
    print('Image written to:', path)

-
if __name__ == '__main__':
    main()
@@ -188,7 +188,7 @@ class PMLMLizer(object):
            text = text.replace('\\Q="%s"' % unused, '')

        # Remove \Cn tags that are within \x and \Xn tags
-        text = re.sub(unicode(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\g<t>\g<a>\g<b>\g<t>', text)
+        text = re.sub(unicode(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\\g<t>\\g<a>\\g<b>\\g<t>', text)

        # Replace bad characters.
        text = text.replace(u'\xc2', '')

@@ -206,7 +206,7 @@ class PMLMLizer(object):
        text = re.sub('[ ]{2,}', ' ', text)

        # Condense excessive \c empty line sequences.
-        text = re.sub('(\\c\s*\\c\s*){2,}', '\\c \n\\c\n', text)
+        text = re.sub('(\\c\\s*\\c\\s*){2,}', '\\c \n\\c\n', text)

        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)
@@ -142,7 +142,7 @@ class RTFMLizer(object):
        return text

    def remove_tabs(self, text):
-        self.log.debug('\Replace tabs with space for processing...')
+        self.log.debug('Replace tabs with space for processing...')
        text = text.replace('\t', ' ')

        return text

@@ -32,6 +32,7 @@ class CheckEncoding:
            return True
        return False

+
if __name__ == '__main__':
    check_encoding_obj = CheckEncoding()
    check_encoding_obj.check_encoding(sys.argv[1])

@@ -175,6 +175,7 @@ class DefaultEncoding:
        elif enc == 'pca':
            self.__code_page = '850'

+
if __name__ == '__main__':
    import sys
    encode_obj = DefaultEncoding(
@@ -411,8 +411,6 @@ class FieldStrings:
        line -- the string to be parse
        Retuns:
            The name of the field
        Logic:
-            self.__link_switch = re.compile(r'\\l\s{1,}(.*?)\s')
-
        """
        self.__link_switch = re.compile(r'\\l\s{1,}"{0,1}(.*?)"{0,1}\s')
        the_string = name

@@ -562,6 +562,8 @@ class Hex2Utf8:
            self.__convert_preamble()
        else:
            self.__convert_body()
+
+
"""
how to swap case for non-capitals
my_string.swapcase()
@@ -120,7 +120,7 @@ class ListTable:
        Requires: line -- line to process
        Returns: nothing
        Logic:
-            I have found \list.
+            I have found \\list.
            Change the state to list
            Get the open bracket count so you know when this state ends.
            Append an empty list to all lists.

@@ -162,7 +162,7 @@ class ListTable:
        Requires: line -- line to process
        Returns: nothing
        Logic:
-            I have found \listlevel.
+            I have found \\listlevel.
            Change the state to level
            Get the open bracket count so you know when this state ends.
            Append an empty list to the last list inside all lists.

@@ -285,7 +285,7 @@ class ListTable:
        Returns:
            nothing
        Logic:
-            Method is used for to parse text in the \leveltext group.
+            Method is used for to parse text in the \\leveltext group.
        """
        num = line[18:]
        the_num = int(num, 16)
@@ -270,6 +270,8 @@ class ParseOptions:
            return options_dict, arguments
        else:
            return 0,0
+
+
if __name__ == '__main__':
    this_dict = {
        'indents': [0, 'i'],

@@ -52,7 +52,7 @@ class OverrideTable:
        Returns:
            nothing
        Logic:
-            The group {\override has been found.
+            The group {\\override has been found.
            Check for the end of the group.
            Otherwise, add appropriate tokens to the override dictionary.
        """

@@ -128,7 +128,7 @@ if another paragraph_def is found, the state changes to collect_tokens.
    'list-conti' : 'list-continue',
    'list-hang_' : 'list-hang',
    # 'list-tebef' : 'list-text-before',
-    'list-level' : 'level',
+    # 'list-level' : 'level',
    'list-id___' : 'list-id',
    'list-start' : 'list-start',
    'nest-level' : 'nest-level',
@@ -198,7 +198,7 @@ if another paragraph_def is found, the state changes to collect_tokens.
    'bor-cel-to' : 'border-cell-top',
    'bor-cel-le' : 'border-cell-left',
    'bor-cel-ri' : 'border-cell-right',
-    'bor-par-bo' : 'border-paragraph-bottom',
+    # 'bor-par-bo' : 'border-paragraph-bottom',
    'bor-par-to' : 'border-paragraph-top',
    'bor-par-le' : 'border-paragraph-left',
    'bor-par-ri' : 'border-paragraph-right',

@@ -413,7 +413,7 @@ if another paragraph_def is found, the state changes to collect_tokens.
        Returns:
            nothing
        Logic:
-            I have found a \pard while I am collecting tokens. I want to reset
+            I have found a \\pard while I am collecting tokens. I want to reset
            the dectionary and do nothing else.
        """
        # Change this

@@ -584,7 +584,7 @@ class ProcessTokens:
    }
    """
    # unknown
-    # These must get passed on because they occure after \*
+    # These must get passed on because they occure after \\*
    'do' : ('un', 'unknown___', self.default_func),
    'company' : ('un', 'company___', self.default_func),
    'shpinst' : ('un', 'unknown___', self.default_func),
@@ -716,10 +716,10 @@ class ProcessTokens:
    def divide_num(self, numerator, denominator):
        try:
            # calibre why ignore negative number? Wrong in case of \fi
-            numerator = float(re.search('[0-9.\-]+', numerator).group())
+            numerator = float(re.search('[0-9.\\-]+', numerator).group())
        except TypeError as msg:
            if self.__run_level > 3:
-                msg = ('No number to process?\nthis indicates that the token \(\\li\) \
+                msg = ('No number to process?\nthis indicates that the token \\(\\li\\) \
should have a number and does not\nnumerator is \
"%s"\ndenominator is "%s"\n') % (numerator, denominator)
                raise self.__bug_handler(msg)

@@ -27,19 +27,19 @@ class Sections:
    logic
    ---------------
    The tags for the first section breaks have already been written.
-    RTF stores section breaks with the \sect tag. Each time this tag is
+    RTF stores section breaks with the \\sect tag. Each time this tag is
    encountered, add one to the counter.
-    When I encounter the \sectd tag, I want to collect all the appropriate tokens
-    that describe the section. When I reach a \pard, I know I an stop collecting
+    When I encounter the \\sectd tag, I want to collect all the appropriate tokens
+    that describe the section. When I reach a \\pard, I know I an stop collecting
    tokens and write the section tags.
    The exception to this method occurs when sections occur in field blocks, such
    as the index. Normally, two section break occur within the index and other
-    field-blocks. (If less or more section breaks occurr, this code may not work.)
+    field-blocks. (If less or more section breaks occur, this code may not work.)
    I want the sections to occur outside of the index. That is, the index
    should be nested inside one section tag. After the index is complete, a new
    section should begin.
    In order to write the sections outside of the field blocks, I have to store
-    all of the field block as a string. When I ecounter the \sect tag, add one to
+    all of the field block as a string. When I ecounter the \\sect tag, add one to
    the section counter, but store this number in a list. Likewise, store the
    information describing the section in another list.
    When I reach the end of the field block, choose the first item from the
@@ -243,7 +243,7 @@ class Sections:
            nothing
        Logic:
            Text or control words indicating text have been found
-            before \pard. This shoud indicate older RTF. Reset the state
+            before \\pard. This shoud indicate older RTF. Reset the state
            Write the section defintion. Insert a paragraph definition.
            Insert {} to mark the end of a paragraph defintion
        """

@@ -121,7 +121,7 @@ class Styles:
    'list-conti' : 'list-continue',
    'list-hang_' : 'list-hang',
    # 'list-tebef' : 'list-text-before',
-    'list-level' : 'level',
+    # 'list-level' : 'level',
    'list-id___' : 'list-id',
    'list-start' : 'list-start',
    'nest-level' : 'nest-level',

@@ -192,7 +192,7 @@ class Styles:
    'bor-cel-to' : 'border-cell-top',
    'bor-cel-le' : 'border-cell-left',
    'bor-cel-ri' : 'border-cell-right',
-    'bor-par-bo' : 'border-paragraph-bottom',
+    # 'bor-par-bo' : 'border-paragraph-bottom',
    'bor-par-to' : 'border-paragraph-top',
    'bor-par-le' : 'border-paragraph-left',
    'bor-par-ri' : 'border-paragraph-right',
@@ -333,6 +333,7 @@ def main():
        return 1
    return 0

+
if __name__ == "__main__":
    """SNB file unit test"""
    sys.exit(main())

@@ -160,6 +160,7 @@ class MainWindow(QMainWindow):
        self.window_unblocked.emit()
        return QMainWindow.event(self, ev)

+
app=QApplication([])
app.setAttribute(Qt.AA_DontUseNativeMenuBar, False)
app.setApplicationName('com.calibre-ebook.DBusExportDemo')

@@ -456,6 +456,7 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
        self.rule = ('', txt)
        QDialog.accept(self)

+
if __name__ == '__main__':
    app = QApplication([])
    from calibre.ebooks.metadata.book.base import field_metadata
@@ -581,5 +581,6 @@ class Word(object):
def main(args=sys.argv):
    return 0

+
if __name__ == '__main__':
    sys.exit(main())

@@ -171,8 +171,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        info_dialog(self, _('Done'),
                    _('Confirmation dialogs have all been reset'), show=True)

+
if __name__ == '__main__':
    from PyQt5.Qt import QApplication
    app = QApplication([])
    test_widget('Interface', 'Behavior')
-

@@ -190,6 +190,7 @@ class Stores(OrderedDict):
        return cls(builtin.gui, builtin.name, config=builtin.config,
                   base_plugin=builtin.base_plugin), ver

+
if __name__ == '__main__':
    st = time.time()
    count = 0
@@ -199,5 +200,3 @@ if __name__ == '__main__':
        print(code.encode('utf-8'))
        print('\n', '_'*80, '\n', sep='')
    print ('Time to download all %d plugins: %.2f seconds'%(count, time.time() - st))
-
-

@@ -153,15 +153,15 @@ class Matches(QAbstractItemModel):
        # Remove filter identifiers
        # Remove the prefix.
        for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'):
-            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
+            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
            query = query.replace('%s:' % loc, '')
        # Remove the prefix and search text.
        for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
            query = re.sub(r'%s:"[^"]"' % loc, '', query)
            query = re.sub(r'%s:[^\s]*' % loc, '', query)
        # Remove whitespace
-        query = re.sub('\s', '', query)
-        mod_query = re.sub('\s', '', mod_query)
+        query = re.sub(r'\s', '', query)
+        mod_query = re.sub(r'\s', '', mod_query)
        # If mod_query and query are the same then there were no filter modifiers
        # so this isn't a filterable query.
        if mod_query == query:
@@ -128,6 +128,7 @@ def set_use_primary_find_in_search(toWhat):
    global pref_use_primary_find_in_search
    pref_use_primary_find_in_search = toWhat

+
y, c, n, u = map(icu_lower, (_('yes'), _('checked'), _('no'), _('unchecked')))
yes_vals = {y, c, 'true'}
no_vals = {n, u, 'false'}

@@ -1215,5 +1216,3 @@ class SortKeyGenerator(object):
    # }}}

# }}}
-
-
@@ -241,7 +241,7 @@ class BIBTEX(CatalogPlugin):
        # define a function to replace the template entry by its value
        def tpl_replace(objtplname) :

-            tpl_field = re.sub(u'[\{\}]', u'', objtplname.group())
+            tpl_field = re.sub(u'[\\{\\}]', u'', objtplname.group())

            if tpl_field in TEMPLATE_ALLOWED_FIELDS :
                if tpl_field in ['pubdate', 'timestamp'] :

@@ -258,14 +258,14 @@ class BIBTEX(CatalogPlugin):

        if len(template_citation) >0 :
            tpl_citation = bibtexclass.utf8ToBibtex(
-                bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}',
+                bibtexclass.ValidateCitationKey(re.sub(u'\\{[^{}]*\\}',
                    tpl_replace, template_citation)))

            if len(tpl_citation) >0 :
                return tpl_citation

        if len(entry["isbn"]) > 0 :
-            template_citation = u'%s' % re.sub(u'[\D]',u'', entry["isbn"])
+            template_citation = u'%s' % re.sub(u'[\\D]',u'', entry["isbn"])

        else :
            template_citation = u'%s' % str(entry["id"])
@@ -154,9 +154,9 @@ class CSV_XML(CatalogPlugin):

                # Convert HTML to markdown text
                if type(item) is unicode:
-                    opening_tag = re.search('<(\w+)(\x20|>)', item)
+                    opening_tag = re.search('<(\\w+)(\x20|>)', item)
                    if opening_tag:
-                        closing_tag = re.search('<\/%s>$' % opening_tag.group(1), item)
+                        closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item)
                        if closing_tag:
                            item = html2text(item)

@@ -61,7 +61,7 @@ class EPUB_MOBI(CatalogPlugin):
                          "Default: '%default'\n"
                          "Applies to: AZW3, EPUB, MOBI output formats")),
                   Option('--exclude-genre',
-                          default='\[.+\]|^\+$',
+                          default='\\[.+\\]|^\\+$',
                          dest='exclude_genre',
                          action=None,
                          help=_("Regex describing tags to exclude as genres.\n"
@@ -1209,11 +1209,11 @@ class CatalogBuilder(object):
            clipped to max_len
        """

-        normalized = massaged = re.sub('\s', '', ascii_text(tag).lower())
-        if re.search('\W', normalized):
+        normalized = massaged = re.sub('\\s', '', ascii_text(tag).lower())
+        if re.search('\\W', normalized):
            normalized = ''
            for c in massaged:
-                if re.search('\W', c):
+                if re.search('\\W', c):
                    normalized += self.generate_unicode_name(c)
                else:
                    normalized += c

@@ -1376,7 +1376,7 @@ class CatalogBuilder(object):
        Return:
            (str): asciized version of author
        """
-        return re.sub("\W", "", ascii_text(author))
+        return re.sub("\\W", "", ascii_text(author))

    def generate_format_args(self, book):
        """ Generate the format args for template substitution.
@@ -4209,9 +4209,9 @@ class CatalogBuilder(object):

        # Generate a legal XHTML id/href string
        if self.letter_or_symbol(series) == self.SYMBOLS:
-            return "symbol_%s_series" % re.sub('\W', '', series).lower()
+            return "symbol_%s_series" % re.sub('\\W', '', series).lower()
        else:
-            return "%s_series" % re.sub('\W', '', ascii_text(series)).lower()
+            return "%s_series" % re.sub('\\W', '', ascii_text(series)).lower()

    def generate_short_description(self, description, dest=None):
        """ Generate a truncated version of the supplied string.

@@ -4292,7 +4292,7 @@ class CatalogBuilder(object):
            else:
                if re.match('[0-9]+', word[0]):
                    word = word.replace(',', '')
-                    suffix = re.search('[\D]', word)
+                    suffix = re.search('[\\D]', word)
                    if suffix:
                        word = '%10.0f%s' % (float(word[:suffix.start()]), word[suffix.start():])
                    else:

@@ -4308,7 +4308,7 @@ class CatalogBuilder(object):
            else:
                if re.search('[0-9]+', word[0]):
                    word = word.replace(',', '')
-                    suffix = re.search('[\D]', word)
+                    suffix = re.search('[\\D]', word)
                    if suffix:
                        word = '%10.0f%s' % (float(word[:suffix.start()]), word[suffix.start():])
                    else:
@@ -4638,7 +4638,7 @@ class CatalogBuilder(object):
        # confusion with decimal points.

        # Explode lost CRs to \n\n
-        for lost_cr in re.finditer('([a-z])([\.\?!])([A-Z])', comments):
+        for lost_cr in re.finditer('([a-z])([\\.\\?!])([A-Z])', comments):
            comments = comments.replace(lost_cr.group(),
                                        '%s%s\n\n%s' % (lost_cr.group(1),
                                                        lost_cr.group(2),

@@ -90,8 +90,8 @@ class NumberToText(object):  # {{{
        # Special case ordinals
        if re.search('[st|nd|rd|th]',self.number):
            self.number = re.sub(',','',self.number)
-            ordinal_suffix = re.search('[\D]', self.number)
-            ordinal_number = re.sub('\D','',re.sub(',','',self.number))
+            ordinal_suffix = re.search('[\\D]', self.number)
+            ordinal_number = re.sub('\\D','',re.sub(',','',self.number))
            if self.verbose:
                self.log("Ordinal: %s" % ordinal_number)
            self.number_as_float = ordinal_number

@@ -120,7 +120,7 @@ class NumberToText(object):  # {{{
            self.text = NumberToText(self.number.replace('%',' percent')).text

        # Test for decimal
-        elif re.search('\.',self.number):
+        elif re.search('\\.',self.number):
            if self.verbose:
                self.log("Decimal: %s" % self.number)
            self.number_as_float = self.number
@@ -151,12 +151,12 @@ class NumberToText(object):  # {{{
            self.text = NumberToText(self.number_as_float).text

        # Test for hybrid e.g., 'K2, 2nd, 10@10'
-        elif re.search('[\D]+', self.number):
+        elif re.search('[\\D]+', self.number):
            if self.verbose:
                self.log("Hybrid: %s" % self.number)
            # Split the token into number/text
-            number_position = re.search('\d',self.number).start()
-            text_position = re.search('\D',self.number).start()
+            number_position = re.search('\\d',self.number).start()
+            text_position = re.search('\\D',self.number).start()
            if number_position < text_position:
                number = self.number[:text_position]
                text = self.number[text_position:]

@@ -225,4 +225,3 @@ class NumberToText(object):  # {{{
            self.log(u'resultString: %s' % resultString)
        self.text = resultString.strip().capitalize()
    # }}}
-

@@ -16,7 +16,7 @@ from calibre.utils.html2text import html2text

# Hackish - ignoring sentences ending or beginning in numbers to avoid
# confusion with decimal points.
-lost_cr_pat = re.compile('([a-z])([\.\?!])([A-Z])')
+lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
                          re.IGNORECASE)
@@ -657,7 +657,7 @@ class CustomColumns(object):
                    editable=True, display={}):
        if not label:
            raise ValueError(_('No label was provided'))
-        if re.match('^\w*$', label) is None or not label[0].isalpha() or label.lower() != label:
+        if re.match('^\\w*$', label) is None or not label[0].isalpha() or label.lower() != label:
            raise ValueError(_('The label must contain only lower case letters, digits and underscores, and start with a letter'))
        if datatype not in self.CUSTOM_DATA_TYPES:
            raise ValueError('%r is not a supported data type'%datatype)

@@ -809,5 +809,3 @@ class CustomColumns(object):
        self.conn.executescript(script)
        self.conn.commit()
        return num
-
-

@@ -50,7 +50,7 @@ def _connect(path):
    conn = sqlite.connect(path, factory=Connection, detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
    conn.row_factory = lambda cursor, row : list(row)
    conn.create_aggregate('concat', 1, Concatenate)
-    title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
+    title_pat = re.compile('^(A|The|An)\\s+', re.IGNORECASE)

    def title_sort(title):
        match = title_pat.search(title)
@@ -1514,6 +1514,7 @@ def text_to_tokens(text):
            continue
    return ans, OR

+
if __name__ == '__main__':
    sqlite.enable_callback_tracebacks(True)
    db = LibraryDatabase('/home/kovid/temp/library1.db.orig')

@@ -68,6 +68,7 @@ def _py_convert_timestamp(val):
        return parse_date(val, as_utc=False)
    return None

+
convert_timestamp = _py_convert_timestamp if _c_speedup is None else \
        _c_convert_timestamp

@@ -75,6 +76,7 @@ convert_timestamp = _py_convert_timestamp if _c_speedup is None else \
def adapt_datetime(dt):
    return isoformat(dt, sep=' ')

+
sqlite.register_adapter(datetime, adapt_datetime)
sqlite.register_converter('timestamp', convert_timestamp)

@@ -82,6 +84,7 @@ sqlite.register_converter('timestamp', convert_timestamp)
def convert_bool(val):
    return val != '0'

+
sqlite.register_adapter(bool, lambda x : 1 if x else 0)
sqlite.register_converter('bool', convert_bool)
sqlite.register_converter('BOOL', convert_bool)
@ -411,4 +414,3 @@ def test():
    c = sqlite.connect(':memory:')
    if load_c_extensions(c, True):
        print('Loaded C extension successfully')

@ -64,6 +64,7 @@ def extract_member(filename, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)
        if match.search(name):
            return name, zf.read(name)


comic_exts = {'png', 'jpg', 'jpeg', 'gif', 'webp'}

@ -14,6 +14,8 @@ from calibre.constants import islinux

def pre_activated_socket():
    return None


has_preactivated_support = False

if islinux:

@ -73,7 +73,7 @@ def check_for_critical_bugs():
        print('WARNING: Translation errors detected')
        print('See the .errors directory and http://translate.sourceforge.net/wiki/toolkit/using_pofilter')


if __name__ == '__main__':
    import sys
    import_from_launchpad(sys.argv[1])

@ -146,7 +146,7 @@ def make(filename, outfile):
    # This is a message with plural forms
    elif l.startswith('msgid_plural'):
        if section != ID:
            print('msgid_plural not preceeded by msgid on %s:%d' %\
            print('msgid_plural not preceeded by msgid on %s:%d' %
                  (infile, lno), file=sys.stderr)
            sys.exit(1)
        l = l[12:]

@ -157,7 +157,7 @@ def make(filename, outfile):
        section = STR
        if l.startswith('msgstr['):
            if not is_plural:
                print('plural without msgid_plural on %s:%d' %\
                print('plural without msgid_plural on %s:%d' %
                      (infile, lno), file=sys.stderr)
                sys.exit(1)
            l = l.split(']', 1)[1]

@ -165,7 +165,7 @@ def make(filename, outfile):
            msgstr += '\0' # Separator of the various plural forms
        else:
            if is_plural:
                print('indexed msgstr required for plural on %s:%d' %\
                print('indexed msgstr required for plural on %s:%d' %
                      (infile, lno), file=sys.stderr)
                sys.exit(1)
            l = l[6:]

@ -180,7 +180,7 @@ def make(filename, outfile):
        elif section == STR:
            msgstr += l
        else:
            print('Syntax error on %s:%d' % (infile, lno), \
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)

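Aside: the four hunks above are all the same pep8 fix, E502. A trailing backslash is redundant when the line is already inside an open bracket, because Python continues it implicitly. A minimal sketch of the before and after, with identical behavior:

import sys

# Redundant backslash inside the open parenthesis (old style)...
print('Syntax error on %s:%d' % ('demo.po', 7), \
    'before:', file=sys.stderr)
# ...and the cleaned-up form without it (new style).
print('Syntax error on %s:%d' % ('demo.po', 7),
    'before:', file=sys.stderr)
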
@ -1627,6 +1627,7 @@ class Zeroconf(object):
# Test a few module features, including service registration, service
# query (for Zoe), and service unregistration.


if __name__ == '__main__':
    print("Multicast DNS Service Discovery for Python, version", __version__)
    r = Zeroconf()

@ -49,6 +49,7 @@ def allowed(x):
def py_clean_xml_chars(unicode_string):
    return u''.join(filter(allowed, unicode_string))


clean_xml_chars = native_clean_xml_chars or py_clean_xml_chars

@ -85,5 +86,4 @@ def unescape(text, rm=False, rchar=u''):
        if rm:
            return rchar # replace by char
        return text # leave as is
    return re.sub("&#?\w+;", fixup, text)
    return re.sub("&#?\\w+;", fixup, text)

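Aside, a hedged sketch of the entity pattern: &#?\w+; matches both named (&amp;) and numeric (&#169;) entities. The fixup below is a toy stand-in for the real resolver in this file, which handles the full entity set:

import re

def fixup(m):
    # Toy resolver: handle a single named entity, leave the rest alone.
    return '&' if m.group(0) == '&amp;' else m.group(0)

print(re.sub("&#?\\w+;", fixup, 'Tom &amp; Jerry &#169;'))
# -> 'Tom & Jerry &#169;'
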
@ -30,6 +30,7 @@ def same_thread(func):
        return func(self, *args, **kwargs)
    return check_thread


FreeTypeError = getattr(plugins['freetype'][0], 'FreeTypeError', Exception)

@ -80,5 +81,3 @@ class FreeType(object):
    @same_thread
    def load_font(self, data):
        return Face(self.ft.load_font(data))

@ -113,6 +113,7 @@ class FontMetrics(object):
        'The width of the string at the specified pixel size and stretch, in pixels'
        return sum(self.advance_widths(string, pixel_size, stretch))


if __name__ == '__main__':
    import sys
    from calibre.utils.fonts.sfnt.container import Sfnt

@ -127,4 +128,3 @@ if __name__ == '__main__':
    print('AvgWidth:', m.pdf_avg_width)
    print('ItalicAngle', m.post.italic_angle)
    print('StemV', m.pdf_stemv)

@ -640,7 +640,7 @@ class BuiltinReGroup(BuiltinFormatterFunction):
            'the template and the eval functions, you use [[ for { and ]] for }.'
            ' The following example in template program mode looks for series '
            'with more than one word and uppercases the first word: '
            "{series:'re_group($, \"(\S* )(.*)\", \"[[$:uppercase()]]\", \"[[$]]\")'}")
            "{series:'re_group($, \"(\\S* )(.*)\", \"[[$:uppercase()]]\", \"[[$]]\")'}")

    def evaluate(self, formatter, kwargs, mi, locals, val, pattern, *args):
        from formatter import EvalFormatter

@ -924,9 +924,9 @@ class BuiltinSublist(BuiltinFormatterFunction):
            'of zero is assumed to be the length of the list. Examples using '
            'basic template mode and assuming that the tags column (which is '
            'comma-separated) contains "A, B, C": '
            '{tags:sublist(0,1,\,)} returns "A". '
            '{tags:sublist(-1,0,\,)} returns "C". '
            '{tags:sublist(0,-1,\,)} returns "A, B".'
            '{tags:sublist(0,1,\\,)} returns "A". '
            '{tags:sublist(-1,0,\\,)} returns "C". '
            '{tags:sublist(0,-1,\\,)} returns "A, B".'
            )

    def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):

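Aside on why these doc strings double the backslash: they are Python source, so \\, in the file renders as \, in the help text, and the template language needs the comma escaped so it is not read as an argument separator. A hedged plain-Python sketch of the list slicing the sublist() examples describe (demo_sublist is illustrative, not calibre's implementation):

def demo_sublist(val, start_index, end_index, sep=','):
    # Plain-Python equivalent of the documented sublist() behavior.
    items = [v.strip() for v in val.split(sep)]
    if end_index == 0:
        end_index = len(items)
    return ', '.join(items[start_index:end_index])

tags = 'A, B, C'
assert demo_sublist(tags, 0, 1) == 'A'
assert demo_sublist(tags, -1, 0) == 'C'
assert demo_sublist(tags, 0, -1) == 'A, B'
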
@ -26,6 +26,7 @@ class DirTooLarge(ValueError):
    def __init__(self, bdir):
        ValueError.__init__(self, 'The directory {0} is too large to monitor. Try increasing the value in /proc/sys/fs/inotify/max_user_watches'.format(bdir))


_inotify = None

@ -320,6 +321,7 @@ class INotifyTreeWatcher(INotify):
        self.modified = set()
        return ret


if __name__ == '__main__':
    w = INotifyTreeWatcher(sys.argv[-1])
    w()

@ -87,6 +87,7 @@ class ConnectedWorker(Thread):
class CriticalError(Exception):
    pass


_name_counter = itertools.count()

if islinux:

@ -384,4 +385,3 @@ class Server(Thread):

    def __exit__(self, *args):
        self.close()

@ -85,5 +85,6 @@ def main(args=sys.argv):
        f.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())

@ -292,7 +292,7 @@ if __name__ == '__main__':

    # this will be called when an exception occurs within a thread
    def handle_exception(request, exc_info):
        print("Exception occured in request #%s: %s" % \
        print("Exception occured in request #%s: %s" %
              (request.requestID, exc_info[1]))

    # assemble the arguments for each job to a list...

@ -15,7 +15,7 @@ from calibre.utils.icu import capitalize, upper
__all__ = ['titlecase']
__version__ = '0.5'

SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\\.?|via|vs\\.?'
PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""

SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)

@ -54,7 +54,7 @@ def titlecase(text):

    all_caps = upper(text) == text

    words = re.split('\s+', text)
    words = re.split('\\s+', text)
    line = []
    for word in words:
        if all_caps:

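Aside, a hedged illustration of what SMALL_WORDS does inside titlecase (the loop below is a simplification; the real function also handles punctuation and all-caps input):

import re

SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\\.?|via|vs\\.?'
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)

# Small words stay lower case mid-title; everything else is capitalized.
words = re.split('\\s+', 'the lord of the rings')
titled = [w if SMALL_WORDS.match(w) else w.capitalize() for w in words]
titled[0] = titled[0].capitalize()  # the first word is always capitalized
print(' '.join(titled))  # -> 'The Lord of the Rings'
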
@ -1,6 +1,5 @@
#!/usr/bin/env python2
from __future__ import with_statement
from __future__ import print_function
from __future__ import with_statement, print_function
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

@ -110,7 +109,7 @@ def default_is_link_wanted(url, tag):

class RecursiveFetcher(object):
    LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
                        ('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$'))
                        ('.exe\\s*$', '.mp3\\s*$', '.ogg\\s*$', '^\\s*mailto:', '^\\s*$'))
    # ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in
    # (
    #

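Aside, to close: a hedged sketch of how a filter tuple like LINK_FILTER is used, where any URL matching one of the compiled patterns is skipped by the fetcher (is_unwanted is illustrative, not calibre's API):

import re

LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
                    ('.exe\\s*$', '.mp3\\s*$', '.ogg\\s*$', '^\\s*mailto:', '^\\s*$'))

def is_unwanted(url):
    # True if any filter pattern matches somewhere in the URL.
    return any(pat.search(url) for pat in LINK_FILTER)

assert is_unwanted('mailto:someone@example.com')
assert is_unwanted('http://example.com/setup.exe')
assert not is_unwanted('http://example.com/article.html')
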