mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
e1c90d5be0
@ -585,7 +585,6 @@ application/vnd.osa.netdeploy
|
|||||||
application/vnd.osgi.bundle
|
application/vnd.osgi.bundle
|
||||||
application/vnd.osgi.dp dp
|
application/vnd.osgi.dp dp
|
||||||
application/vnd.otps.ct-kip+xml
|
application/vnd.otps.ct-kip+xml
|
||||||
application/vnd.palm oprc pdb pqa
|
|
||||||
application/vnd.paos.xml
|
application/vnd.paos.xml
|
||||||
application/vnd.pg.format str
|
application/vnd.pg.format str
|
||||||
application/vnd.pg.osasli ei6
|
application/vnd.pg.osasli ei6
|
||||||
@ -1082,7 +1081,6 @@ chemical/x-ncbi-asn1 asn
|
|||||||
chemical/x-ncbi-asn1-ascii ent prt
|
chemical/x-ncbi-asn1-ascii ent prt
|
||||||
chemical/x-ncbi-asn1-binary aso val
|
chemical/x-ncbi-asn1-binary aso val
|
||||||
chemical/x-ncbi-asn1-spec asn
|
chemical/x-ncbi-asn1-spec asn
|
||||||
chemical/x-pdb ent pdb
|
|
||||||
chemical/x-rosdal ros
|
chemical/x-rosdal ros
|
||||||
chemical/x-swissprot sw
|
chemical/x-swissprot sw
|
||||||
chemical/x-vamas-iso14976 vms
|
chemical/x-vamas-iso14976 vms
|
||||||
@ -1379,3 +1377,5 @@ application/x-cbr cbr
|
|||||||
application/x-cb7 cb7
|
application/x-cb7 cb7
|
||||||
application/x-koboreader-ebook kobo
|
application/x-koboreader-ebook kobo
|
||||||
image/wmf wmf
|
image/wmf wmf
|
||||||
|
application/ereader pdb
|
||||||
|
|
||||||
|
@ -10,7 +10,10 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
|
|
||||||
title = 'Wall Street Journal (free)'
|
title = 'Wall Street Journal (free)'
|
||||||
__author__ = 'Kovid Goyal, Sujata Raman, Joshua Oster-Morris, Starson17'
|
__author__ = 'Kovid Goyal, Sujata Raman, Joshua Oster-Morris, Starson17'
|
||||||
description = 'News and current affairs'
|
description = '''News and current affairs. This recipe only fetches complete
|
||||||
|
versions of the articles that are available free on the wsj.com website.
|
||||||
|
To get the rest of the articles, subscribe to the WSJ and use the other WSJ
|
||||||
|
recipe.'''
|
||||||
language = 'en'
|
language = 'en'
|
||||||
cover_url = 'http://dealbreaker.com/images/thumbs/Wall%20Street%20Journal%20A1.JPG'
|
cover_url = 'http://dealbreaker.com/images/thumbs/Wall%20Street%20Journal%20A1.JPG'
|
||||||
max_articles_per_feed = 1000
|
max_articles_per_feed = 1000
|
||||||
@ -151,6 +154,4 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
|
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
def cleanup(self):
|
|
||||||
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
|
|
||||||
|
|
||||||
|
@ -22,13 +22,15 @@ Run an embedded python interpreter.
|
|||||||
parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
|
parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
|
||||||
help='Debug the specified device driver.')
|
help='Debug the specified device driver.')
|
||||||
parser.add_option('-g', '--gui', default=False, action='store_true',
|
parser.add_option('-g', '--gui', default=False, action='store_true',
|
||||||
help='Run the GUI',)
|
help='Run the GUI with debugging enabled. Debug output is '
|
||||||
|
'printed to stdout and stderr.')
|
||||||
parser.add_option('--gui-debug', default=None,
|
parser.add_option('--gui-debug', default=None,
|
||||||
help='Run the GUI with a debug console, logging to the'
|
help='Run the GUI with a debug console, logging to the'
|
||||||
' specified path',)
|
' specified path. For internal use only, use the -g'
|
||||||
|
' option to run the GUI in debug mode',)
|
||||||
parser.add_option('--show-gui-debug', default=None,
|
parser.add_option('--show-gui-debug', default=None,
|
||||||
help='Display the specified log file.',)
|
help='Display the specified log file. For internal use'
|
||||||
|
' only.',)
|
||||||
parser.add_option('-w', '--viewer', default=False, action='store_true',
|
parser.add_option('-w', '--viewer', default=False, action='store_true',
|
||||||
help='Run the ebook viewer',)
|
help='Run the ebook viewer',)
|
||||||
parser.add_option('--paths', default=False, action='store_true',
|
parser.add_option('--paths', default=False, action='store_true',
|
||||||
|
@ -46,7 +46,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
|
|||||||
'italicize_common_cases', 'fix_indents',
|
'italicize_common_cases', 'fix_indents',
|
||||||
'html_unwrap_factor', 'unwrap_lines',
|
'html_unwrap_factor', 'unwrap_lines',
|
||||||
'delete_blank_paragraphs', 'format_scene_breaks',
|
'delete_blank_paragraphs', 'format_scene_breaks',
|
||||||
'dehyphenate', 'renumber_headings']
|
'dehyphenate', 'renumber_headings',
|
||||||
|
'replace_scene_breaks']
|
||||||
|
|
||||||
def print_help(parser, log):
|
def print_help(parser, log):
|
||||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||||
|
@ -531,6 +531,11 @@ OptionRecommendation(name='format_scene_breaks',
|
|||||||
'Replace soft scene breaks that use multiple blank lines with'
|
'Replace soft scene breaks that use multiple blank lines with'
|
||||||
'horizontal rules.')),
|
'horizontal rules.')),
|
||||||
|
|
||||||
|
OptionRecommendation(name='replace_scene_breaks',
|
||||||
|
recommended_value='', level=OptionRecommendation.LOW,
|
||||||
|
help=_('Replace scene breaks with the specified text. By default, the '
|
||||||
|
'text from the input document is used.')),
|
||||||
|
|
||||||
OptionRecommendation(name='dehyphenate',
|
OptionRecommendation(name='dehyphenate',
|
||||||
recommended_value=True, level=OptionRecommendation.LOW,
|
recommended_value=True, level=OptionRecommendation.LOW,
|
||||||
help=_('Analyze hyphenated words throughout the document. The '
|
help=_('Analyze hyphenated words throughout the document. The '
|
||||||
|
@ -26,9 +26,14 @@ class HeuristicProcessor(object):
|
|||||||
self.blanks_deleted = False
|
self.blanks_deleted = False
|
||||||
self.blanks_between_paragraphs = False
|
self.blanks_between_paragraphs = False
|
||||||
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
|
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
|
||||||
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|whitespace)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||||
self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
|
||||||
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
|
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
|
||||||
|
self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
|
||||||
|
self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
|
||||||
|
self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
|
||||||
|
self.single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
|
||||||
|
self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
|
||||||
|
|
||||||
def is_pdftohtml(self, src):
|
def is_pdftohtml(self, src):
|
||||||
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
||||||
@ -187,19 +192,17 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
# Build the Regular Expressions in pieces
|
# Build the Regular Expressions in pieces
|
||||||
init_lookahead = "(?=<(p|div))"
|
init_lookahead = "(?=<(p|div))"
|
||||||
chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
|
chapter_line_open = self.line_open
|
||||||
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
|
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
|
||||||
chapter_header_open = r"(?P<chap>"
|
chapter_header_open = r"(?P<chap>"
|
||||||
title_header_open = r"(?P<title>"
|
title_header_open = r"(?P<title>"
|
||||||
chapter_header_close = ")\s*"
|
chapter_header_close = ")\s*"
|
||||||
title_header_close = ")"
|
title_header_close = ")"
|
||||||
chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
|
chapter_line_close = self.line_close
|
||||||
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
|
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
|
||||||
|
|
||||||
is_pdftohtml = self.is_pdftohtml(html)
|
is_pdftohtml = self.is_pdftohtml(html)
|
||||||
if is_pdftohtml:
|
if is_pdftohtml:
|
||||||
chapter_line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
|
|
||||||
chapter_line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
|
|
||||||
title_line_open = "<(?P<outer2>p)[^>]*>\s*"
|
title_line_open = "<(?P<outer2>p)[^>]*>\s*"
|
||||||
title_line_close = "\s*</(?P=outer2)>"
|
title_line_close = "\s*</(?P=outer2)>"
|
||||||
|
|
||||||
@ -374,13 +377,15 @@ class HeuristicProcessor(object):
|
|||||||
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
|
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
|
||||||
# Delete microsoft 'smart' tags
|
# Delete microsoft 'smart' tags
|
||||||
html = re.sub('(?i)</?st1:\w+>', '', html)
|
html = re.sub('(?i)</?st1:\w+>', '', html)
|
||||||
# Delete self closing paragraph tags
|
# Re-open self closing paragraph tags
|
||||||
html = re.sub('<p\s?/>', '', html)
|
html = re.sub('<p[^>/]*/>', '<p> </p>', html)
|
||||||
# Get rid of empty span, bold, font, em, & italics tags
|
# Get rid of empty span, bold, font, em, & italics tags
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||||
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||||
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||||
|
# Empty heading tags
|
||||||
|
html = re.sub(r'(?i)<h\d+>\s*</h\d+>', '', html)
|
||||||
self.deleted_nbsps = True
|
self.deleted_nbsps = True
|
||||||
return html
|
return html
|
||||||
|
|
||||||
@ -419,32 +424,98 @@ class HeuristicProcessor(object):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def detect_blank_formatting(self, html):
|
def merge_blanks(self, html, blanks_count=None):
    '''
    Collapse every run of two or more consecutive blank paragraphs into a
    single blank paragraph whose top margin grows with the size of the run.

    The replacement paragraph gets the class ``whitespaceNN`` when the run
    contains the text 'whitespace' (i.e. it was previously marked as plain
    whitespace) and ``softbreakNN`` otherwise, where NN is ten times the
    margin in em.

    :param html: Markup to process.
    :param blanks_count: Unused; accepted for compatibility with callers.
    :return: The markup with runs of blank paragraphs merged.
    '''
    base_em = .5 # Baseline is 1.5em per blank line, 1st line is .5 em css and 1em for the nbsp
    em_per_line = 1.5 # Add another 1.5 em for each additional blank

    def merge_matches(match):
        to_merge = match.group(0)
        lines = float(len(self.single_blank.findall(to_merge))) - 1.
        em = base_em + (em_per_line * lines)
        # Use a containment test: str.find() returns -1 (truthy) when the
        # text is absent and 0 (falsy) when it is at the very start, which
        # is the opposite of what a truth test on its result suggests.
        if 'whitespace' in to_merge:
            css_class = 'whitespace'
        else:
            css_class = 'softbreak'
        # The whole run is replaced by one paragraph.
        return ('\n<p class="' + css_class + str(int(em * 10)) +
                '" style="text-align:center; margin-top:' + str(em) +
                'em"> </p>')

    html = self.any_multi_blank.sub(merge_matches, html)
    return html
|
||||||
|
|
||||||
|
def detect_whitespace(self, html):
    '''
    Mark up blank paragraphs that are layout whitespace rather than scene
    breaks.

    Three passes are made over the markup:

    1. Blank paragraphs directly before and/or after a heading are removed
       and converted into ``margin-top`` / ``margin-bottom`` on the heading
       (1em per blank paragraph). The heading's opening tag is rewritten
       as ``<hN style="...">``.
    2. Blank paragraphs adjacent to a short paragraph (at most ~100
       characters) that ends in a word character are given the class
       ``whitespace``.
    3. If more sections than ``self.min_chapters`` were detected, blank
       paragraphs before the first heading are also given the class
       ``whitespace``.

    :param html: Markup to process.
    :return: The processed markup.
    '''
    blanks_around_headings = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
    blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)

    def merge_header_whitespace(match):
        initblanks = match.group('initparas')
        # Trailing blanks live in their own group; reading 'initparas'
        # here would silently drop them without adding a bottom margin.
        endblanks = match.group('endparas')
        heading = match.group('heading')
        if initblanks is None and endblanks is None:
            return heading

        top_margin = ''
        bottom_margin = ''
        if initblanks is not None:
            top_margin = 'margin-top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
        if endblanks is not None:
            bottom_margin = 'margin-bottom:'+str(len(self.single_blank.findall(endblanks)))+'em;'

        heading = re.sub(r'(?i)<h(?P<hnum>\d+)[^>]*>', '\n\n<h'+r'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', heading)
        return heading

    html = blanks_around_headings.sub(merge_header_whitespace, html)

    def markup_whitespaces(match):
        blanks = match.group(0)
        blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:0em; margin-bottom:0em"> </p>', blanks)
        return blanks

    html = blanks_n_nopunct.sub(markup_whitespaces, html)
    if self.html_preprocess_sections > self.min_chapters:
        # Everything ahead of the first heading is front matter.
        html = re.sub(r'(?si)^.*?(?=<h\d)', markup_whitespaces, html)

    return html
|
||||||
|
|
||||||
def detect_soft_breaks(self, html):
|
def detect_soft_breaks(self, html):
|
||||||
if not self.blanks_deleted and self.blanks_between_paragraphs:
|
if not self.blanks_deleted and self.blanks_between_paragraphs:
|
||||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; text-align:center"> </p>', html)
|
||||||
else:
|
else:
|
||||||
html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
def markup_user_break(self, replacement_break):
    '''
    Wrap the user supplied scene break replacement in markup that centers
    it with appropriate margins.

    * Text without any ``<`` or ``>`` has its whitespace normalized to
      spaces and is wrapped in the scene break paragraph.
    * A value starting with ``<hr`` is wrapped in a div. If it mentions a
      width, the div's left/right margins are set to half of the remaining
      percentage, because many ebook devices don't support margin:auto. If
      the width cannot be parsed the default 45% margins are kept. An
      ``<hr>`` without any width is replaced by a default rule.
    * A value starting with ``<img`` is wrapped in the scene break
      paragraph unchanged.
    * All other html is converted to text first.

    :param replacement_break: The string supplied by the user.
    :return: The markup to insert in place of each scene break.
    '''
    hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em">'
    if re.findall('(<|>)', replacement_break):
        if re.match('^<hr', replacement_break):
            if replacement_break.find('width') != -1:
                # Accept width:NN, width=NN, width="NN" and width: NN. If
                # nothing parseable follows 'width', keep the default
                # margins rather than failing on int().
                width_match = re.search(r'width\s*[:=]\s*["\']?\s*(?P<wnum>\d+)', replacement_break)
                if width_match is not None:
                    # Cap at 100 so the margins can never go negative.
                    width = min(int(width_match.group('wnum')), 100)
                    # Floor division keeps the percentage an integer.
                    divpercent = (100 - width) // 2
                    hr_open = hr_open.replace('45', str(divpercent))
                scene_break = hr_open+replacement_break+'</div>'
            else:
                scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
        elif re.match('^<img', replacement_break):
            scene_break = self.scene_break_open+replacement_break+'</p>'
        else:
            from calibre.utils.html2text import html2text
            replacement_break = html2text(replacement_break)
            replacement_break = re.sub(r'\s', ' ', replacement_break)
            scene_break = self.scene_break_open+replacement_break+'</p>'
    else:
        replacement_break = re.sub(r'\s', ' ', replacement_break)
        scene_break = self.scene_break_open+replacement_break+'</p>'

    return scene_break
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, html):
|
def __call__(self, html):
|
||||||
self.log.debug("********* Heuristic processing HTML *********")
|
self.log.debug("********* Heuristic processing HTML *********")
|
||||||
|
|
||||||
# Count the words in the document to estimate how many chapters to look for and whether
|
# Count the words in the document to estimate how many chapters to look for and whether
|
||||||
# other types of processing are attempted
|
# other types of processing are attempted
|
||||||
try:
|
try:
|
||||||
@ -458,7 +529,7 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
|
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
|
||||||
html = self.arrange_htm_line_endings(html)
|
html = self.arrange_htm_line_endings(html)
|
||||||
|
#self.dump(html, 'after_arrange_line_endings')
|
||||||
if self.cleanup_required():
|
if self.cleanup_required():
|
||||||
###### Check Markup ######
|
###### Check Markup ######
|
||||||
#
|
#
|
||||||
@ -478,6 +549,11 @@ class HeuristicProcessor(object):
|
|||||||
# fix indents must run before this step, as it removes non-breaking spaces
|
# fix indents must run before this step, as it removes non-breaking spaces
|
||||||
html = self.cleanup_markup(html)
|
html = self.cleanup_markup(html)
|
||||||
|
|
||||||
|
is_pdftohtml = self.is_pdftohtml(html)
|
||||||
|
if is_pdftohtml:
|
||||||
|
self.line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
|
||||||
|
self.line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
|
||||||
|
|
||||||
# ADE doesn't render <br />, change to empty paragraphs
|
# ADE doesn't render <br />, change to empty paragraphs
|
||||||
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
|
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
|
||||||
|
|
||||||
@ -489,6 +565,7 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
if getattr(self.extra_opts, 'markup_chapter_headings', False):
|
if getattr(self.extra_opts, 'markup_chapter_headings', False):
|
||||||
html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
|
html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
|
||||||
|
#self.dump(html, 'after_chapter_markup')
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'italicize_common_cases', False):
|
if getattr(self.extra_opts, 'italicize_common_cases', False):
|
||||||
html = self.markup_italicis(html)
|
html = self.markup_italicis(html)
|
||||||
@ -498,7 +575,7 @@ class HeuristicProcessor(object):
|
|||||||
if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
|
if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
|
||||||
self.log.debug("deleting blank lines")
|
self.log.debug("deleting blank lines")
|
||||||
self.blanks_deleted = True
|
self.blanks_deleted = True
|
||||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
|
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
|
||||||
html = self.blankreg.sub('', html)
|
html = self.blankreg.sub('', html)
|
||||||
|
|
||||||
# Determine line ending type
|
# Determine line ending type
|
||||||
@ -539,7 +616,7 @@ class HeuristicProcessor(object):
|
|||||||
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
|
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
|
||||||
self.log.debug("Looking for more split points based on punctuation,"
|
self.log.debug("Looking for more split points based on punctuation,"
|
||||||
" currently have " + unicode(self.html_preprocess_sections))
|
" currently have " + unicode(self.html_preprocess_sections))
|
||||||
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
|
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
|
||||||
html = chapdetect3.sub(self.chapter_break, html)
|
html = chapdetect3.sub(self.chapter_break, html)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'renumber_headings', False):
|
if getattr(self.extra_opts, 'renumber_headings', False):
|
||||||
@ -549,14 +626,32 @@ class HeuristicProcessor(object):
|
|||||||
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
||||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
||||||
|
|
||||||
|
# If scene break formatting is enabled, find all blank paragraphs that definitely aren't scenebreaks,
|
||||||
|
# style it with the 'whitespace' class. All remaining blank lines are styled as softbreaks.
|
||||||
|
# Multiple sequential blank paragraphs are merged with appropriate margins
|
||||||
|
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
|
||||||
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
||||||
html = self.detect_blank_formatting(html)
|
html = self.detect_whitespace(html)
|
||||||
html = self.detect_soft_breaks(html)
|
html = self.detect_soft_breaks(html)
|
||||||
# Center separator lines
|
blanks_count = len(self.any_multi_blank.findall(html))
|
||||||
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
|
if blanks_count >= 1:
|
||||||
#html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
|
html = self.merge_blanks(html, blanks_count)
|
||||||
|
scene_break_regex = self.line_open+'(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
|
||||||
|
scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
|
||||||
|
# If the user has enabled scene break replacement, then either softbreaks
|
||||||
|
# or 'hard' scene breaks are replaced, depending on which is in use
|
||||||
|
# Otherwise separator lines are centered, use a bit larger margin in this case
|
||||||
|
replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
|
||||||
|
if replacement_break is not None:
|
||||||
|
replacement_break = self.markup_user_break(replacement_break)
|
||||||
|
if len(scene_break.findall(html)) >= 1:
|
||||||
|
html = scene_break.sub(replacement_break, html)
|
||||||
|
else:
|
||||||
|
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
|
||||||
|
else:
|
||||||
|
html = scene_break.sub(self.scene_break_open+'\g<break>'+'</p>', html)
|
||||||
|
|
||||||
if self.deleted_nbsps:
|
if self.deleted_nbsps:
|
||||||
# put back non-breaking spaces in empty paragraphs to preserve original formatting
|
# put back non-breaking spaces in empty paragraphs so they render correctly
|
||||||
html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
|
||||||
return html
|
return html
|
||||||
|
@ -103,6 +103,8 @@ class EXTHHeader(object):
|
|||||||
pass
|
pass
|
||||||
elif id == 108:
|
elif id == 108:
|
||||||
pass # Producer
|
pass # Producer
|
||||||
|
elif id == 113:
|
||||||
|
pass # ASIN or UUID
|
||||||
#else:
|
#else:
|
||||||
# print 'unhandled metadata record', id, repr(content)
|
# print 'unhandled metadata record', id, repr(content)
|
||||||
|
|
||||||
|
@ -1547,6 +1547,31 @@ class MobiWriter(object):
|
|||||||
rights = 'Unknown'
|
rights = 'Unknown'
|
||||||
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
|
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
|
||||||
exth.write(rights)
|
exth.write(rights)
|
||||||
|
nrecs += 1
|
||||||
|
|
||||||
|
# Write UUID as ASIN
|
||||||
|
uuid = None
|
||||||
|
from calibre.ebooks.oeb.base import OPF
|
||||||
|
for x in oeb.metadata['identifier']:
|
||||||
|
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
||||||
|
uuid = unicode(x).split(':')[-1]
|
||||||
|
break
|
||||||
|
if uuid is None:
|
||||||
|
from uuid import uuid4
|
||||||
|
uuid = str(uuid4())
|
||||||
|
|
||||||
|
if isinstance(uuid, unicode):
|
||||||
|
uuid = uuid.encode('utf-8')
|
||||||
|
exth.write(pack('>II', 113, len(uuid) + 8))
|
||||||
|
exth.write(uuid)
|
||||||
|
nrecs += 1
|
||||||
|
|
||||||
|
# Write cdetype
|
||||||
|
if not self.opts.mobi_periodical:
|
||||||
|
data = 'EBOK'
|
||||||
|
exth.write(pack('>II', 501, len(data)+8))
|
||||||
|
exth.write(data)
|
||||||
|
nrecs += 1
|
||||||
|
|
||||||
# Add a publication date entry
|
# Add a publication date entry
|
||||||
if oeb.metadata['date'] != [] :
|
if oeb.metadata['date'] != [] :
|
||||||
|
357
src/calibre/gui2/complete.py
Normal file
357
src/calibre/gui2/complete.py
Normal file
@ -0,0 +1,357 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
from PyQt4.Qt import QLineEdit, QListView, QAbstractListModel, Qt, QTimer, \
|
||||||
|
QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \
|
||||||
|
QStyle, QEvent, pyqtSignal
|
||||||
|
|
||||||
|
from calibre.utils.icu import sort_key, lower
|
||||||
|
from calibre.gui2 import NONE
|
||||||
|
from calibre.gui2.widgets import EnComboBox
|
||||||
|
|
||||||
|
class CompleterItemDelegate(QItemDelegate): # {{{

    ''' Renders the current item as though it were selected '''

    def __init__(self, view):
        # Keep a reference to the view so paint() can ask it which index is
        # current.
        self.view = view
        QItemDelegate.__init__(self, view)

    def paint(self, p, opt, idx):
        ''' Paint idx, adding the focus state when it is the view's
        current index. '''
        is_current = self.view.currentIndex() == idx
        # Work on a copy of the style option passed in.
        opt = QStyleOptionViewItem(opt)
        opt.showDecorationSelected = True
        if is_current:
            opt.state |= QStyle.State_HasFocus
        QItemDelegate.paint(self, p, opt, idx)

# }}}
|
||||||
|
|
||||||
|
class CompleteWindow(QListView): # {{{

    '''
    The completion popup. For keyboard and mouse handling see
    :meth:`eventFilter`.
    '''

    #: This signal is emitted when the user selects one of the listed
    #: completions, by mouse or keyboard
    completion_selected = pyqtSignal(object)

    def __init__(self, widget, model):
        self.widget = widget
        QListView.__init__(self)
        self.setVisible(False)
        self.setParent(None, Qt.Popup)
        self.setAlternatingRowColors(True)
        self.setFocusPolicy(Qt.NoFocus)
        self._d = CompleterItemDelegate(self)
        self.setItemDelegate(self._d)
        self.setModel(model)
        self.setFocusProxy(widget)
        self.installEventFilter(self)
        self.clicked.connect(self.do_selected)
        self.entered.connect(self.do_entered)
        self.setMouseTracking(True)

    def do_entered(self, idx):
        ''' Make the entered index the current one, if it is valid. '''
        if not idx.isValid():
            return
        self.setCurrentIndex(idx)

    def do_selected(self, idx=None):
        ''' Emit completion_selected for idx (default: the current index,
        falling back to the first row) and hide the popup. '''
        if idx is None:
            idx = self.currentIndex()
        model = self.model()
        if not idx.isValid() and model.rowCount() > 0:
            idx = model.index(0)
        if idx.isValid():
            data = unicode(self.model().data(idx, Qt.DisplayRole))
            self.completion_selected.emit(data)
        # NOTE(review): nesting reconstructed from a listing without
        # indentation; the popup is assumed to hide unconditionally.
        self.hide()

    def eventFilter(self, o, e):
        ''' Handle key presses, mouse presses and input method/shortcut
        events for the popup itself. Returns True when the event was
        handled here and must not be processed further. '''
        if o is not self:
            return False

        etype = e.type()
        if etype == e.KeyPress:
            key = e.key()
            closes_popup = key in (Qt.Key_Escape, Qt.Key_Backtab) or \
                    (key == Qt.Key_F4 and (e.modifiers() & Qt.AltModifier))
            if closes_popup:
                self.hide()
                return True
            if key in (Qt.Key_Enter, Qt.Key_Return, Qt.Key_Tab):
                self.do_selected()
                return True
            if key in (Qt.Key_Up, Qt.Key_Down, Qt.Key_PageUp,
                    Qt.Key_PageDown):
                # Navigation keys are left to the list view
                return False
            # Send key event to associated line edit
            self.widget.eat_focus_out = False
            try:
                self.widget.event(e)
            finally:
                self.widget.eat_focus_out = True
            if not self.widget.hasFocus():
                # Line edit lost focus
                self.hide()
            if e.isAccepted():
                # Line edit consumed event
                return True
        elif etype == e.MouseButtonPress:
            # Hide popup if user clicks outside it, otherwise
            # pass event to popup
            if not self.underMouse():
                self.hide()
                return True
        elif etype in (e.InputMethod, e.ShortcutOverride):
            QApplication.sendEvent(self.widget, e)

        return False # Do not filter this event

# }}}
|
||||||
|
|
||||||
|
class CompleteModel(QAbstractListModel):
    '''
    List model holding all possible completion items and the subset of
    them (``matches``) that is currently shown.
    '''

    def __init__(self, parent=None):
        QAbstractListModel.__init__(self, parent)
        self.sep = ','
        self.space_before_sep = False
        self.items = []
        self.lowered_items = []
        self.matches = []

    def set_items(self, items):
        ' Replace the set of possible completions and clear the matches '
        cleaned = [unicode(x.strip()) for x in items]
        self.items = sorted(cleaned, key=sort_key)
        # Lower-cased copy, index-aligned with self.items, used for matching
        self.lowered_items = [lower(x) for x in self.items]
        self.matches = []
        self.reset()

    def rowCount(self, *args):
        return len(self.matches)

    def data(self, index, role):
        if role != Qt.DisplayRole:
            return NONE
        row = index.row()
        try:
            return self.matches[row]
        except IndexError:
            return NONE

    def get_matches(self, prefix):
        '''
        Return all matches that (case insensitively) start with prefix
        '''
        prefix = lower(prefix)
        if not prefix:
            return []
        return [self.items[i] for i, candidate in
                enumerate(self.lowered_items) if candidate.startswith(prefix)]

    def update_matches(self, matches):
        ' Set the list of matches to display '
        self.matches = matches
        self.reset()
|
||||||
|
|
||||||
|
class MultiCompleteLineEdit(QLineEdit):
    '''
    A line edit that completes on multiple items separated by a
    separator. Use the :meth:`update_items_cache` to set the list of
    all possible completions. Separator can be controlled with the
    :meth:`set_separator` and :meth:`set_space_before_sep` methods.

    A call to self.set_separator(None) will allow this widget to be used
    to complete non multiple fields as well.
    '''

    def __init__(self, parent=None):
        # These attributes are set before QLineEdit.__init__ is called
        self.eat_focus_out = True
        self.max_visible_items = 7
        self.current_prefix = None
        QLineEdit.__init__(self, parent)

        self._model = CompleteModel(parent=self)
        self.complete_window = CompleteWindow(self, self._model)
        self.textChanged.connect(self.text_changed)
        self.cursorPositionChanged.connect(self.cursor_position_changed)
        self.complete_window.completion_selected.connect(self.completion_selected)

    # Interface {{{
    def update_items_cache(self, complete_items):
        self.all_items = complete_items

    def set_separator(self, sep):
        self.sep = sep

    def set_space_before_sep(self, space_before):
        self.space_before_sep = space_before

    # }}}

    def eventFilter(self, o, e):
        if self.eat_focus_out and o is self and e.type() == QEvent.FocusOut:
            if self.complete_window.isVisible():
                return True # Filter this event since the cw is visible
        return QLineEdit.eventFilter(self, o, e)

    def text_changed(self, *args):
        self.update_completions()

    def cursor_position_changed(self, *args):
        self.update_completions()

    def update_completions(self):
        ' Update the list of completions '
        cpos = self.cursorPosition()
        text = unicode(self.text())
        prefix = text[:cpos]
        self.current_prefix = prefix
        complete_prefix = prefix.lstrip()
        if self.sep:
            # Only the item being typed (after the last separator) is
            # completed
            complete_prefix = prefix.split(self.sep)[-1].lstrip()

        matches = self._model.get_matches(complete_prefix)
        self.update_complete_window(matches)

    def get_completed_text(self, text):
        '''
        Get completed text from current cursor position and the completion
        text

        Always returns the tuple ``(prefix_len, len_extra, completed_text)``.
        ``prefix_len`` is the length of the partially typed item before the
        cursor and ``len_extra`` the number of characters (separator and
        spaces) inserted after the completion. When no separator is set the
        completion replaces the whole text and both lengths are -1.
        '''
        if not self.sep:
            # Must still be a 3-tuple: completion_selected() unpacks it
            return -1, -1, text

        cursor_pos = self.cursorPosition()
        full_text = unicode(self.text())
        before_text = full_text[:cursor_pos]
        after_text = full_text[cursor_pos:]
        after_parts = after_text.split(self.sep)
        if len(after_parts) < 3 and not after_parts[-1].strip():
            after_text = u''
        prefix_len = len(before_text.split(self.sep)[-1].lstrip())
        if self.space_before_sep:
            complete_text_pat = '%s%s %s %s'
            len_extra = len(self.sep) + 2
        else:
            complete_text_pat = '%s%s%s %s'
            len_extra = len(self.sep) + 1
        return prefix_len, len_extra, complete_text_pat % (
            before_text[:cursor_pos - prefix_len], text, self.sep, after_text)

    def completion_selected(self, text):
        ' Insert the chosen completion and place the cursor after it '
        prefix_len, len_extra, ctext = self.get_completed_text(text)
        if not self.sep:
            self.setText(ctext)
            self.setCursorPosition(len(ctext))
        else:
            # Read the cursor position before setText() changes the text
            cursor_pos = self.cursorPosition()
            self.setText(ctext)
            self.setCursorPosition(cursor_pos - prefix_len + len(text) + len_extra)

    def update_complete_window(self, matches):
        ' Show the popup with the given matches, or hide it if there are none '
        self._model.update_matches(matches)
        if matches:
            self.show_complete_window()
        else:
            self.complete_window.hide()

    def position_complete_window(self):
        ' Size the popup and place it next to this line edit, on screen '
        popup = self.complete_window
        screen = QApplication.desktop().availableGeometry(self)
        h = (popup.sizeHintForRow(0) * min(self.max_visible_items,
            popup.model().rowCount()) + 3) + 3
        hsb = popup.horizontalScrollBar()
        if hsb and hsb.isVisible():
            h += hsb.sizeHint().height()

        rh = self.height()
        pos = self.mapToGlobal(QPoint(0, self.height() - 2))
        w = self.width()

        # Keep the popup within the horizontal bounds of the screen
        if w > screen.width():
            w = screen.width()
        if (pos.x() + w) > (screen.x() + screen.width()):
            pos.setX(screen.x() + screen.width() - w)
        if (pos.x() < screen.x()):
            pos.setX(screen.x())

        top = pos.y() - rh - screen.top() + 2
        bottom = screen.bottom() - pos.y()
        h = max(h, popup.minimumHeight())
        if h > bottom:
            # Not enough room below: shrink to the larger of the two gaps
            # and move the popup above the line edit if that gap is bigger
            h = min(max(top, bottom), h)
            if top > bottom:
                pos.setY(pos.y() - h - rh + 2)

        popup.setGeometry(pos.x(), pos.y(), w, h)

    def show_complete_window(self):
        self.position_complete_window()
        self.complete_window.show()

    def moveEvent(self, ev):
        ret = QLineEdit.moveEvent(self, ev)
        QTimer.singleShot(0, self.position_complete_window)
        return ret

    def resizeEvent(self, ev):
        ret = QLineEdit.resizeEvent(self, ev)
        QTimer.singleShot(0, self.position_complete_window)
        return ret

    # The properties below are stored on the completion model

    @dynamic_property
    def all_items(self):
        def fget(self):
            return self._model.items
        def fset(self, items):
            self._model.set_items(items)
        return property(fget=fget, fset=fset)

    @dynamic_property
    def sep(self):
        def fget(self):
            return self._model.sep
        def fset(self, val):
            self._model.sep = val
        return property(fget=fget, fset=fset)

    @dynamic_property
    def space_before_sep(self):
        def fget(self):
            return self._model.space_before_sep
        def fset(self, val):
            self._model.space_before_sep = val
        return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
|
class MultiCompleteComboBox(EnComboBox):
    '''
    Combo box that uses a MultiCompleteLineEdit as its line edit and
    forwards the completion interface to it.
    '''

    def __init__(self, *args):
        EnComboBox.__init__(self, *args)
        editor = MultiCompleteLineEdit(self)
        self.setLineEdit(editor)

    def update_items_cache(self, complete_items):
        editor = self.lineEdit()
        editor.update_items_cache(complete_items)

    def set_separator(self, sep):
        editor = self.lineEdit()
        editor.set_separator(sep)

    def set_space_before_sep(self, space_before):
        editor = self.lineEdit()
        editor.set_space_before_sep(space_before)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual test: a dialog containing a single completing line edit
    from PyQt4.Qt import QDialog, QVBoxLayout
    app = QApplication([])
    dialog = QDialog()
    layout = QVBoxLayout()
    dialog.setLayout(layout)
    line_edit = MultiCompleteLineEdit(dialog)
    layout.addWidget(line_edit)
    line_edit.all_items = ['one', 'otwo', 'othree', 'ooone', 'ootwo', 'oothree']
    dialog.exec_()
|
@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
from PyQt4.Qt import Qt
|
from PyQt4.Qt import Qt
|
||||||
|
|
||||||
|
from calibre.gui2 import gprefs
|
||||||
from calibre.gui2.convert.heuristics_ui import Ui_Form
|
from calibre.gui2.convert.heuristics_ui import Ui_Form
|
||||||
from calibre.gui2.convert import Widget
|
from calibre.gui2.convert import Widget
|
||||||
|
|
||||||
@ -21,17 +22,38 @@ class HeuristicsWidget(Widget, Ui_Form):
|
|||||||
['enable_heuristics', 'markup_chapter_headings',
|
['enable_heuristics', 'markup_chapter_headings',
|
||||||
'italicize_common_cases', 'fix_indents',
|
'italicize_common_cases', 'fix_indents',
|
||||||
'html_unwrap_factor', 'unwrap_lines',
|
'html_unwrap_factor', 'unwrap_lines',
|
||||||
'delete_blank_paragraphs', 'format_scene_breaks',
|
'delete_blank_paragraphs',
|
||||||
|
'format_scene_breaks', 'replace_scene_breaks',
|
||||||
'dehyphenate', 'renumber_headings']
|
'dehyphenate', 'renumber_headings']
|
||||||
)
|
)
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
|
self.rssb_defaults = [u'', u'<hr />', u'* * *', u'• • •', u'✦ ✦ ✦',
|
||||||
|
u'✮ ✮ ✮', u'☆ ☆ ☆', u'❂ ❂ ❂', u'✣ ✣ ✣', u'❖ ❖ ❖', u'☼ ☼ ☼', u'✠ ✠ ✠']
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
|
||||||
|
self.load_histories()
|
||||||
|
|
||||||
self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
|
self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
|
||||||
self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
|
self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
|
||||||
|
|
||||||
self.enable_heuristics(self.opt_enable_heuristics.checkState())
|
self.enable_heuristics(self.opt_enable_heuristics.checkState())
|
||||||
|
|
||||||
|
def restore_defaults(self, get_option):
    '''
    Restore the default option values and put the built-in scene break
    replacements (self.rssb_defaults) at the front of the saved history,
    followed by the remaining history entries.
    '''
    Widget.restore_defaults(self, get_option)

    # Persist the current contents of the combo box first
    self.save_histories()
    defaults = list(self.rssb_defaults)
    # Build the merged list from a filtered copy rather than filtering
    # one lookup of the preference and then reading it again, which only
    # removes the duplicates if both lookups return the same list object
    remaining = [x for x in gprefs['replace_scene_breaks_history']
            if x not in defaults]
    gprefs['replace_scene_breaks_history'] = defaults + remaining
    self.load_histories()
|
||||||
|
|
||||||
|
def commit_options(self, save_defaults=False):
    ' Save the scene break history, then commit the options as usual '
    self.save_histories()
    result = Widget.commit_options(self, save_defaults)
    return result
|
||||||
|
|
||||||
def break_cycles(self):
|
def break_cycles(self):
|
||||||
Widget.break_cycles(self)
|
Widget.break_cycles(self)
|
||||||
|
|
||||||
@ -45,6 +67,33 @@ class HeuristicsWidget(Widget, Ui_Form):
|
|||||||
if val is None and g is self.opt_html_unwrap_factor:
|
if val is None and g is self.opt_html_unwrap_factor:
|
||||||
g.setValue(0.0)
|
g.setValue(0.0)
|
||||||
return True
|
return True
|
||||||
|
if not val and g is self.opt_replace_scene_breaks:
|
||||||
|
g.lineEdit().setText('')
|
||||||
|
return True
|
||||||
|
|
||||||
|
def load_histories(self):
    '''
    Repopulate the scene break replacement combo box from the saved
    history (self.rssb_defaults is used when nothing has been saved),
    with the current text as the first entry.
    '''
    combo = self.opt_replace_scene_breaks
    combo.clear()
    combo.lineEdit().setText('')

    val = unicode(combo.currentText())
    # Copy the list: re-ordering it in place would modify
    # self.rssb_defaults (or the list held by gprefs) as a side effect
    rssb_hist = list(gprefs.get('replace_scene_breaks_history',
        self.rssb_defaults))
    if val in rssb_hist:
        rssb_hist.remove(val)
    rssb_hist.insert(0, val)
    for v in rssb_hist:
        # Ensure we don't have duplicate items.
        if combo.findText(v) == -1:
            combo.addItem(v)
    combo.setCurrentIndex(0)
|
||||||
|
|
||||||
|
def save_histories(self):
    '''
    Store the text of the scene break replacement box followed by its
    items in gprefs, without duplicates. Only the first ten candidates
    are considered.
    '''
    combo = self.opt_replace_scene_breaks
    candidates = [unicode(combo.lineEdit().text())]
    candidates.extend(unicode(combo.itemText(i))
            for i in xrange(combo.count()))
    unique = []
    for pat in candidates[:10]:
        # Ensure we don't have duplicate items.
        if pat in unique:
            continue
        unique.append(pat)
    gprefs['replace_scene_breaks_history'] = unique
|
||||||
|
|
||||||
def enable_heuristics(self, state):
|
def enable_heuristics(self, state):
|
||||||
state = state == Qt.Checked
|
state = state == Qt.Checked
|
||||||
|
@ -150,6 +150,45 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||||
|
<property name="sizeConstraint">
|
||||||
|
<enum>QLayout::SetDefaultConstraint</enum>
|
||||||
|
</property>
|
||||||
|
<item>
|
||||||
|
<widget class="QLabel" name="label_2">
|
||||||
|
<property name="sizePolicy">
|
||||||
|
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
|
||||||
|
<horstretch>0</horstretch>
|
||||||
|
<verstretch>0</verstretch>
|
||||||
|
</sizepolicy>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>Replace soft scene &breaks:</string>
|
||||||
|
</property>
|
||||||
|
<property name="buddy">
|
||||||
|
<cstring>opt_replace_scene_breaks</cstring>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QComboBox" name="opt_replace_scene_breaks">
|
||||||
|
<property name="sizePolicy">
|
||||||
|
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||||
|
<horstretch>0</horstretch>
|
||||||
|
<verstretch>0</verstretch>
|
||||||
|
</sizepolicy>
|
||||||
|
</property>
|
||||||
|
<property name="editable">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
<property name="insertPolicy">
|
||||||
|
<enum>QComboBox::InsertAtTop</enum>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="opt_dehyphenate">
|
<widget class="QCheckBox" name="opt_dehyphenate">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
|
@ -264,8 +264,9 @@ class EmailMixin(object): # {{{
|
|||||||
if _auto_ids != []:
|
if _auto_ids != []:
|
||||||
for id in _auto_ids:
|
for id in _auto_ids:
|
||||||
if specific_format == None:
|
if specific_format == None:
|
||||||
formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')]
|
dbfmts = self.library_view.model().db.formats(id, index_is_id=True)
|
||||||
formats = formats if formats != None else []
|
formats = [f.lower() for f in (dbfmts.split(',') if fmts else
|
||||||
|
[])]
|
||||||
if list(set(formats).intersection(available_input_formats())) != [] and list(set(fmts).intersection(available_output_formats())) != []:
|
if list(set(formats).intersection(available_input_formats())) != [] and list(set(fmts).intersection(available_output_formats())) != []:
|
||||||
auto.append(id)
|
auto.append(id)
|
||||||
else:
|
else:
|
||||||
|
@ -12,8 +12,8 @@ from PyQt4.Qt import Qt, QDateEdit, QDate, \
|
|||||||
QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
|
QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
|
||||||
QPushButton, QSpinBox, QLineEdit
|
QPushButton, QSpinBox, QLineEdit
|
||||||
|
|
||||||
from calibre.gui2.widgets import EnLineEdit, CompleteComboBox, \
|
from calibre.gui2.widgets import EnLineEdit, EnComboBox, FormatList, ImageView
|
||||||
EnComboBox, FormatList, ImageView, CompleteLineEdit
|
from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key
|
||||||
from calibre.utils.config import tweaks, prefs
|
from calibre.utils.config import tweaks, prefs
|
||||||
from calibre.ebooks.metadata import title_sort, authors_to_string, \
|
from calibre.ebooks.metadata import title_sort, authors_to_string, \
|
||||||
@ -149,14 +149,14 @@ class TitleSortEdit(TitleEdit):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
# Authors {{{
|
# Authors {{{
|
||||||
class AuthorsEdit(CompleteComboBox):
|
class AuthorsEdit(MultiCompleteComboBox):
|
||||||
|
|
||||||
TOOLTIP = ''
|
TOOLTIP = ''
|
||||||
LABEL = _('&Author(s):')
|
LABEL = _('&Author(s):')
|
||||||
|
|
||||||
def __init__(self, parent):
|
def __init__(self, parent):
|
||||||
self.dialog = parent
|
self.dialog = parent
|
||||||
CompleteComboBox.__init__(self, parent)
|
MultiCompleteComboBox.__init__(self, parent)
|
||||||
self.setToolTip(self.TOOLTIP)
|
self.setToolTip(self.TOOLTIP)
|
||||||
self.setWhatsThis(self.TOOLTIP)
|
self.setWhatsThis(self.TOOLTIP)
|
||||||
self.setEditable(True)
|
self.setEditable(True)
|
||||||
@ -814,14 +814,14 @@ class RatingEdit(QSpinBox): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class TagsEdit(CompleteLineEdit): # {{{
|
class TagsEdit(MultiCompleteLineEdit): # {{{
|
||||||
LABEL = _('Ta&gs:')
|
LABEL = _('Ta&gs:')
|
||||||
TOOLTIP = '<p>'+_('Tags categorize the book. This is particularly '
|
TOOLTIP = '<p>'+_('Tags categorize the book. This is particularly '
|
||||||
'useful while searching. <br><br>They can be any words'
|
'useful while searching. <br><br>They can be any words'
|
||||||
'or phrases, separated by commas.')
|
'or phrases, separated by commas.')
|
||||||
|
|
||||||
def __init__(self, parent):
|
def __init__(self, parent):
|
||||||
CompleteLineEdit.__init__(self, parent)
|
MultiCompleteLineEdit.__init__(self, parent)
|
||||||
self.setToolTip(self.TOOLTIP)
|
self.setToolTip(self.TOOLTIP)
|
||||||
self.setWhatsThis(self.TOOLTIP)
|
self.setWhatsThis(self.TOOLTIP)
|
||||||
|
|
||||||
@ -839,7 +839,7 @@ class TagsEdit(CompleteLineEdit): # {{{
|
|||||||
tags = db.tags(id_, index_is_id=True)
|
tags = db.tags(id_, index_is_id=True)
|
||||||
tags = tags.split(',') if tags else []
|
tags = tags.split(',') if tags else []
|
||||||
self.current_val = tags
|
self.current_val = tags
|
||||||
self.update_items_cache(db.all_tags())
|
self.all_items = db.all_tags()
|
||||||
self.original_val = self.current_val
|
self.original_val = self.current_val
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -860,7 +860,7 @@ class TagsEdit(CompleteLineEdit): # {{{
|
|||||||
d = TagEditor(self, db, id_)
|
d = TagEditor(self, db, id_)
|
||||||
if d.exec_() == TagEditor.Accepted:
|
if d.exec_() == TagEditor.Accepted:
|
||||||
self.current_val = d.tags
|
self.current_val = d.tags
|
||||||
self.update_items_cache(db.all_tags())
|
self.all_items = db.all_tags()
|
||||||
|
|
||||||
|
|
||||||
def commit(self, db, id_):
|
def commit(self, db, id_):
|
||||||
|
@ -430,8 +430,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
authors = self.authors(id, index_is_id=True)
|
authors = self.authors(id, index_is_id=True)
|
||||||
if not authors:
|
if not authors:
|
||||||
authors = _('Unknown')
|
authors = _('Unknown')
|
||||||
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
author = ascii_filename(authors.split(',')[0])[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
|
||||||
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
title = ascii_filename(self.title(id, index_is_id=True))[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
|
||||||
path = author + '/' + title + ' (%d)'%id
|
path = author + '/' + title + ' (%d)'%id
|
||||||
return path
|
return path
|
||||||
|
|
||||||
@ -442,8 +442,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
authors = self.authors(id, index_is_id=True)
|
authors = self.authors(id, index_is_id=True)
|
||||||
if not authors:
|
if not authors:
|
||||||
authors = _('Unknown')
|
authors = _('Unknown')
|
||||||
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
author = ascii_filename(authors.split(',')[0])[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
|
||||||
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
title = ascii_filename(self.title(id, index_is_id=True))[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
|
||||||
name = title + ' - ' + author
|
name = title + ' - ' + author
|
||||||
while name.endswith('.'):
|
while name.endswith('.'):
|
||||||
name = name[:-1]
|
name = name[:-1]
|
||||||
|
@ -311,9 +311,14 @@ remove all non-breaking-space entities, or may include false positive matches re
|
|||||||
|
|
||||||
:guilabel:`Ensure scene breaks are consistently formatted`
|
:guilabel:`Ensure scene breaks are consistently formatted`
|
||||||
With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned.
|
With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned.
|
||||||
It also attempts to detect scene breaks defined by white space and replace them with a horizontal rule 15% of the
|
'Soft' scene break markers, i.e. scene breaks only defined by extra white space, are styled to ensure that they
|
||||||
page width. Some readers may find this desirable as these 'soft' scene breaks often become page breaks on readers, and
|
will not be displayed in conjunction with page breaks.
|
||||||
thus become difficult to distinguish.
|
|
||||||
|
:guilabel:`Replace scene breaks`
|
||||||
|
If this option is configured then |app| will replace scene break markers it finds with the replacement text specified by the
|
||||||
|
user. In general you should avoid using HTML tags; |app| will discard any tags and use pre-defined markup. <hr />
|
||||||
|
tags, i.e. horizontal rules, are an exception. These can optionally be specified with styles; if you choose to add your own
|
||||||
|
style, be sure to include the 'width' setting, otherwise the style information will be discarded.
|
||||||
|
|
||||||
:guilabel:`Remove unnecessary hyphens`
|
:guilabel:`Remove unnecessary hyphens`
|
||||||
|app| will analyze all hyphenated content in the document when this option is enabled. The document itself is used
|
|app| will analyze all hyphenated content in the document when this option is enabled. The document itself is used
|
||||||
@ -628,7 +633,7 @@ between 0 and 1. The default is 0.45, just under the median line length. Lower t
|
|||||||
text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`.
|
text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`.
|
||||||
|
|
||||||
Also, they often have headers and footers as part of the document that will become included with the text.
|
Also, they often have headers and footers as part of the document that will become included with the text.
|
||||||
Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not
|
Use the Search and Replace panel to remove headers and footers to mitigate this issue. If the headers and footers are not
|
||||||
removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read
|
removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read
|
||||||
:ref:`regexptutorial`.
|
:ref:`regexptutorial`.
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user