Merge from trunk

This commit is contained in:
Charles Haley 2011-02-02 09:03:50 +00:00
commit e1c90d5be0
16 changed files with 641 additions and 59 deletions

View File

@ -585,7 +585,6 @@ application/vnd.osa.netdeploy
application/vnd.osgi.bundle application/vnd.osgi.bundle
application/vnd.osgi.dp dp application/vnd.osgi.dp dp
application/vnd.otps.ct-kip+xml application/vnd.otps.ct-kip+xml
application/vnd.palm oprc pdb pqa
application/vnd.paos.xml application/vnd.paos.xml
application/vnd.pg.format str application/vnd.pg.format str
application/vnd.pg.osasli ei6 application/vnd.pg.osasli ei6
@ -1082,7 +1081,6 @@ chemical/x-ncbi-asn1 asn
chemical/x-ncbi-asn1-ascii ent prt chemical/x-ncbi-asn1-ascii ent prt
chemical/x-ncbi-asn1-binary aso val chemical/x-ncbi-asn1-binary aso val
chemical/x-ncbi-asn1-spec asn chemical/x-ncbi-asn1-spec asn
chemical/x-pdb ent pdb
chemical/x-rosdal ros chemical/x-rosdal ros
chemical/x-swissprot sw chemical/x-swissprot sw
chemical/x-vamas-iso14976 vms chemical/x-vamas-iso14976 vms
@ -1379,3 +1377,5 @@ application/x-cbr cbr
application/x-cb7 cb7 application/x-cb7 cb7
application/x-koboreader-ebook kobo application/x-koboreader-ebook kobo
image/wmf wmf image/wmf wmf
application/ereader pdb

View File

@ -10,7 +10,10 @@ class WallStreetJournal(BasicNewsRecipe):
title = 'Wall Street Journal (free)' title = 'Wall Street Journal (free)'
__author__ = 'Kovid Goyal, Sujata Raman, Joshua Oster-Morris, Starson17' __author__ = 'Kovid Goyal, Sujata Raman, Joshua Oster-Morris, Starson17'
description = 'News and current affairs' description = '''News and current affairs. This recipe only fetches complete
versions of the articles that are available free on the wsj.com website.
To get the rest of the articles, subscribe to the WSJ and use the other WSJ
recipe.'''
language = 'en' language = 'en'
cover_url = 'http://dealbreaker.com/images/thumbs/Wall%20Street%20Journal%20A1.JPG' cover_url = 'http://dealbreaker.com/images/thumbs/Wall%20Street%20Journal%20A1.JPG'
max_articles_per_feed = 1000 max_articles_per_feed = 1000
@ -151,6 +154,4 @@ class WallStreetJournal(BasicNewsRecipe):
return articles return articles
def cleanup(self):
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')

View File

@ -22,13 +22,15 @@ Run an embedded python interpreter.
parser.add_option('-d', '--debug-device-driver', default=False, action='store_true', parser.add_option('-d', '--debug-device-driver', default=False, action='store_true',
help='Debug the specified device driver.') help='Debug the specified device driver.')
parser.add_option('-g', '--gui', default=False, action='store_true', parser.add_option('-g', '--gui', default=False, action='store_true',
help='Run the GUI',) help='Run the GUI with debugging enabled. Debug output is '
'printed to stdout and stderr.')
parser.add_option('--gui-debug', default=None, parser.add_option('--gui-debug', default=None,
help='Run the GUI with a debug console, logging to the' help='Run the GUI with a debug console, logging to the'
' specified path',) ' specified path. For internal use only, use the -g'
' option to run the GUI in debug mode',)
parser.add_option('--show-gui-debug', default=None, parser.add_option('--show-gui-debug', default=None,
help='Display the specified log file.',) help='Display the specified log file. For internal use'
' only.',)
parser.add_option('-w', '--viewer', default=False, action='store_true', parser.add_option('-w', '--viewer', default=False, action='store_true',
help='Run the ebook viewer',) help='Run the ebook viewer',)
parser.add_option('--paths', default=False, action='store_true', parser.add_option('--paths', default=False, action='store_true',

View File

@ -46,7 +46,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
'italicize_common_cases', 'fix_indents', 'italicize_common_cases', 'fix_indents',
'html_unwrap_factor', 'unwrap_lines', 'html_unwrap_factor', 'unwrap_lines',
'delete_blank_paragraphs', 'format_scene_breaks', 'delete_blank_paragraphs', 'format_scene_breaks',
'dehyphenate', 'renumber_headings'] 'dehyphenate', 'renumber_headings',
'replace_scene_breaks']
def print_help(parser, log): def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace') help = parser.format_help().encode(preferred_encoding, 'replace')

View File

@ -531,6 +531,11 @@ OptionRecommendation(name='format_scene_breaks',
'Replace soft scene breaks that use multiple blank lines with' 'Replace soft scene breaks that use multiple blank lines with'
'horizontal rules.')), 'horizontal rules.')),
OptionRecommendation(name='replace_scene_breaks',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Replace scene breaks with the specified text. By default, the '
'text from the input document is used.')),
OptionRecommendation(name='dehyphenate', OptionRecommendation(name='dehyphenate',
recommended_value=True, level=OptionRecommendation.LOW, recommended_value=True, level=OptionRecommendation.LOW,
help=_('Analyze hyphenated words throughout the document. The ' help=_('Analyze hyphenated words throughout the document. The '

View File

@ -26,9 +26,14 @@ class HeuristicProcessor(object):
self.blanks_deleted = False self.blanks_deleted = False
self.blanks_between_paragraphs = False self.blanks_between_paragraphs = False
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL) self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE) self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|whitespace)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE) self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE) self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
self.single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
def is_pdftohtml(self, src): def is_pdftohtml(self, src):
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000] return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
@ -187,19 +192,17 @@ class HeuristicProcessor(object):
# Build the Regular Expressions in pieces # Build the Regular Expressions in pieces
init_lookahead = "(?=<(p|div))" init_lookahead = "(?=<(p|div))"
chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*" chapter_line_open = self.line_open
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*" title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
chapter_header_open = r"(?P<chap>" chapter_header_open = r"(?P<chap>"
title_header_open = r"(?P<title>" title_header_open = r"(?P<title>"
chapter_header_close = ")\s*" chapter_header_close = ")\s*"
title_header_close = ")" title_header_close = ")"
chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>" chapter_line_close = self.line_close
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>" title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
is_pdftohtml = self.is_pdftohtml(html) is_pdftohtml = self.is_pdftohtml(html)
if is_pdftohtml: if is_pdftohtml:
chapter_line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
chapter_line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
title_line_open = "<(?P<outer2>p)[^>]*>\s*" title_line_open = "<(?P<outer2>p)[^>]*>\s*"
title_line_close = "\s*</(?P=outer2)>" title_line_close = "\s*</(?P=outer2)>"
@ -374,13 +377,15 @@ class HeuristicProcessor(object):
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Delete microsoft 'smart' tags # Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\w+>', '', html) html = re.sub('(?i)</?st1:\w+>', '', html)
# Delete self closing paragraph tags # Re-open self closing paragraph tags
html = re.sub('<p\s?/>', '', html) html = re.sub('<p[^>/]*/>', '<p> </p>', html)
# Get rid of empty span, bold, font, em, & italics tags # Get rid of empty span, bold, font, em, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html) html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html) html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
# Empty heading tags
html = re.sub(r'(?i)<h\d+>\s*</h\d+>', '', html)
self.deleted_nbsps = True self.deleted_nbsps = True
return html return html
@ -419,32 +424,98 @@ class HeuristicProcessor(object):
return True return True
return False return False
def detect_blank_formatting(self, html): def merge_blanks(self, html, blanks_count=None):
blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE) base_em = .5 # Baseline is 1.5em per blank line, 1st line is .5 em css and 1em for the nbsp
blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE) em_per_line = 1.5 # Add another 1.5 em for each additional blank
def markup_spacers(match): def merge_matches(match):
to_merge = match.group(0)
lines = float(len(self.single_blank.findall(to_merge))) - 1.
em = base_em + (em_per_line * lines)
if to_merge.find('whitespace'):
newline = self.any_multi_blank.sub('\n<p class="whitespace'+str(int(em * 10))+'" style="text-align:center; margin-top:'+str(em)+'em"> </p>', match.group(0))
else:
newline = self.any_multi_blank.sub('\n<p class="softbreak'+str(int(em * 10))+'" style="text-align:center; margin-top:'+str(em)+'em"> </p>', match.group(0))
return newline
html = self.any_multi_blank.sub(merge_matches, html)
return html
def detect_whitespace(self, html):
blanks_around_headings = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
def merge_header_whitespace(match):
initblanks = match.group('initparas')
endblanks = match.group('initparas')
heading = match.group('heading')
top_margin = ''
bottom_margin = ''
if initblanks is not None:
top_margin = 'margin-top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
if endblanks is not None:
bottom_margin = 'margin-bottom:'+str(len(self.single_blank.findall(initblanks)))+'em;'
if initblanks == None and endblanks == None:
return heading
else:
heading = re.sub('(?i)<h(?P<hnum>\d+)[^>]*>', '\n\n<h'+'\g<hnum>'+' style="'+top_margin+bottom_margin+'">', heading)
return heading
html = blanks_around_headings.sub(merge_header_whitespace, html)
def markup_whitespaces(match):
blanks = match.group(0) blanks = match.group(0)
blanks = self.blankreg.sub('\n<p class="spacer"> </p>', blanks) blanks = self.blankreg.sub('\n<p class="whitespace" style="text-align:center; margin-top:0em; margin-bottom:0em"> </p>', blanks)
return blanks return blanks
html = blanks_before_headings.sub(markup_spacers, html)
html = blanks_after_headings.sub(markup_spacers, html) html = blanks_n_nopunct.sub(markup_whitespaces, html)
if self.html_preprocess_sections > self.min_chapters: if self.html_preprocess_sections > self.min_chapters:
html = re.sub('(?si)^.*?(?=<h\d)', markup_spacers, html) html = re.sub('(?si)^.*?(?=<h\d)', markup_whitespaces, html)
return html return html
def detect_soft_breaks(self, html): def detect_soft_breaks(self, html):
if not self.blanks_deleted and self.blanks_between_paragraphs: if not self.blanks_deleted and self.blanks_between_paragraphs:
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html) html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; text-align:center"> </p>', html)
else: else:
html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html) html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
return html return html
def markup_user_break(self, replacement_break):
'''
Takes string a user supplies and wraps it in markup that will be centered with
appropriate margins. <hr> and <img> tags are allowed. If the user specifies
a style with width attributes in the <hr> tag then the appropriate margins are
applied to wrapping divs. This is because many ebook devices don't support margin:auto
All other html is converted to text.
'''
hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em">'
if re.findall('(<|>)', replacement_break):
if re.match('^<hr', replacement_break):
if replacement_break.find('width') != -1:
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
divpercent = (100 - width) / 2
hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
elif re.match('^<img', replacement_break):
scene_break = self.scene_break_open+replacement_break+'</p>'
else:
from calibre.utils.html2text import html2text
replacement_break = html2text(replacement_break)
replacement_break = re.sub('\s', '&nbsp;', replacement_break)
scene_break = self.scene_break_open+replacement_break+'</p>'
else:
replacement_break = re.sub('\s', '&nbsp;', replacement_break)
scene_break = self.scene_break_open+replacement_break+'</p>'
return scene_break
def __call__(self, html): def __call__(self, html):
self.log.debug("********* Heuristic processing HTML *********") self.log.debug("********* Heuristic processing HTML *********")
# Count the words in the document to estimate how many chapters to look for and whether # Count the words in the document to estimate how many chapters to look for and whether
# other types of processing are attempted # other types of processing are attempted
try: try:
@ -458,7 +529,7 @@ class HeuristicProcessor(object):
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = self.arrange_htm_line_endings(html) html = self.arrange_htm_line_endings(html)
#self.dump(html, 'after_arrange_line_endings')
if self.cleanup_required(): if self.cleanup_required():
###### Check Markup ###### ###### Check Markup ######
# #
@ -478,6 +549,11 @@ class HeuristicProcessor(object):
# fix indents must run before this step, as it removes non-breaking spaces # fix indents must run before this step, as it removes non-breaking spaces
html = self.cleanup_markup(html) html = self.cleanup_markup(html)
is_pdftohtml = self.is_pdftohtml(html)
if is_pdftohtml:
self.line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
self.line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
# ADE doesn't render <br />, change to empty paragraphs # ADE doesn't render <br />, change to empty paragraphs
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html) #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
@ -489,6 +565,7 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'markup_chapter_headings', False): if getattr(self.extra_opts, 'markup_chapter_headings', False):
html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs) html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
#self.dump(html, 'after_chapter_markup')
if getattr(self.extra_opts, 'italicize_common_cases', False): if getattr(self.extra_opts, 'italicize_common_cases', False):
html = self.markup_italicis(html) html = self.markup_italicis(html)
@ -498,7 +575,7 @@ class HeuristicProcessor(object):
if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False): if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
self.log.debug("deleting blank lines") self.log.debug("deleting blank lines")
self.blanks_deleted = True self.blanks_deleted = True
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html) html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>', html)
html = self.blankreg.sub('', html) html = self.blankreg.sub('', html)
# Determine line ending type # Determine line ending type
@ -539,7 +616,7 @@ class HeuristicProcessor(object):
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
self.log.debug("Looking for more split points based on punctuation," self.log.debug("Looking for more split points based on punctuation,"
" currently have " + unicode(self.html_preprocess_sections)) " currently have " + unicode(self.html_preprocess_sections))
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html) html = chapdetect3.sub(self.chapter_break, html)
if getattr(self.extra_opts, 'renumber_headings', False): if getattr(self.extra_opts, 'renumber_headings', False):
@ -549,14 +626,32 @@ class HeuristicProcessor(object):
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html) html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
# If scene break formatting is enabled, find all blank paragraphs that definitely aren't scenebreaks,
# style it with the 'whitespace' class. All remaining blank lines are styled as softbreaks.
# Multiple sequential blank paragraphs are merged with appropriate margins
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
if getattr(self.extra_opts, 'format_scene_breaks', False): if getattr(self.extra_opts, 'format_scene_breaks', False):
html = self.detect_blank_formatting(html) html = self.detect_whitespace(html)
html = self.detect_soft_breaks(html) html = self.detect_soft_breaks(html)
# Center separator lines blanks_count = len(self.any_multi_blank.findall(html))
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html) if blanks_count >= 1:
#html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html) html = self.merge_blanks(html, blanks_count)
scene_break_regex = self.line_open+'(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
# If the user has enabled scene break replacement, then either softbreaks
# or 'hard' scene breaks are replaced, depending on which is in use
# Otherwise separator lines are centered, use a bit larger margin in this case
replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
if replacement_break is not None:
replacement_break = self.markup_user_break(replacement_break)
if len(scene_break.findall(html)) >= 1:
html = scene_break.sub(replacement_break, html)
else:
html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', replacement_break, html)
else:
html = scene_break.sub(self.scene_break_open+'\g<break>'+'</p>', html)
if self.deleted_nbsps: if self.deleted_nbsps:
# put back non-breaking spaces in empty paragraphs to preserve original formatting # put back non-breaking spaces in empty paragraphs so they render correctly
html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html) html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
return html return html

View File

@ -103,6 +103,8 @@ class EXTHHeader(object):
pass pass
elif id == 108: elif id == 108:
pass # Producer pass # Producer
elif id == 113:
pass # ASIN or UUID
#else: #else:
# print 'unhandled metadata record', id, repr(content) # print 'unhandled metadata record', id, repr(content)

View File

@ -1547,6 +1547,31 @@ class MobiWriter(object):
rights = 'Unknown' rights = 'Unknown'
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
exth.write(rights) exth.write(rights)
nrecs += 1
# Write UUID as ASIN
uuid = None
from calibre.ebooks.oeb.base import OPF
for x in oeb.metadata['identifier']:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
uuid = unicode(x).split(':')[-1]
break
if uuid is None:
from uuid import uuid4
uuid = str(uuid4())
if isinstance(uuid, unicode):
uuid = uuid.encode('utf-8')
exth.write(pack('>II', 113, len(uuid) + 8))
exth.write(uuid)
nrecs += 1
# Write cdetype
if not self.opts.mobi_periodical:
data = 'EBOK'
exth.write(pack('>II', 501, len(data)+8))
exth.write(data)
nrecs += 1
# Add a publication date entry # Add a publication date entry
if oeb.metadata['date'] != [] : if oeb.metadata['date'] != [] :

View File

@ -0,0 +1,357 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QLineEdit, QListView, QAbstractListModel, Qt, QTimer, \
QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \
QStyle, QEvent, pyqtSignal
from calibre.utils.icu import sort_key, lower
from calibre.gui2 import NONE
from calibre.gui2.widgets import EnComboBox
class CompleterItemDelegate(QItemDelegate): # {{{
''' Renders the current item as thought it were selected '''
def __init__(self, view):
self.view = view
QItemDelegate.__init__(self, view)
def paint(self, p, opt, idx):
opt = QStyleOptionViewItem(opt)
opt.showDecorationSelected = True
if self.view.currentIndex() == idx:
opt.state |= QStyle.State_HasFocus
QItemDelegate.paint(self, p, opt, idx)
# }}}
class CompleteWindow(QListView): # {{{
'''
The completion popup. For keyboard and mouse handling see
:meth:`eventFilter`.
'''
#: This signal is emitted when the user selects one of the listed
#: completions, by mouse or keyboard
completion_selected = pyqtSignal(object)
def __init__(self, widget, model):
self.widget = widget
QListView.__init__(self)
self.setVisible(False)
self.setParent(None, Qt.Popup)
self.setAlternatingRowColors(True)
self.setFocusPolicy(Qt.NoFocus)
self._d = CompleterItemDelegate(self)
self.setItemDelegate(self._d)
self.setModel(model)
self.setFocusProxy(widget)
self.installEventFilter(self)
self.clicked.connect(self.do_selected)
self.entered.connect(self.do_entered)
self.setMouseTracking(True)
def do_entered(self, idx):
if idx.isValid():
self.setCurrentIndex(idx)
def do_selected(self, idx=None):
idx = self.currentIndex() if idx is None else idx
if not idx.isValid() and self.model().rowCount() > 0:
idx = self.model().index(0)
if idx.isValid():
data = unicode(self.model().data(idx, Qt.DisplayRole))
self.completion_selected.emit(data)
self.hide()
def eventFilter(self, o, e):
if o is not self:
return False
if e.type() == e.KeyPress:
key = e.key()
if key in (Qt.Key_Escape, Qt.Key_Backtab) or \
(key == Qt.Key_F4 and (e.modifiers() & Qt.AltModifier)):
self.hide()
return True
elif key in (Qt.Key_Enter, Qt.Key_Return, Qt.Key_Tab):
self.do_selected()
return True
elif key in (Qt.Key_Up, Qt.Key_Down, Qt.Key_PageUp,
Qt.Key_PageDown):
return False
# Send key event to associated line edit
self.widget.eat_focus_out = False
try:
self.widget.event(e)
finally:
self.widget.eat_focus_out = True
if not self.widget.hasFocus():
# Line edit lost focus
self.hide()
if e.isAccepted():
# Line edit consumed event
return True
elif e.type() == e.MouseButtonPress:
# Hide popup if user clicks outside it, otherwise
# pass event to popup
if not self.underMouse():
self.hide()
return True
elif e.type() in (e.InputMethod, e.ShortcutOverride):
QApplication.sendEvent(self.widget, e)
return False # Do not filter this event
# }}}
class CompleteModel(QAbstractListModel):
def __init__(self, parent=None):
QAbstractListModel.__init__(self, parent)
self.sep = ','
self.space_before_sep = False
self.items = []
self.lowered_items = []
self.matches = []
def set_items(self, items):
items = [unicode(x.strip()) for x in items]
self.items = list(sorted(items, key=lambda x: sort_key(x)))
self.lowered_items = [lower(x) for x in self.items]
self.matches = []
self.reset()
def rowCount(self, *args):
return len(self.matches)
def data(self, index, role):
if role == Qt.DisplayRole:
r = index.row()
try:
return self.matches[r]
except IndexError:
pass
return NONE
def get_matches(self, prefix):
'''
Return all matches that (case insensitively) start with prefix
'''
prefix = lower(prefix)
ans = []
if prefix:
for i, test in enumerate(self.lowered_items):
if test.startswith(prefix):
ans.append(self.items[i])
return ans
def update_matches(self, matches):
self.matches = matches
self.reset()
class MultiCompleteLineEdit(QLineEdit):
'''
A line edit that completes on multiple items separated by a
separator. Use the :meth:`update_items_cache` to set the list of
all possible completions. Separator can be controlled with the
:meth:`set_separator` and :meth:`set_space_before_sep` methods.
A call to self.set_separator(None) will allow this widget to be used
to complete non multiple fields as well.
'''
def __init__(self, parent=None):
self.eat_focus_out = True
self.max_visible_items = 7
self.current_prefix = None
QLineEdit.__init__(self, parent)
self._model = CompleteModel(parent=self)
self.complete_window = CompleteWindow(self, self._model)
self.textChanged.connect(self.text_changed)
self.cursorPositionChanged.connect(self.cursor_position_changed)
self.complete_window.completion_selected.connect(self.completion_selected)
# Interface {{{
def update_items_cache(self, complete_items):
self.all_items = complete_items
def set_separator(self, sep):
self.sep = sep
def set_space_before_sep(self, space_before):
self.space_before_sep = space_before
# }}}
def eventFilter(self, o, e):
if self.eat_focus_out and o is self and e.type() == QEvent.FocusOut:
if self.complete_window.isVisible():
return True # Filter this event since the cw is visible
return QLineEdit.eventFilter(self, o, e)
def text_changed(self, *args):
self.update_completions()
def cursor_position_changed(self, *args):
self.update_completions()
def update_completions(self):
' Update the list of completions '
cpos = self.cursorPosition()
text = unicode(self.text())
prefix = text[:cpos]
self.current_prefix = prefix
complete_prefix = prefix.lstrip()
if self.sep:
complete_prefix = prefix = prefix.split(self.sep)[-1].lstrip()
matches = self._model.get_matches(complete_prefix)
self.update_complete_window(matches)
def get_completed_text(self, text):
'''
Get completed text from current cursor position and the completion
text
'''
if self.sep is None:
return text
else:
cursor_pos = self.cursorPosition()
before_text = unicode(self.text())[:cursor_pos]
after_text = unicode(self.text())[cursor_pos:]
after_parts = after_text.split(self.sep)
if len(after_parts) < 3 and not after_parts[-1].strip():
after_text = u''
prefix_len = len(before_text.split(self.sep)[-1].lstrip())
if self.space_before_sep:
complete_text_pat = '%s%s %s %s'
len_extra = 3
else:
complete_text_pat = '%s%s%s %s'
len_extra = 2
return prefix_len, len_extra, complete_text_pat % (
before_text[:cursor_pos - prefix_len], text, self.sep, after_text)
def completion_selected(self, text):
prefix_len, len_extra, ctext = self.get_completed_text(text)
if self.sep is None:
self.setText(ctext)
self.setCursorPosition(len(ctext))
else:
cursor_pos = self.cursorPosition()
self.setText(ctext)
self.setCursorPosition(cursor_pos - prefix_len + len(text) + len_extra)
def update_complete_window(self, matches):
self._model.update_matches(matches)
if matches:
self.show_complete_window()
else:
self.complete_window.hide()
def position_complete_window(self):
popup = self.complete_window
screen = QApplication.desktop().availableGeometry(self)
h = (popup.sizeHintForRow(0) * min(self.max_visible_items,
popup.model().rowCount()) + 3) + 3
hsb = popup.horizontalScrollBar()
if hsb and hsb.isVisible():
h += hsb.sizeHint().height()
rh = self.height()
pos = self.mapToGlobal(QPoint(0, self.height() - 2))
w = self.width()
if w > screen.width():
w = screen.width()
if (pos.x() + w) > (screen.x() + screen.width()):
pos.setX(screen.x() + screen.width() - w)
if (pos.x() < screen.x()):
pos.setX(screen.x())
top = pos.y() - rh - screen.top() + 2
bottom = screen.bottom() - pos.y()
h = max(h, popup.minimumHeight())
if h > bottom:
h = min(max(top, bottom), h)
if top > bottom:
pos.setY(pos.y() - h - rh + 2)
popup.setGeometry(pos.x(), pos.y(), w, h)
def show_complete_window(self):
self.position_complete_window()
self.complete_window.show()
def moveEvent(self, ev):
ret = QLineEdit.moveEvent(self, ev)
QTimer.singleShot(0, self.position_complete_window)
return ret
def resizeEvent(self, ev):
ret = QLineEdit.resizeEvent(self, ev)
QTimer.singleShot(0, self.position_complete_window)
return ret
@dynamic_property
def all_items(self):
def fget(self):
return self._model.items
def fset(self, items):
self._model.set_items(items)
return property(fget=fget, fset=fset)
@dynamic_property
def sep(self):
def fget(self):
return self._model.sep
def fset(self, val):
self._model.sep = val
return property(fget=fget, fset=fset)
@dynamic_property
def space_before_sep(self):
def fget(self):
return self._model.space_before_sep
def fset(self, val):
self._model.space_before_sep = val
return property(fget=fget, fset=fset)
class MultiCompleteComboBox(EnComboBox):
def __init__(self, *args):
EnComboBox.__init__(self, *args)
self.setLineEdit(MultiCompleteLineEdit(self))
def update_items_cache(self, complete_items):
self.lineEdit().update_items_cache(complete_items)
def set_separator(self, sep):
self.lineEdit().set_separator(sep)
def set_space_before_sep(self, space_before):
self.lineEdit().set_space_before_sep(space_before)
if __name__ == '__main__':
from PyQt4.Qt import QDialog, QVBoxLayout
app = QApplication([])
d = QDialog()
d.setLayout(QVBoxLayout())
le = MultiCompleteLineEdit(d)
d.layout().addWidget(le)
le.all_items = ['one', 'otwo', 'othree', 'ooone', 'ootwo', 'oothree']
d.exec_()

View File

@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import Qt from PyQt4.Qt import Qt
from calibre.gui2 import gprefs
from calibre.gui2.convert.heuristics_ui import Ui_Form from calibre.gui2.convert.heuristics_ui import Ui_Form
from calibre.gui2.convert import Widget from calibre.gui2.convert import Widget
@ -21,17 +22,38 @@ class HeuristicsWidget(Widget, Ui_Form):
['enable_heuristics', 'markup_chapter_headings', ['enable_heuristics', 'markup_chapter_headings',
'italicize_common_cases', 'fix_indents', 'italicize_common_cases', 'fix_indents',
'html_unwrap_factor', 'unwrap_lines', 'html_unwrap_factor', 'unwrap_lines',
'delete_blank_paragraphs', 'format_scene_breaks', 'delete_blank_paragraphs',
'format_scene_breaks', 'replace_scene_breaks',
'dehyphenate', 'renumber_headings'] 'dehyphenate', 'renumber_headings']
) )
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
self.rssb_defaults = [u'', u'<hr />', u'* * *', u'• • •', u'✦ ✦ ✦',
u'✮ ✮ ✮', u'☆ ☆ ☆', u'❂ ❂ ❂', u'✣ ✣ ✣', u'❖ ❖ ❖', u'☼ ☼ ☼', u'✠ ✠ ✠']
self.initialize_options(get_option, get_help, db, book_id) self.initialize_options(get_option, get_help, db, book_id)
self.load_histories()
self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics) self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap) self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
self.enable_heuristics(self.opt_enable_heuristics.checkState()) self.enable_heuristics(self.opt_enable_heuristics.checkState())
def restore_defaults(self, get_option):
Widget.restore_defaults(self, get_option)
self.save_histories()
rssb_hist = gprefs['replace_scene_breaks_history']
for x in self.rssb_defaults:
if x in rssb_hist:
del rssb_hist[rssb_hist.index(x)]
gprefs['replace_scene_breaks_history'] = self.rssb_defaults + gprefs['replace_scene_breaks_history']
self.load_histories()
def commit_options(self, save_defaults=False):
self.save_histories()
return Widget.commit_options(self, save_defaults)
def break_cycles(self): def break_cycles(self):
Widget.break_cycles(self) Widget.break_cycles(self)
@ -45,6 +67,33 @@ class HeuristicsWidget(Widget, Ui_Form):
if val is None and g is self.opt_html_unwrap_factor: if val is None and g is self.opt_html_unwrap_factor:
g.setValue(0.0) g.setValue(0.0)
return True return True
if not val and g is self.opt_replace_scene_breaks:
g.lineEdit().setText('')
return True
def load_histories(self):
self.opt_replace_scene_breaks.clear()
self.opt_replace_scene_breaks.lineEdit().setText('')
val = unicode(self.opt_replace_scene_breaks.currentText())
rssb_hist = gprefs.get('replace_scene_breaks_history', self.rssb_defaults)
if val in rssb_hist:
del rssb_hist[rssb_hist.index(val)]
rssb_hist.insert(0, val)
for v in rssb_hist:
# Ensure we don't have duplicate items.
if self.opt_replace_scene_breaks.findText(v) == -1:
self.opt_replace_scene_breaks.addItem(v)
self.opt_replace_scene_breaks.setCurrentIndex(0)
def save_histories(self):
rssb_history = []
history_pats = [unicode(self.opt_replace_scene_breaks.lineEdit().text())] + [unicode(self.opt_replace_scene_breaks.itemText(i)) for i in xrange(self.opt_replace_scene_breaks.count())]
for p in history_pats[:10]:
# Ensure we don't have duplicate items.
if p not in rssb_history:
rssb_history.append(p)
gprefs['replace_scene_breaks_history'] = rssb_history
def enable_heuristics(self, state): def enable_heuristics(self, state):
state = state == Qt.Checked state = state == Qt.Checked

View File

@ -150,6 +150,45 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<property name="sizeConstraint">
<enum>QLayout::SetDefaultConstraint</enum>
</property>
<item>
<widget class="QLabel" name="label_2">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Replace soft scene &amp;breaks:</string>
</property>
<property name="buddy">
<cstring>opt_replace_scene_breaks</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="opt_replace_scene_breaks">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="editable">
<bool>true</bool>
</property>
<property name="insertPolicy">
<enum>QComboBox::InsertAtTop</enum>
</property>
</widget>
</item>
</layout>
</item>
<item> <item>
<widget class="QCheckBox" name="opt_dehyphenate"> <widget class="QCheckBox" name="opt_dehyphenate">
<property name="text"> <property name="text">

View File

@ -264,8 +264,9 @@ class EmailMixin(object): # {{{
if _auto_ids != []: if _auto_ids != []:
for id in _auto_ids: for id in _auto_ids:
if specific_format == None: if specific_format == None:
formats = [f.lower() for f in self.library_view.model().db.formats(id, index_is_id=True).split(',')] dbfmts = self.library_view.model().db.formats(id, index_is_id=True)
formats = formats if formats != None else [] formats = [f.lower() for f in (dbfmts.split(',') if fmts else
[])]
if list(set(formats).intersection(available_input_formats())) != [] and list(set(fmts).intersection(available_output_formats())) != []: if list(set(formats).intersection(available_input_formats())) != [] and list(set(fmts).intersection(available_output_formats())) != []:
auto.append(id) auto.append(id)
else: else:

View File

@ -12,8 +12,8 @@ from PyQt4.Qt import Qt, QDateEdit, QDate, \
QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \ QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
QPushButton, QSpinBox, QLineEdit QPushButton, QSpinBox, QLineEdit
from calibre.gui2.widgets import EnLineEdit, CompleteComboBox, \ from calibre.gui2.widgets import EnLineEdit, EnComboBox, FormatList, ImageView
EnComboBox, FormatList, ImageView, CompleteLineEdit from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.utils.config import tweaks, prefs from calibre.utils.config import tweaks, prefs
from calibre.ebooks.metadata import title_sort, authors_to_string, \ from calibre.ebooks.metadata import title_sort, authors_to_string, \
@ -149,14 +149,14 @@ class TitleSortEdit(TitleEdit):
# }}} # }}}
# Authors {{{ # Authors {{{
class AuthorsEdit(CompleteComboBox): class AuthorsEdit(MultiCompleteComboBox):
TOOLTIP = '' TOOLTIP = ''
LABEL = _('&Author(s):') LABEL = _('&Author(s):')
def __init__(self, parent): def __init__(self, parent):
self.dialog = parent self.dialog = parent
CompleteComboBox.__init__(self, parent) MultiCompleteComboBox.__init__(self, parent)
self.setToolTip(self.TOOLTIP) self.setToolTip(self.TOOLTIP)
self.setWhatsThis(self.TOOLTIP) self.setWhatsThis(self.TOOLTIP)
self.setEditable(True) self.setEditable(True)
@ -814,14 +814,14 @@ class RatingEdit(QSpinBox): # {{{
# }}} # }}}
class TagsEdit(CompleteLineEdit): # {{{ class TagsEdit(MultiCompleteLineEdit): # {{{
LABEL = _('Ta&gs:') LABEL = _('Ta&gs:')
TOOLTIP = '<p>'+_('Tags categorize the book. This is particularly ' TOOLTIP = '<p>'+_('Tags categorize the book. This is particularly '
'useful while searching. <br><br>They can be any words' 'useful while searching. <br><br>They can be any words'
'or phrases, separated by commas.') 'or phrases, separated by commas.')
def __init__(self, parent): def __init__(self, parent):
CompleteLineEdit.__init__(self, parent) MultiCompleteLineEdit.__init__(self, parent)
self.setToolTip(self.TOOLTIP) self.setToolTip(self.TOOLTIP)
self.setWhatsThis(self.TOOLTIP) self.setWhatsThis(self.TOOLTIP)
@ -839,7 +839,7 @@ class TagsEdit(CompleteLineEdit): # {{{
tags = db.tags(id_, index_is_id=True) tags = db.tags(id_, index_is_id=True)
tags = tags.split(',') if tags else [] tags = tags.split(',') if tags else []
self.current_val = tags self.current_val = tags
self.update_items_cache(db.all_tags()) self.all_items = db.all_tags()
self.original_val = self.current_val self.original_val = self.current_val
@property @property
@ -860,7 +860,7 @@ class TagsEdit(CompleteLineEdit): # {{{
d = TagEditor(self, db, id_) d = TagEditor(self, db, id_)
if d.exec_() == TagEditor.Accepted: if d.exec_() == TagEditor.Accepted:
self.current_val = d.tags self.current_val = d.tags
self.update_items_cache(db.all_tags()) self.all_items = db.all_tags()
def commit(self, db, id_): def commit(self, db, id_):

View File

@ -430,8 +430,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
authors = self.authors(id, index_is_id=True) authors = self.authors(id, index_is_id=True)
if not authors: if not authors:
authors = _('Unknown') authors = _('Unknown')
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') author = ascii_filename(authors.split(',')[0])[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') title = ascii_filename(self.title(id, index_is_id=True))[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
path = author + '/' + title + ' (%d)'%id path = author + '/' + title + ' (%d)'%id
return path return path
@ -442,8 +442,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
authors = self.authors(id, index_is_id=True) authors = self.authors(id, index_is_id=True)
if not authors: if not authors:
authors = _('Unknown') authors = _('Unknown')
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') author = ascii_filename(authors.split(',')[0])[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') title = ascii_filename(self.title(id, index_is_id=True))[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace')
name = title + ' - ' + author name = title + ' - ' + author
while name.endswith('.'): while name.endswith('.'):
name = name[:-1] name = name[:-1]

View File

@ -311,9 +311,14 @@ remove all non-breaking-space entities, or may include false positive matches re
:guilabel:`Ensure scene breaks are consistently formatted` :guilabel:`Ensure scene breaks are consistently formatted`
With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned. With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned.
It also attempts to detect scene breaks defined by white space and replace them with a horizontal rule 15% of the 'Soft' scene break markers, i.e. scene breaks only defined by extra white space, are styled to ensure that they
page width. Some readers may find this desirable as these 'soft' scene breaks often become page breaks on readers, and will not be displayed in conjunction with page breaks.
thus become difficult to distinguish.
:guilabel:`Replace scene breaks`
If this option is configured then |app| will replace scene break markers it finds with the replacement text specified by the
user. In general you should avoid using html tags, |app| will discard any tags and use pre-defined markup. <hr />
tags, i.e. horizontal rules, are an exception. These can optionally be specified with styles, if you choose to add your own
style be sure to include the 'width' setting, otherwise the style information will be discarded.
:guilabel:`Remove unnecessary hyphens` :guilabel:`Remove unnecessary hyphens`
|app| will analyze all hyphenated content in the document when this option is enabled. The document itself is used |app| will analyze all hyphenated content in the document when this option is enabled. The document itself is used
@ -628,7 +633,7 @@ between 0 and 1. The default is 0.45, just under the median line length. Lower t
text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`. text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`.
Also, they often have headers and footers as part of the document that will become included with the text. Also, they often have headers and footers as part of the document that will become included with the text.
Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not Use the Search and Replace panel to remove headers and footers to mitigate this issue. If the headers and footers are not
removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read
:ref:`regexptutorial`. :ref:`regexptutorial`.