From 30212404de06023636ae0594a055302c6da2553c Mon Sep 17 00:00:00 2001 From: ldolse Date: Sun, 6 Feb 2011 14:15:39 +0800 Subject: [PATCH 1/6] fixed handling of 'unformatted' text input --- src/calibre/ebooks/txt/input.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 75bafc7cef..674277fc41 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -58,6 +58,7 @@ class TXTInput(InputFormatPlugin): accelerators): self.log = log log.debug('Reading text from file...') + length = 0 txt = stream.read() @@ -109,11 +110,12 @@ class TXTInput(InputFormatPlugin): # Reformat paragraphs to block formatting based on the detected type. # We don't check for block because the processor assumes block. # single and print at transformed to block for processing. - if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted': + if options.paragraph_type == 'single': txt = separate_paragraphs_single_line(txt) elif options.paragraph_type == 'print': txt = separate_paragraphs_print_formatted(txt) elif options.paragraph_type == 'unformatted': + print "unwrapping lines using heuristics" from calibre.ebooks.conversion.utils import HeuristicProcessor # unwrap lines based on punctuation docanalysis = DocAnalysis('txt', txt) @@ -123,7 +125,8 @@ class TXTInput(InputFormatPlugin): if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False): docanalysis = DocAnalysis('txt', txt) - length = docanalysis.line_length(.5) + if not length: + length = docanalysis.line_length(.5) dehyphenator = Dehyphenator(options.verbose, log=self.log) txt = dehyphenator(txt,'txt', length) From 9088903f4c4b535372b0c4f4cfd80c2170be292d Mon Sep 17 00:00:00 2001 From: ldolse Date: Sun, 6 Feb 2011 19:28:15 +0800 Subject: [PATCH 2/6] ... --- src/calibre/ebooks/txt/input.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 674277fc41..ae5a216435 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -115,7 +115,6 @@ class TXTInput(InputFormatPlugin): elif options.paragraph_type == 'print': txt = separate_paragraphs_print_formatted(txt) elif options.paragraph_type == 'unformatted': - print "unwrapping lines using heuristics" from calibre.ebooks.conversion.utils import HeuristicProcessor # unwrap lines based on punctuation docanalysis = DocAnalysis('txt', txt) From 9b1ae4ba9790bfc4cc02c111ee3e83042dd79522 Mon Sep 17 00:00:00 2001 From: ldolse Date: Sun, 6 Feb 2011 19:55:35 +0800 Subject: [PATCH 3/6] ... --- src/calibre/ebooks/txt/input.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index ae5a216435..dc624519bb 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -121,6 +121,7 @@ class TXTInput(InputFormatPlugin): length = docanalysis.line_length(.5) preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None)) txt = preprocessor.punctuation_unwrap(length, txt, 'txt') + txt = separate_paragraphs_single_line(txt) if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False): docanalysis = DocAnalysis('txt', txt) From c2574b862a6d4865d674f0f9aaa07296e063b0c8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 6 Feb 2011 09:01:46 -0700 Subject: [PATCH 4/6] Fix #8800 (small change needed to irishtimes.recipe) --- resources/recipes/irish_times.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/recipes/irish_times.recipe b/resources/recipes/irish_times.recipe index 0ac130ed7a..83ea496b2c 100644 --- a/resources/recipes/irish_times.recipe +++ b/resources/recipes/irish_times.recipe @@ -35,7 +35,7 @@ class IrishTimes(BasicNewsRecipe): def print_version(self, url): if url.count('rss.feedsportal.com'): u = 'http://www.irishtimes.com' + \ - (((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html') + (((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html') else: u = url.replace('.html','_pf.html') return u From 28cba375fd6aee8846c9c3ac572700907969fa45 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 6 Feb 2011 09:06:49 -0700 Subject: [PATCH 5/6] Fix #8793 (Unhandled exception: AttributeError:'MessageBox' object has no attribute 'ctc_button') --- src/calibre/gui2/dialogs/message_box.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/dialogs/message_box.py b/src/calibre/gui2/dialogs/message_box.py index 9d586ce28d..945d50de4e 100644 --- a/src/calibre/gui2/dialogs/message_box.py +++ b/src/calibre/gui2/dialogs/message_box.py @@ -89,7 +89,8 @@ class MessageBox(QDialog, Ui_Dialog): (__version__, unicode(self.windowTitle()), unicode(self.msg.text()), unicode(self.det_msg.toPlainText()))) - self.ctc_button.setText(_('Copied')) + if hasattr(self, 'ctc_button'): + self.ctc_button.setText(_('Copied')) def showEvent(self, ev): ret = QDialog.showEvent(self, ev) From ef51c4d19835f88561b1f04a41ebfce61fe0b991 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 6 Feb 2011 09:19:09 -0700 Subject: [PATCH 6/6] Fix #8799 (undefined pub date appearing as (101)) --- src/calibre/ebooks/oeb/transforms/jacket.py | 23 ++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index fe0d60de7a..d3b66d1e81 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -15,6 +15,7 @@ from calibre import guess_type, strftime from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML from calibre.library.comments import comments_to_html +from calibre.utils.date import is_date_undefined JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]' @@ -130,7 +131,10 @@ def render_jacket(mi, output_profile, publisher = '' try: - pubdate = strftime(u'%Y', mi.pubdate.timetuple()) + if is_date_undefined(mi.pubdate): + pubdate = '' + else: + pubdate = strftime(u'%Y', mi.pubdate.timetuple()) except: pubdate = '' @@ -175,19 +179,24 @@ def render_jacket(mi, output_profile, soup = BeautifulSoup(generated_html) if not series: series_tag = soup.find(attrs={'class':'cbj_series'}) - series_tag.extract() + if series_tag is not None: + series_tag.extract() if not rating: rating_tag = soup.find(attrs={'class':'cbj_rating'}) - rating_tag.extract() + if rating_tag is not None: + rating_tag.extract() if not tags: tags_tag = soup.find(attrs={'class':'cbj_tags'}) - tags_tag.extract() + if tags_tag is not None: + tags_tag.extract() if not pubdate: - pubdate_tag = soup.find(attrs={'class':'cbj_pubdate'}) - pubdate_tag.extract() + pubdate_tag = soup.find(attrs={'class':'cbj_pubdata'}) + if pubdate_tag is not None: + pubdate_tag.extract() if output_profile.short_name != 'kindle': hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'}) - hr_tag.extract() + if hr_tag is not None: + hr_tag.extract() return soup.renderContents(None)