diff --git a/Changelog.yaml b/Changelog.yaml index 699aa3a531..82b335bbdd 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -4,6 +4,100 @@ # for important features/bug fixes. # Also, each release can have new and improved recipes. +- version: 0.7.38 + date: 2011-01-07 + + new features: + - title: "Reduce startup time when using a composite custom column" + + - title: "Template language: Add a list_item function for use with tags like columns. See User Manual for details" + + - title: "TXT Input: Attempt to detect the input encoding when not specified. Auto detect paragraph structure and formatting markup." + + - title: "Search & replace: Add ability to manipulate number and boolean columns." + + - title: "Add type ahead completion to the advanced search dialog." + tickets: [8035] + + - title: "Double click on plugin in Preferences dialog to customize" + tickets: [8175] + + - title: "Allow customization of the SONY driver to send thumbnail to the device. Useful with newer SONY readers" + tickets: [8161] + + - title: "Smarten punctuation: Convert double dashes to em dashes. Preprocessing: Various tweaks" + + bug fixes: + - title: "Fix regression causing the template formatter to interpret a missing format letter as ERROR instead of 's'." + + - title: "Fix regression that broke conversion of PNG images in PDF files on OS X." + tickets: [8215] + + - title: "Content server: Fix improper XML escaping of category titles in the OPDS feeds" + tickets: [8225] + + - title: "When decoding XML if the XML starts with a UTF-8 BOM decode as UTF-8. Fixes parsing of FB2 files with UTF-8 BOMs" + + - title: "E-book viewer: When scrolling to a bookmark and the content is wider than the window, do not scroll in the horizontal direction" + + - title: "E-book viewer: Fix next page skipping the bottom of chapters when the content is wider than the window." + tickets: [8153] + + - title: "FB2 Output: Insert covers." 
+ tickets: [8172] + + - title: "Content server: When serving OPDS feeds handle html descriptions that have namespaced attributes." + tickets: [7938] + + - title: "When downloading metadata from isbndb.com, download a maximum of 30 results rather than 1000" + + - title: "Fix sorting of tags column" + + - title: "Change search/replace to show commas instead of vertical bars as the separator for multiple authors" + + - title: "Template language: Make all column names case insensitive" + + - title: "Fix bug that prevented the Disabled option for Tag Browser partitioning from working in the Preferences dialog" + + - title: "Fix bug when using tags like custom column in the template language" + + - title: "Fix bug where composite custom columns using general_program_mode fields are not evaluated correctly when used in a template." + + - title: "ImageMagick interface: Don't crash when asked to open empty image files" + + - title: "Kobo driver: Add TXT,CBZ,CBR to supported formats list" + tickets: [8124] + + - title: "Don't unnecessarily scroll the book list horizontally when re-selecting previously selected rows." 
+ + new recipes: + - title: "New London Day" + author: "Being" + + - title: "Walla" + author: "marbs" + + - title: "New Journal of Physics" + author: "Chema Cortes" + + - title: "The Baltimore Sun" + author: "Josh Hall" + + - title: "Arabian Business and Sunday Times (UK)" + author: "Darko Miletic" + + - title: "Deia" + author: "Gerardo Diez" + + - title: "Smarter Planet" + author: "Jack Mason" + + + improved recipes: + - The Atlantic + - Danas + - Ledevoir + - version: 0.7.37 date: 2011-01-02 diff --git a/resources/recipes/new_london_day.recipe b/resources/recipes/new_london_day.recipe new file mode 100644 index 0000000000..bc8c44e40e --- /dev/null +++ b/resources/recipes/new_london_day.recipe @@ -0,0 +1,74 @@ +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1294342201(BasicNewsRecipe): + title = u'New London Day' + __author__ = 'Being' + description = 'State, local and business news from New London, CT' + language = 'en_GB' + oldest_article = 1 + max_articles_per_feed = 200 + + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + remove_tags_before = dict(id='article') + remove_tags_after = dict(id='article') + remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}), + dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']), + dict(name=['script', 'noscript', 'style'])] + remove_tags_after = [ {'class':['photo_article',]} ] + remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]}, + {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan 
brownBackground","curvedContent"]}, + dict(name='font',attrs={'id':["cr-other-headlines"]})] + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + feeds = [ + (u'All News', u'http://www.theday.com/section/rss'), + (u'Breaking News', u'http://www.theday.com/section/rss01'), + (u'Police and Courts', u'http://www.theday.com/section/rss02'), + (u'State News', u'http://www.theday.com/section/rss03'), + (u'Local Business', u'http://www.theday.com/section/rss04'), + (u'Entertainment', u'http://www.theday.com/section/rss05'), + (u'Opinion', u'http://www.theday.com/section/rss06'), + (u'Casinos', u'http://www.theday.com/section/rss12'), + (u'Defense and Military', u'http://www.theday.com/section/rss14'), + (u'Ann Baldelli Ruminations', u'http://www.theday.com/section/rss20'), + (u'Paul Choiniere Ruminations', u'http://www.theday.com/section/rss21'), + (u'Michael Costanza Omnivore', u'http://www.theday.com/section/rss23'), + (u'Rebecca Dangelo Reel Life', u'http://www.theday.com/section/rss25'),] + + def print_version(self, url): + return url.replace('/index.html', '/print.html') + + def get_article_url(self, article): + 
return article.get('feedburner_origlink', article.get('guid', article.get('link'))) + + + def postprocess_html(self, soup, first_fetch): + for t in soup.findAll(['table', 'tr', 'td']): + t.name = 'div' + + for tag in soup.findAll('form', dict(attrs={'name':["comments_form"]})): + tag.extract() + for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})): + tag.extract() + + return soup + diff --git a/resources/recipes/njp.recipe b/resources/recipes/njp.recipe index ed202512f2..996aef2fdf 100644 --- a/resources/recipes/njp.recipe +++ b/resources/recipes/njp.recipe @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- __license__ = 'GPL v3' -__copyright__ = 'Chema Cort閟 - 2011-01-05' +__copyright__ = u'Chema Cort\xe9s - 2011-01-05' __version__ = 'v0.01' __date__ = '2011-01-05' ''' diff --git a/resources/recipes/walla.recipe b/resources/recipes/walla.recipe new file mode 100644 index 0000000000..5fbfed7a03 --- /dev/null +++ b/resources/recipes/walla.recipe @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1283848012(BasicNewsRecipe): + description = 'The WallaNews.' + cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/rsPhoto/sz_5/rsz_220_220_logo_walla.gif' + title = u'Walla' + language = 'he' + __author__ = 'marbs' + extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' + simultaneous_downloads = 5 +# remove_javascript = True + timefmt = '[%a, %d %b, %Y]' + oldest_article = 1 + max_articles_per_feed = 100 + # remove_attributes = ['width'] + keep_only_tags =dict(name='div', attrs={'class':'wp-0-b w3'}) + remove_tags = [dict(name='div', attrs={'class':'tagsContainer'})] + max_articles_per_feed = 100 +# preprocess_regexps = [ +# (re.compile(r'

 

', re.DOTALL|re.IGNORECASE), lambda match: '') +# ] + + + feeds = [(u'讞讚砖讜转', u'http://rss.walla.co.il/?w=/1/0/1/@rss'), + (u'注住拽讬诐', u'http://rss.walla.co.il/?w=/2/3/1/@rss'), + (u'转专讘讜转', u'http://rss.walla.co.il/?w=/4/249/1/@rss'), + (u'讘专讬讗讜转', u'http://rss.walla.co.il/?w=/5/18/1/@rss'), + (u'TECH', u'http://rss.walla.co.il/?w=/6/4/1/@rss'), + (u'讗住讟专讜诇讜讙讬讛', u'http://rss.walla.co.il/?w=/8/3307/1/@rss'), + (u'讘注诇讬 讞讬讬诐', u'http://rss.walla.co.il/?w=/59/5703/1/@rss'), + (u'专讻讘', u'http://rss.walla.co.il/?w=/31/4700/1/@rss'), + (u'住诇讘住', u'http://rss.walla.co.il/?w=/22/3600/1/@rss'), + (u'讗讜讻诇', u'http://rss.walla.co.il/?w=/9/903/1/@rss'), + (u'讗讜驻谞讛', u'http://rss.walla.co.il/?w=/24/2120/1/@rss'), + (u'讘专谞讝讛', u'http://rss.walla.co.il/?w=/27/3900/1/@rss'), + (u'ZONE', u'http://rss.walla.co.il/?w=/18/500/1/@rss'), + (u'住驻讜专讟', u'http://rss.walla.co.il/?w=/3/7/1/@rss')] + + def print_version(self, url): + print_url = url + '/@@/item/printer' + return print_url + diff --git a/resources/viewer/bookmarks.js b/resources/viewer/bookmarks.js index d36e7c579a..253524326f 100644 --- a/resources/viewer/bookmarks.js +++ b/resources/viewer/bookmarks.js @@ -41,6 +41,7 @@ function scroll_to_bookmark(bookmark) { $.scrollTo($(bm[0]), 1000, { over:ratio, + axis: 'y', // Do not scroll in the x direction onAfter:function(){window.py_bridge.animated_scroll_done()} } ); diff --git a/src/calibre/constants.py b/src/calibre/constants.py index bc359a2b79..2443c55d9d 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.7.37' +__version__ = '0.7.38' __author__ = "Kovid Goyal " import re diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index dd279c6559..f9bca3c8d4 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py 
@@ -18,7 +18,7 @@ __version__ = "1.0" -import re +import re, codecs def detect(aBuf): import calibre.ebooks.chardet.universaldetector as universaldetector @@ -83,9 +83,11 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, if not raw: return u'', encoding if not isinstance(raw, unicode): - if raw.startswith('\xff\xfe'): + if raw.startswith(codecs.BOM_UTF8): + raw, encoding = raw.decode('utf-8')[1:], 'utf-8' + elif raw.startswith(codecs.BOM_UTF16_LE): raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le' - elif raw.startswith('\xfe\xff'): + elif raw.startswith(codecs.BOM_UTF16_BE): raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be' if not isinstance(raw, unicode): for pat in ENCODING_PATS: diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 3ff816b3bf..29006ffd9b 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -51,16 +51,16 @@ def chap_head(match): chap = match.group('chap') title = match.group('title') if not title: - return '

'+chap+'


\n' + return '

'+chap+'


\n' else: - return '

'+chap+'

\n

'+title+'

\n' + return '

'+chap+'

\n

'+title+'

\n' def wrap_lines(match): ital = match.group('ital') if not ital: - return ' ' + return ' ' else: - return ital+' ' + return ital+' ' class DocAnalysis(object): ''' @@ -191,7 +191,7 @@ class Dehyphenator(object): dehyphenated = unicode(firsthalf) + unicode(secondhalf) lookupword = self.removesuffixes.sub('', dehyphenated) if self.prefixes.match(firsthalf) is None: - lookupword = self.removeprefix.sub('', lookupword) + lookupword = self.removeprefix.sub('', lookupword) #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated) try: searchresult = self.html.find(lookupword.lower()) @@ -353,7 +353,7 @@ class HTMLPreProcessor(object): (re.compile(r'((?<=)\s*file:////?[A-Z].*
|file:////?[A-Z].*
(?=\s*
))', re.IGNORECASE), lambda match: ''), # Center separator lines - (re.compile(u'
\s*(?P([*#鈥+\s*)+)\s*
'), lambda match: '

\n

' + match.group(1) + '

'), + (re.compile(u'
\s*(?P([*#鈥⑩湨]+\s*)+)\s*
'), lambda match: '

\n

' + match.group(1) + '

'), # Remove page links (re.compile(r'', re.IGNORECASE), lambda match: ''), @@ -363,13 +363,11 @@ class HTMLPreProcessor(object): # Remove gray background (re.compile(r']+>'), lambda match : ''), - # Detect Chapters to match default XPATH in GUI - (re.compile(r'
\s*(?P(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(){0,2})\s*(
\s*){1,3}\s*(?P(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head), - # Cover the case where every letter in a chapter title is separated by a space - (re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head), + # Convert line breaks to paragraphs + (re.compile(r'<br[^>]*>\s*'), lambda match : '</p>\n<p>'), + (re.compile(r'<body[^>]*>\s*'), lambda match : '<body>\n<p>'), + (re.compile(r'\s*</body>'), lambda match : '</p>\n</body>'), - # Have paragraphs show better - (re.compile(r'<br.*?>'), lambda match : '<p>'), # Clean up spaces (re.compile(u'(?<=[\.,;\?!鈥"\'])[\s^ ]*(?=<)'), lambda match: ' '), # Add space before and after italics @@ -455,9 +453,9 @@ class HTMLPreProcessor(object): # delete soft hyphens - moved here so it's executed after header/footer removal if is_pdftohtml: # unwrap/delete soft hyphens - end_rules.append((re.compile(u'[颅](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: '')) + end_rules.append((re.compile(u'[颅](</p>\s*<p>\s*)+\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens with formatting - end_rules.append((re.compile(u'[颅]\s*(</(i|u|b)>)+(\s*<p>)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: '')) + end_rules.append((re.compile(u'[颅]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: '')) # Make the more aggressive chapter marking regex optional with the preprocess option to # reduce false positives and move after header/footer removal @@ -475,7 +473,7 @@ class HTMLPreProcessor(object): end_rules.append((re.compile(u'(?<=.{%i}[鈥撯擼)\s*<p>\s*(?=[[a-z\d])' % length), lambda match: '')) end_rules.append( # Un wrap using punctuation - (re.compile(u'(?<=.{%i}([a-z盲毛茂枚眉脿猫矛貌霉谩膰茅铆贸艅艣煤芒锚卯么没莽膮臋偶谋,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), + 
(re.compile(u'(?<=.{%i}([a-z盲毛茂枚眉脿猫矛貌霉谩膰茅铆贸艅艣煤芒锚卯么没莽膮臋偶谋茫玫帽忙酶镁冒脽,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: @@ -508,7 +506,15 @@ class HTMLPreProcessor(object): if is_pdftohtml and length > -1: # Dehyphenate dehyphenator = Dehyphenator() - html = dehyphenator(html,'pdf', length) + html = dehyphenator(html,'html', length) + + if is_pdftohtml: + from calibre.ebooks.conversion.utils import PreProcessor + pdf_markup = PreProcessor(self.extra_opts, None) + totalwords = 0 + totalwords = pdf_markup.get_word_count(html) + if totalwords > 7000: + html = pdf_markup.markup_chapters(html, totalwords, True) #dump(html, 'post-preprocess') @@ -554,5 +560,9 @@ class HTMLPreProcessor(object): html = smartyPants(html) html = html.replace(start, '<!--') html = html.replace(stop, '-->') + # convert ellipsis to entities to prevent wrapping + html = re.sub('(?u)(?<=\w)\s?(\.\s?){2}\.', '…', html) + # convert double dashes to em-dash + html = re.sub('\s--\s', u'\u2014', html) return substitute_entites(html) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 11979b933c..1bb232c911 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -6,8 +6,10 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' import re +from math import ceil from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.utils.logging import default_log +from calibre.utils.wordcount import get_wordcount_obj class PreProcessor(object): @@ -17,6 +19,9 @@ class PreProcessor(object): self.found_indents = 0 self.extra_opts = extra_opts + def is_pdftohtml(self, src): + return '<!-- created by calibre\'s pdftohtml -->' in src[:1000] + def chapter_head(self, match): chap = match.group('chap') title = match.group('title') @@ 
-64,7 +69,7 @@ class PreProcessor(object): inspect. Percent is the minimum percent of line endings which should be marked up to return true. ''' - htm_end_ere = re.compile('</p>', re.DOTALL) + htm_end_ere = re.compile('</(p|div)>', re.DOTALL) line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL) htm_end = htm_end_ere.findall(raw) line_end = line_end_ere.findall(raw) @@ -101,36 +106,125 @@ class PreProcessor(object): with open(os.path.join(odir, name), 'wb') as f: f.write(raw.encode('utf-8')) + def get_word_count(self, html): + word_count_text = re.sub(r'(?s)<head[^>]*>.*?</head>', '', html) + word_count_text = re.sub(r'<[^>]*>', '', word_count_text) + wordcount = get_wordcount_obj(word_count_text) + return wordcount.words + + def markup_chapters(self, html, wordcount, blanks_between_paragraphs): + # Typical chapters are between 2000 and 7000 words, use the larger number to decide the + # minimum of chapters to search for + self.min_chapters = 1 + if wordcount > 7000: + self.min_chapters = int(ceil(wordcount / 7000.)) + #print "minimum chapters required are: "+str(self.min_chapters) + heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) + self.html_preprocess_sections = len(heading.findall(html)) + self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings") + + # Build the Regular Expressions in pieces + init_lookahead = "(?=<(p|div))" + chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*" + title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*" + chapter_header_open = r"(?P<chap>" + title_header_open = r"(?P<title>" + chapter_header_close = ")\s*" + title_header_close = ")" + chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>" + title_line_close = 
"(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>" + + is_pdftohtml = self.is_pdftohtml(html) + if is_pdftohtml: + chapter_line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*" + chapter_line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>" + title_line_open = "<(?P<outer2>p)[^>]*>\s*" + title_line_close = "\s*</(?P=outer2)>" + + + if blanks_between_paragraphs: + blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*" + else: + blank_lines = "" + opt_title_open = "(" + opt_title_close = ")?" + n_lookahead_open = "\s+(?!" + n_lookahead_close = ")" + + default_title = r"(<[ibu][^>]*>)?\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)" + + chapter_types = [ + [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication|Preface)\s*([\d\w-]+\:?\s*){0,4}", True, "Searching for common Chapter Headings"], + [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#鈥+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines + [r"[^'\"]?(\d+(\.|:)|CHAPTER)\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"], # Numeric Chapters + [r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, "Searching for letter spaced headings"], # Spaced Lettering + [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles + [r"[^'\"]?(\d+|CHAPTER)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, "Searching for simple numeric chapter headings"], # Numeric Chapters, no dot or colon + [r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters + ] + + # Start with most typical chapter headings, get more aggressive until one works + for [chapter_type, lookahead_ignorecase, log_message] in chapter_types: + if self.html_preprocess_sections >= self.min_chapters: + break + full_chapter_line = 
chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close + n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) + self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message) + if lookahead_ignorecase: + chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close + chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE) + else: + chapter_marker = init_lookahead+full_chapter_line+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close+n_lookahead_open+n_lookahead+n_lookahead_close + chapdetect = re.compile(r'%s' % chapter_marker, re.UNICODE) + html = chapdetect.sub(self.chapter_head, html) + + words_per_chptr = wordcount + if words_per_chptr > 0 and self.html_preprocess_sections > 0: + words_per_chptr = wordcount / self.html_preprocess_sections + self.log("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters") + return html + + + def __call__(self, html): self.log("********* Preprocessing HTML *********") + # Count the words in the document to estimate how many chapters to look for and whether + # other types of processing are attempted + totalwords = 0 + totalwords = self.get_word_count(html) + + if totalwords < 20: + self.log("not enough text, not preprocessing") + return html + # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly - html = re.sub(r"\s*</p>", "</p>\n", html) - html = re.sub(r"\s*<p(?P<style>[^>]*)>\s*", "\n<p"+"\g<style>"+">", html) + html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\g<tag>"+">\n", html) + html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\g<tag>"+"\g<style>"+">", 
html) ###### Check Markup ###### # # some lit files don't have any <p> tags or equivalent (generally just plain text between # <pre> tags), check and mark up line endings if required before proceeding if self.no_markup(html, 0.1): - self.log("not enough paragraph markers, adding now") - # check if content is in pre tags, use txt processor to mark up if so - pre = re.compile(r'<pre>', re.IGNORECASE) - if len(pre.findall(html)) == 1: - self.log("Running Text Processing") - from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \ - separate_paragraphs_single_line - outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL) - html = outerhtml.sub('\g<text>', html) - html = separate_paragraphs_single_line(html) - html = preserve_spaces(html) - html = convert_basic(html, epub_split_size_kb=0) - else: - # Add markup naively - # TODO - find out if there are cases where there are more than one <pre> tag or - # other types of unmarked html and handle them in some better fashion - add_markup = re.compile('(?<!>)(\n)') - html = add_markup.sub('</p>\n<p>', html) + self.log("not enough paragraph markers, adding now") + # check if content is in pre tags, use txt processor to mark up if so + pre = re.compile(r'<pre>', re.IGNORECASE) + if len(pre.findall(html)) == 1: + self.log("Running Text Processing") + from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \ + separate_paragraphs_single_line + outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL) + html = outerhtml.sub('\g<text>', html) + html = separate_paragraphs_single_line(html) + html = preserve_spaces(html) + html = convert_basic(html, epub_split_size_kb=0) + else: + # Add markup naively + # TODO - find out if there are cases where there are more than one <pre> tag or + # other types of unmarked html and handle them in some better fashion + add_markup = re.compile('(?<!>)(\n)') + html = add_markup.sub('</p>\n<p>', html) 
###### Mark Indents/Cleanup ###### # @@ -141,12 +235,17 @@ class PreProcessor(object): self.log("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles") # remove remaining non-breaking spaces html = re.sub(ur'\u00a0', ' ', html) + # Get rid of various common microsoft specific tags which can cause issues later # Get rid of empty <o:p> tags to simplify other processing html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) + # Delete microsoft 'smart' tags + html = re.sub('(?i)</?st1:\w+>', '', html) # Get rid of empty span, bold, & italics tags html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html) + # ADE doesn't render <br />, change to empty paragraphs + #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html) # If more than 40% of the lines are empty paragraphs and the user has enabled remove # paragraph spacing then delete blank lines to clean up spacing @@ -164,63 +263,16 @@ class PreProcessor(object): self.log("deleting blank lines") html = blankreg.sub('', html) elif float(len(blanklines)) / float(len(lines)) > 0.40: - blanks_between_paragraphs = True - #print "blanks between paragraphs is marked True" + blanks_between_paragraphs = True + #print "blanks between paragraphs is marked True" else: blanks_between_paragraphs = False + #self.dump(html, 'before_chapter_markup') # detect chapters/sections to match xpath or splitting logic # - # Build the Regular Expressions in pieces - init_lookahead = "(?=<(p|div))" - chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*" - title_line_open = 
"<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*" - chapter_header_open = r"(?P<chap>" - title_header_open = r"(?P<title>" - chapter_header_close = ")\s*" - title_header_close = ")" - chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>" - title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>" - if blanks_between_paragraphs: - blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*" - else: - blank_lines = "" - opt_title_open = "(" - opt_title_close = ")?" - n_lookahead_open = "\s+(?!" - n_lookahead_close = ")" - - default_title = r"\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(?=<)" - - min_chapters = 10 - heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) - self.html_preprocess_sections = len(heading.findall(html)) - self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings") - - chapter_types = [ - [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}", True, "Searching for common Chapter Headings"], - [r"[^'\"]?(\d+\.?|CHAPTER)\s*([\dA-Z\-\'\"\?\.!#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"], # Numeric Chapters - [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#鈥+\s*)+)(\s*(?=[\w#\-*\s]+<)([\w#-*]+\s*){1,5}\s*)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines - [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles - [r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters - ] - - # Start with most typical chapter headings, get more aggressive until one works - for [chapter_type, lookahead_ignorecase, log_message] in chapter_types: - if self.html_preprocess_sections >= min_chapters: - break - full_chapter_line = 
chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close - n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) - self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message) - if lookahead_ignorecase: - chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close - chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE) - else: - chapter_marker = init_lookahead+full_chapter_line+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close+n_lookahead_open+n_lookahead+n_lookahead_close - chapdetect = re.compile(r'%s' % chapter_marker, re.UNICODE) - - html = chapdetect.sub(self.chapter_head, html) + html = self.markup_chapters(html, totalwords, blanks_between_paragraphs) ###### Unwrap lines ###### @@ -247,7 +299,7 @@ class PreProcessor(object): # Calculate Length unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) - self.log("*** Median line length is " + unicode(length) + ", calculated with " + format + " format ***") + self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format") # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor if hardbreaks or unwrap_factor < 0.4: self.log("Unwrapping required, unwrapping Lines") @@ -260,7 +312,7 @@ class PreProcessor(object): self.log("Done dehyphenating") # Unwrap lines using punctation and line length #unwrap_quotes = 
re.compile(u"(?<=.{%i}\"')\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*(?=[a-z])" % length, re.UNICODE) - unwrap = re.compile(u"(?<=.{%i}([a-z盲毛茂枚眉脿猫矛貌霉谩膰茅铆贸艅艣煤芒锚卯么没莽膮臋偶谋,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) + unwrap = re.compile(u"(?<=.{%i}([a-z盲毛茂枚眉脿猫矛貌霉谩膰茅铆贸艅艣煤芒锚卯么没莽膮臋偶谋茫玫帽忙酶镁冒脽,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) html = unwrap.sub(' ', html) #check any remaining hyphens, but only unwrap if there is a match dehyphenator = Dehyphenator() @@ -276,7 +328,7 @@ class PreProcessor(object): html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) # If still no sections after unwrapping mark split points on lines with no punctuation - if self.html_preprocess_sections < 5: + if self.html_preprocess_sections < self.min_chapters: self.log("Looking for more split points based on punctuation," " currently have " + unicode(self.html_preprocess_sections)) chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#鈥+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index f6deab677a..4dd6e7c7ae 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ 
b/src/calibre/ebooks/fb2/fb2ml.py @@ -173,7 +173,7 @@ class FB2MLizer(object): if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml': self.oeb_book.spine.insert(0, title_item, True) # Create xhtml page to reference cover image so it can be used. - if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: + if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: id = unicode(self.oeb_book.metadata.cover[0]) cover_item = self.oeb_book.manifest.ids[id] if cover_item.media_type in OEB_RASTER_IMAGES: diff --git a/src/calibre/ebooks/fb2/input.py b/src/calibre/ebooks/fb2/input.py index 1f9a3ffe95..b019873d39 100644 --- a/src/calibre/ebooks/fb2/input.py +++ b/src/calibre/ebooks/fb2/input.py @@ -46,15 +46,19 @@ class FB2Input(InputFormatPlugin): log.debug('Parsing XML...') raw = stream.read().replace('\0', '') raw = xml_to_unicode(raw, strip_encoding_pats=True, - assume_utf8=True)[0] + assume_utf8=True, resolve_entities=True)[0] try: doc = etree.fromstring(raw) except etree.XMLSyntaxError: try: doc = etree.fromstring(raw, parser=RECOVER_PARSER) + if doc is None: + raise Exception('parse failed') except: doc = etree.fromstring(raw.replace('& ', '&'), parser=RECOVER_PARSER) + if doc is None: + raise ValueError('The FB2 file is not valid XML') stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]') css = '' for s in stylesheets: diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py index 3688abff3f..1b665bf94e 100644 --- a/src/calibre/ebooks/pdb/input.py +++ b/src/calibre/ebooks/pdb/input.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' import os -from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation +from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.pdb 
import PDBError, IDENTITY_TO_NAME, get_reader from calibre.ebooks.conversion.utils import PreProcessor @@ -18,30 +18,6 @@ class PDBInput(InputFormatPlugin): description = 'Convert PDB to HTML' file_types = set(['pdb']) - options = set([ - OptionRecommendation(name='paragraph_type', recommended_value='auto', - choices=['auto', 'block', 'single', 'print'], - help=_('Paragraph structure.\n' - 'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n' - '* auto: Try to auto detect paragraph type.\n' - '* block: Treat a blank line as a paragraph break.\n' - '* single: Assume every line is a paragraph.\n' - '* print: Assume every line starting with 2+ spaces or a tab ' - 'starts a paragraph.')), - OptionRecommendation(name='formatting_type', recommended_value='auto', - choices=['auto', 'none', 'markdown'], - help=_('Formatting used within the document.' - '* auto: Try to auto detect the document formatting.\n' - '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n' - '* markdown: Run the input though the markdown pre-processor. ' - 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), - OptionRecommendation(name='preserve_spaces', recommended_value=False, - help=_('Normally extra spaces are condensed into a single space. 
' - 'With this option all spaces will be displayed.')), - OptionRecommendation(name="markdown_disable_toc", recommended_value=False, - help=_('Do not insert a Table of Contents into the output text.')), - ]) - def convert(self, stream, options, file_ext, log, accelerators): header = PdbHeaderReader(stream) @@ -60,4 +36,4 @@ class PDBInput(InputFormatPlugin): def preprocess_html(self, options, html): self.options = options preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None)) - return preprocessor(html) \ No newline at end of file + return preprocessor(html) diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py index c151551866..30b0c4c57c 100644 --- a/src/calibre/ebooks/pdb/pdf/reader.py +++ b/src/calibre/ebooks/pdb/pdf/reader.py @@ -19,9 +19,6 @@ class Reader(FormatReader): self.stream = stream self.log = log self.options = options - setattr(self.options, 'new_pdf_engine', False) - setattr(self.options, 'no_images', False) - setattr(self.options, 'unwrap_factor', 0.45) def extract_content(self, output_dir): self.log.info('Extracting PDF...') @@ -31,7 +28,12 @@ class Reader(FormatReader): for x in xrange(self.header.section_count()): pdf.write(self.header.section_data(x)) - from calibre.customize.ui import plugin_for_input_format - pdf.seek(0) - return plugin_for_input_format('pdf').convert(pdf, self.options, - 'pdf', self.log, []) + from calibre.customize.ui import plugin_for_input_format + + pdf_plugin = plugin_for_input_format('pdf') + for option in pdf_plugin.options: + if not hasattr(self.options, option.option.name): + setattr(self.options, option.name, option.recommended_value) + + pdf.seek(0) + return pdf_plugin.convert(pdf, self.options, 'pdf', self.log, {}) diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/tcr/input.py index c1dcef235d..aac72da7a8 100644 --- a/src/calibre/ebooks/tcr/input.py +++ b/src/calibre/ebooks/tcr/input.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' 
from cStringIO import StringIO -from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation +from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.compression.tcr import decompress class TCRInput(InputFormatPlugin): @@ -16,30 +16,6 @@ class TCRInput(InputFormatPlugin): description = 'Convert TCR files to HTML' file_types = set(['tcr']) - options = set([ - OptionRecommendation(name='paragraph_type', recommended_value='auto', - choices=['auto', 'block', 'single', 'print'], - help=_('Paragraph structure.\n' - 'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n' - '* auto: Try to auto detect paragraph type.\n' - '* block: Treat a blank line as a paragraph break.\n' - '* single: Assume every line is a paragraph.\n' - '* print: Assume every line starting with 2+ spaces or a tab ' - 'starts a paragraph.')), - OptionRecommendation(name='formatting_type', recommended_value='auto', - choices=['auto', 'none', 'markdown'], - help=_('Formatting used within the document.' - '* auto: Try to auto detect the document formatting.\n' - '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n' - '* markdown: Run the input though the markdown pre-processor. ' - 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), - OptionRecommendation(name='preserve_spaces', recommended_value=False, - help=_('Normally extra spaces are condensed into a single space. 
' - 'With this option all spaces will be displayed.')), - OptionRecommendation(name="markdown_disable_toc", recommended_value=False, - help=_('Do not insert a Table of Contents into the output text.')), - ]) - def convert(self, stream, options, file_ext, log, accelerators): log.info('Decompressing text...') raw_txt = decompress(stream) diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 8e3e8b10de..2f7892692c 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -256,8 +256,10 @@ class BookInfo(QWebView): % (left_pane, right_pane))) def mouseDoubleClickEvent(self, ev): - if self.width() - ev.x() < 25 or \ - self.height() - ev.y() < 25: + swidth = self.page().mainFrame().scrollBarGeometry(Qt.Vertical).width() + sheight = self.page().mainFrame().scrollBarGeometry(Qt.Horizontal).height() + if self.width() - ev.x() < swidth or \ + self.height() - ev.y() < sheight: # Filter out double clicks on the scroll bar ev.accept() else: diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py index e5f72099fe..ea7a24510a 100644 --- a/src/calibre/gui2/convert/__init__.py +++ b/src/calibre/gui2/convert/__init__.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import textwrap +import textwrap, codecs from functools import partial from PyQt4.Qt import QWidget, QSpinBox, QDoubleSpinBox, QLineEdit, QTextEdit, \ @@ -128,6 +128,7 @@ class Widget(QWidget): def get_value(self, g): from calibre.gui2.convert.xpath_wizard import XPathEdit from calibre.gui2.convert.regex_builder import RegexEdit + from calibre.gui2.widgets import EncodingComboBox ret = self.get_value_handler(g) if ret != 'this is a dummy return value, xcswx1avcx4x': return ret @@ -139,6 +140,13 @@ class Widget(QWidget): if not ans: ans = None return ans + elif isinstance(g, EncodingComboBox): + ans = unicode(g.currentText()).strip() + 
try: + codecs.lookup(ans) + except: + ans = '' + return ans elif isinstance(g, QComboBox): return unicode(g.currentText()) elif isinstance(g, QCheckBox): diff --git a/src/calibre/gui2/convert/pdb_input.py b/src/calibre/gui2/convert/pdb_input.py deleted file mode 100644 index 16ff1ff236..0000000000 --- a/src/calibre/gui2/convert/pdb_input.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- - -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember <john@nachtimwald.com>' -__docformat__ = 'restructuredtext en' - -from calibre.gui2.convert.txt_input_ui import Ui_Form -from calibre.gui2.convert import Widget - -class PluginWidget(Widget, Ui_Form): - - TITLE = _('PDB Input') - HELP = _('Options specific to')+' PDB '+_('input') - COMMIT_NAME = 'pdb_input' - ICON = I('mimetypes/txt.png') - - def __init__(self, parent, get_option, get_help, db=None, book_id=None): - Widget.__init__(self, parent, - ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces']) - self.db, self.book_id = db, book_id - for x in get_option('paragraph_type').option.choices: - self.opt_paragraph_type.addItem(x) - for x in get_option('formatting_type').option.choices: - self.opt_formatting_type.addItem(x) - self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/tcr_input.py b/src/calibre/gui2/convert/tcr_input.py deleted file mode 100644 index 366643ad5b..0000000000 --- a/src/calibre/gui2/convert/tcr_input.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- - -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember <john@nachtimwald.com>' -__docformat__ = 'restructuredtext en' - -from calibre.gui2.convert.txt_input_ui import Ui_Form -from calibre.gui2.convert import Widget - -class PluginWidget(Widget, Ui_Form): - - TITLE = _('TCR Input') - HELP = _('Options specific to')+' TCR '+_('input') - COMMIT_NAME = 'tcr_input' - ICON = I('mimetypes/txt.png') - - def __init__(self, parent, get_option, get_help, db=None, 
book_id=None): - Widget.__init__(self, parent, - ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces']) - self.db, self.book_id = db, book_id - for x in get_option('paragraph_type').option.choices: - self.opt_paragraph_type.addItem(x) - for x in get_option('formatting_type').option.choices: - self.opt_formatting_type.addItem(x) - self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index d18cc61baf..a5f0f52425 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -61,7 +61,8 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format dtitle = unicode(mi.title) except: dtitle = repr(mi.title) - desc = _('Convert book %d of %d (%s)') % (i + 1, total, dtitle) + desc = _('Convert book %(num)d of %(total)d (%(title)s)') % \ + {'num':i + 1, 'total':total, 'title':dtitle} recs = cPickle.loads(d.recommendations) if d.opf_file is not None: diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index f77f23c154..13469f5622 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -449,7 +449,7 @@ class Document(QWebPage): # {{{ return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results def set_bottom_padding(self, amount): - s = QSize(-1, -1) if amount == 0 else QSize(self.width, + s = QSize(-1, -1) if amount == 0 else QSize(self.viewportSize().width(), self.height+amount) self.setPreferredContentsSize(s) @@ -820,6 +820,7 @@ class DocumentView(QWebView): # {{{ self.flipper.initialize(self.current_page_image()) self.manager.next_document() return + #oheight = self.document.height lower_limit = opos + delta_y # Max value of top y co-ord after scrolling max_y = self.document.height - window_height # The maximum possible top y co-ord if max_y < lower_limit: @@ -835,6 +836,7 @@ class DocumentView(QWebView): # {{{ if epf: 
self.flipper.initialize(self.current_page_image()) #print 'Document height:', self.document.height + #print 'Height change:', (self.document.height - oheight) max_y = self.document.height - window_height lower_limit = min(max_y, lower_limit) #print 'Scroll to:', lower_limit @@ -842,6 +844,7 @@ class DocumentView(QWebView): # {{{ self.document.scroll_to(self.document.xpos, lower_limit) actually_scrolled = self.document.ypos - opos #print 'After scroll pos:', self.document.ypos + #print 'Scrolled by:', self.document.ypos - opos self.find_next_blank_line(window_height - actually_scrolled) #print 'After blank line pos:', self.document.ypos if epf: diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index afc20ba21c..37799c4cbc 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -640,8 +640,8 @@ class BrowseServer(object): if fmt: href = self.opts.url_prefix + '/get/%s/%s_%d.%s'%( fmt, fname, id_, fmt) - rt = xml(_('Read %s in the %s format')%(args['title'], - fmt.upper()), True) + rt = xml(_('Read %(title)s in the %(fmt)s format')% \ + {'title':args['title'], 'fmt':fmt.upper()}, True) args['get_button'] = \ '<a href="%s" class="read" title="%s">%s</a>' % \ diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py index ab0853add9..ead7cf1938 100644 --- a/src/calibre/library/server/opds.py +++ b/src/calibre/library/server/opds.py @@ -128,9 +128,9 @@ def CATALOG_ENTRY(item, item_kind, base_href, version, updated, count = '' if item.category == 'authors' and \ tweaks['categories_use_field_for_author_name'] == 'author_sort': - name = xml(item.sort) + name = item.sort else: - name = xml(item.name) + name = item.name return E.entry( TITLE(name + ('' if not add_kind else ' (%s)'%item_kind)), ID(id_), diff --git a/src/calibre/translations/cs.po b/src/calibre/translations/cs.po index 77231346a8..3d4de14c39 100644 --- a/src/calibre/translations/cs.po +++ 
b/src/calibre/translations/cs.po @@ -8,13 +8,13 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n" "POT-Creation-Date: 2011-01-02 23:55+0000\n" -"PO-Revision-Date: 2011-01-04 08:51+0000\n" -"Last-Translator: TomVal <Unknown>\n" +"PO-Revision-Date: 2011-01-06 11:10+0000\n" +"Last-Translator: schunka <Unknown>\n" "Language-Team: Czech <cs@li.org>\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2011-01-05 04:43+0000\n" +"X-Launchpad-Export-Date: 2011-01-07 04:57+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:43 @@ -799,7 +799,7 @@ msgstr "Spojit se s Sanda Bambook eBook 膷te膷kou" #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:25 msgid "Li Fanxi" -msgstr "" +msgstr "Li Fanxi" #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:41 msgid "Device IP Address (restart calibre after changing)" @@ -1126,11 +1126,11 @@ msgstr "Komunikovat se za艡铆zen铆m Trekstor" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:251 msgid "Communicate with the EEE Reader" -msgstr "" +msgstr "Prob铆h谩 spojen铆 se 膷te膷kou EEE Reader." #: /home/kovid/work/calibre/src/calibre/devices/misc.py:271 msgid "Communicate with the Nextbook Reader" -msgstr "" +msgstr "Prob铆h谩 spojen铆 se 膷te膷kou Nextbook Reader." #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:17 msgid "Communicate with the Nokia 770 internet tablet." @@ -1174,11 +1174,11 @@ msgstr "Spojit se se Sony eBook reader" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61 msgid "All by title" -msgstr "" +msgstr "V拧e podle n谩zvu" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62 msgid "All by author" -msgstr "" +msgstr "V拧e podle autora" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:64 msgid "" @@ -1226,7 +1226,7 @@ msgstr "Spojit se se Sovos reader." 
#: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:78 msgid "Communicate with the Sunstech EB700 reader." -msgstr "" +msgstr "Prob铆h谩 spojen铆 se 膷te膷kou Sunstech EB700." #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:258 msgid "Unable to detect the %s disk drive. Try rebooting." diff --git a/src/calibre/translations/de.po b/src/calibre/translations/de.po index 6418ab3d7d..a330704198 100644 --- a/src/calibre/translations/de.po +++ b/src/calibre/translations/de.po @@ -8,13 +8,13 @@ msgstr "" "Project-Id-Version: de\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-01-02 23:55+0000\n" -"PO-Revision-Date: 2011-01-01 21:21+0000\n" -"Last-Translator: Kovid Goyal <Unknown>\n" +"PO-Revision-Date: 2011-01-07 02:17+0000\n" +"Last-Translator: heinz beck <Unknown>\n" "Language-Team: American English <kde-i18n-doc@lists.kde.org>\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2011-01-04 13:52+0000\n" +"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Generated-By: pygettext.py 1.5\n" @@ -943,7 +943,7 @@ msgstr "Kommunikation mit dem PocketBook 301 Reader." #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:233 msgid "Communicate with the PocketBook 602/603/902/903 reader." -msgstr "" +msgstr "verbinden mit PocketBook 602/603/902/903" #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:252 msgid "Communicate with the PocketBook 701" @@ -1186,7 +1186,7 @@ msgstr "Kommunikation mit allen Sony eBook Readern." 
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61 msgid "All by title" -msgstr "" +msgstr "nach Titel" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62 msgid "All by author" diff --git a/src/calibre/translations/gl.po b/src/calibre/translations/gl.po index 33708ef88c..597487b7dc 100644 --- a/src/calibre/translations/gl.po +++ b/src/calibre/translations/gl.po @@ -8,13 +8,13 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n" "POT-Creation-Date: 2011-01-02 23:55+0000\n" -"PO-Revision-Date: 2011-01-02 13:21+0000\n" -"Last-Translator: Calidonia Hibernia <Unknown>\n" +"PO-Revision-Date: 2011-01-06 14:46+0000\n" +"Last-Translator: Ant贸n M茅ixome <meixome@gmail.com>\n" "Language-Team: dev@gl.openoffice.org\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2011-01-04 13:52+0000\n" +"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Language: gl\n" @@ -5749,7 +5749,7 @@ msgstr "Tama帽o da mensaxe para a descrici贸n das miniaturas de portada" #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:330 msgid " inch" -msgstr "" +msgstr " polgada" #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:331 msgid "&Description note" @@ -10645,15 +10645,15 @@ msgstr "Nunca" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:60 msgid "By first letter" -msgstr "" +msgstr "Pola primeira letra" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:60 msgid "Disabled" -msgstr "" +msgstr "Desactivado" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:61 msgid "Partitioned" -msgstr "" +msgstr "Particionado" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:140 msgid "User Interface &layout (needs restart):" @@ -10709,7 +10709,7 @@ msgstr "Buscar mentres se escribe" #: 
/home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:152 msgid "Tags browser category partitioning method:" -msgstr "" +msgstr "M茅todo de particionado con categor铆as de etiquetas de navegaci贸n:" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:153 msgid "" @@ -10719,10 +10719,15 @@ msgid "" "have a list of fixed-sized groups. Set to disabled\n" "if you never want subcategories" msgstr "" +"Escoller como as subcategor铆as de etiquetas de navegaci贸n se amosan cando\n" +"hai m谩is 铆tems que os do l铆mite. Seleccione por primeira\n" +"letra para ver unha lista A, B, C. Escolla particionado para\n" +"ter unha lista de grupos de tama帽o fixo. Escolla desactivado\n" +"se non vai querer nunca subcategor铆as" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:158 msgid "Collapse when more items than:" -msgstr "" +msgstr "Colapsar cando os 铆tems son m谩is de:" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:159 msgid "" @@ -10730,6 +10735,10 @@ msgid "" "up into sub-categories. If the partition method is set to disable, this " "value is ignored." msgstr "" +"Se unha categor铆a de etiquetas de navegaci贸n ten m谩is ca este n煤mero de " +"铆tems, div铆dese\n" +"en subcategor铆as. Se o m茅todo de partici贸n se pon como desactivado, " +"ignorarase este valor." 
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:161 msgid "&Toolbar" @@ -11494,7 +11503,7 @@ msgstr "Mostrar todas as categor铆as" #: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:300 msgid "Change sub-categorization scheme" -msgstr "" +msgstr "Cambiar o esquema de subcategorizaci贸n" #: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:625 msgid "" diff --git a/src/calibre/translations/it.po b/src/calibre/translations/it.po index 73a13b051e..1be0988afd 100644 --- a/src/calibre/translations/it.po +++ b/src/calibre/translations/it.po @@ -9,13 +9,13 @@ msgstr "" "Project-Id-Version: calibre_calibre-it\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-01-02 23:55+0000\n" -"PO-Revision-Date: 2011-01-02 22:45+0000\n" -"Last-Translator: Marco Ciampa <ciampix@libero.it>\n" +"PO-Revision-Date: 2011-01-06 15:33+0000\n" +"Last-Translator: Francesco Pasa <Unknown>\n" "Language-Team: italiano\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2011-01-04 13:53+0000\n" +"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n" "X-Generator: Launchpad (build Unknown)\n" "X-Poedit-Bookmarks: -1,-1,-1,-1,-1,1105,-1,1312,-1,-1\n" "Generated-By: pygettext.py 1.5\n" @@ -5694,7 +5694,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:330 msgid " inch" -msgstr "" +msgstr " pollice" #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:331 msgid "&Description note" diff --git a/src/calibre/translations/nds.po b/src/calibre/translations/nds.po index e4d1ad8f1a..80d6f376c3 100644 --- a/src/calibre/translations/nds.po +++ b/src/calibre/translations/nds.po @@ -8,13 +8,13 @@ msgstr "" "Project-Id-Version: nds\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-01-02 23:55+0000\n" -"PO-Revision-Date: 2010-10-18 00:57+0000\n" -"Last-Translator: Nils-Christoph Fiedler <ncfiedler@gnome.org>\n" +"PO-Revision-Date: 
2011-01-07 02:48+0000\n" +"Last-Translator: heinz beck <Unknown>\n" "Language-Team: German\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2011-01-04 13:55+0000\n" +"X-Launchpad-Export-Date: 2011-01-07 04:59+0000\n" "X-Generator: Launchpad (build Unknown)\n" "X-Poedit-Country: GERMANY\n" "X-Poedit-Language: German\n" diff --git a/src/calibre/translations/pt_BR.po b/src/calibre/translations/pt_BR.po index af6071797c..26d16546e6 100644 --- a/src/calibre/translations/pt_BR.po +++ b/src/calibre/translations/pt_BR.po @@ -8,13 +8,13 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n" "POT-Creation-Date: 2011-01-02 23:55+0000\n" -"PO-Revision-Date: 2010-12-18 05:47+0000\n" -"Last-Translator: Kovid Goyal <Unknown>\n" +"PO-Revision-Date: 2011-01-06 13:01+0000\n" +"Last-Translator: MoroniGranja <Unknown>\n" "Language-Team: American English <kde-i18n-doc@kde.org>\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2011-01-04 14:00+0000\n" +"X-Launchpad-Export-Date: 2011-01-07 04:59+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:43 @@ -172,7 +172,7 @@ msgstr "Leitor de metadados" #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:266 msgid "Metadata writer" -msgstr "" +msgstr "Escritor de metadata" #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:296 msgid "Catalog generator" @@ -589,6 +589,8 @@ msgid "" "Intended for the Samsung Galaxy and similar tablet devices with a resolution " "of 600x1280" msgstr "" +"Planejado para o Samsung Galaxy e tablets similares com uma resolu莽茫o " +"de600x1280" #: /home/kovid/work/calibre/src/calibre/customize/profiles.py:471 msgid "This profile is intended for the Kobo Reader." 
@@ -695,7 +697,7 @@ msgstr "Desabilitar a extensão com nome" #: /home/kovid/work/calibre/src/calibre/debug.py:148 msgid "Debug log" -msgstr "" +msgstr "Log de Debug" #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:13 msgid "Communicate with Android phones." @@ -808,7 +810,7 @@ msgstr "Comunicar com iTunes." #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:24 msgid "Communicate with the Sanda Bambook eBook reader." -msgstr "" +msgstr "Comunicar com o leitor de eBooks Sanda Bambook" #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:25 msgid "Li Fanxi" @@ -817,17 +819,22 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:41 msgid "Device IP Address (restart calibre after changing)" msgstr "" +"Endereço IP do dispositivo (é necessário reiniciar calibre após modificar)" #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:46 msgid "" "Unable to add book to library directly from Bambook. Please save the book to " "disk and add the file to library from disk." msgstr "" +"Impossível adicionar livro a biblioteca diretamente do Bambook. Favor salvar " +"o livro no disco e adicionar o arquivo do disco a biblioteca." #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:66 msgid "" "Unable to connect to Bambook, you need to install Bambook library first." msgstr "" +"Não foi possível conectar ao Bambook, é necessário instalar a biblioteca " +"Bambook." #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:74 msgid "" @@ -835,10 +842,13 @@ msgid "" "If you are trying to connect via Wi-Fi, please make sure the IP address of " "Bambook has been correctly configured." msgstr "" +"Não foi possível conectar ao Bambook. \n" +"Se você está tentando conectar por Wi-Fi, favor confirmar se o endereço IP " +"do Bambook foi configurado corretamente."
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:111 msgid "Bambook" -msgstr "" +msgstr "Bambook" #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:217 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:233 @@ -899,7 +909,7 @@ msgstr "Enviando metadados ao dispositivo..." #: /home/kovid/work/calibre/src/calibre/devices/bambook/libbambookcore.py:132 msgid "Bambook SDK has not been installed." -msgstr "" +msgstr "Bambook SDK não foi instalado." #: /home/kovid/work/calibre/src/calibre/devices/binatone/driver.py:17 msgid "Communicate with the Binatone Readme eBook reader." @@ -938,11 +948,11 @@ msgstr "Comunica-se com o leitor PocketBook 301" #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:233 msgid "Communicate with the PocketBook 602/603/902/903 reader." -msgstr "" +msgstr "Comunicar-se com o PocketBook 602/603/902/903 reader." #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:252 msgid "Communicate with the PocketBook 701" -msgstr "" +msgstr "Comunicar-se com o PocketBook 701" #: /home/kovid/work/calibre/src/calibre/devices/edge/driver.py:17 msgid "Entourage Edge" @@ -1069,6 +1079,8 @@ msgid "" "The Kobo supports only one collection currently: the \"Im_Reading\" list. " "Create a tag called \"Im_Reading\" " msgstr "" +"O Kobo aceita apenas uma coleção atualmente: a lista \"Im_Reading\". 
Crie " +"uma tag chamada \"Im_Reading\" " #: /home/kovid/work/calibre/src/calibre/devices/kobo/driver.py:446 #: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:279 @@ -1097,7 +1109,7 @@ msgstr "Comunicar com o Sweex MM300" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:79 msgid "Communicate with the Digma Q600" -msgstr "" +msgstr "Comunicar-se com o Digma Q600" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:88 msgid "Communicate with the Kogan" @@ -1110,7 +1122,7 @@ msgstr "Comunicar com o Pandigital Novel" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:142 msgid "Communicate with the VelocityMicro" -msgstr "" +msgstr "Comunicar-se com o VelocityMicro" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:160 msgid "Communicate with the GM2000" @@ -1118,23 +1130,23 @@ msgstr "Comunicar com o GM2000" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:180 msgid "Communicate with the Acer Lumiread" -msgstr "" +msgstr "Comunicar-se com o Acer Lumiread" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:211 msgid "Communicate with the Aluratek Color" -msgstr "" +msgstr "Comunicar-se com o Aluratek Color" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:231 msgid "Communicate with the Trekstor" -msgstr "" +msgstr "Comunicar-se com o Trekstor" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:251 msgid "Communicate with the EEE Reader" -msgstr "" +msgstr "Comunicar-se com o EEE Reader" #: /home/kovid/work/calibre/src/calibre/devices/misc.py:271 msgid "Communicate with the Nextbook Reader" -msgstr "" +msgstr "Comunicar-se com o Nextbook Reader" #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:17 msgid "Communicate with the Nokia 770 internet tablet." @@ -1142,7 +1154,7 @@ msgstr "Comunica-se com o Nokia 770 Internet Tablet." #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:40 msgid "Communicate with the Nokia 810/900 internet tablet."
-msgstr "" +msgstr "Comunicar-se com o internet tablet Nokia 810/900." #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:74 msgid "Communicate with the Nokia E52" @@ -1158,11 +1170,11 @@ msgstr "Comunica-se com o leitor Nook." #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:85 msgid "Nook Color" -msgstr "" +msgstr "Nook Color" #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:86 msgid "Communicate with the Nook Color eBook reader." -msgstr "" +msgstr "Comunicar-se com o Nook Color." #: /home/kovid/work/calibre/src/calibre/devices/nuut2/driver.py:17 msgid "Communicate with the Nuut2 eBook reader." @@ -1178,11 +1190,11 @@ msgstr "Comunica-se com todos os leitores da Sony." #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61 msgid "All by title" -msgstr "" +msgstr "Todos por título" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62 msgid "All by author" -msgstr "" +msgstr "Todos por autor" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:64 msgid "" @@ -1198,6 +1210,9 @@ msgid "" "to the list to enable them. The collections will be given the name provided " "after the \":\" character." msgstr "" +". Duas coleções especiais estão disponíveis: %s:%s e %s:%s. Adicione estes " +"valores à lista para habilitá-los. As coleções receberão o nome após os dois " +"pontos (\":\")." #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:190 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/structure.py:68 @@ -1218,7 +1233,7 @@ msgstr "Comunica-se com o leitor Newsmy." #: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:47 msgid "Communicate with the Pico reader." -msgstr "" +msgstr "Comunicar-se com o Pico reader." #: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:57 msgid "Communicate with the iPapyrus reader." @@ -1230,7 +1245,7 @@ msgstr "Comunicar com o leitor Sovos."
#: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:78 msgid "Communicate with the Sunstech EB700 reader." -msgstr "" +msgstr "Comunicar-se com o Sunstech EB700 reader." #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:258 msgid "Unable to detect the %s disk drive. Try rebooting." @@ -1254,6 +1269,8 @@ msgid "" "Unable to detect the %s disk drive. Either the device has already been " "ejected, or your kernel is exporting a deprecated version of SYSFS." msgstr "" +"Não foi possível detectar o disco %s. O dispositivo já foi ejetado, ou o seu " +"kernel está exportando uma versão deprecada do SYSFS." #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:617 msgid "Unable to mount main memory (Error code: %d)" @@ -1264,6 +1281,8 @@ msgid "" "The main memory of %s is read only. This usually happens because of file " "system errors." msgstr "" +"A memória principal de %s é somente leitura. Isto normalmente acontece " +"devido a erros no sistema de arquivos." #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:816 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:818 @@ -1693,6 +1712,9 @@ msgid "" "is: %default. Links are only added to the TOC if less than the threshold " "number of chapters were detected." msgstr "" +"Número máximo de links para inserir no sumário. Use 0 para desabilitar. O " +"padrão é: %default. Links serão adicionados ao sumário somente se o número " +"encontrado for menor que o limite máximo de capítulos." #: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:256 msgid "" @@ -2076,7 +2098,7 @@ msgstr "Você deve especificar um arquivo do tipo epub" #: /home/kovid/work/calibre/src/calibre/ebooks/epub/fix/unmanifested.py:17 msgid "Fix unmanifested files" -msgstr "" +msgstr "Conserte arquivos sem manifesto."
#: /home/kovid/work/calibre/src/calibre/ebooks/epub/fix/unmanifested.py:21 msgid "" diff --git a/src/calibre/translations/zh_CN.po b/src/calibre/translations/zh_CN.po index d040713c09..3a4d979238 100644 --- a/src/calibre/translations/zh_CN.po +++ b/src/calibre/translations/zh_CN.po @@ -12904,7 +12904,7 @@ msgstr "其它格式" #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:643 msgid "Read %s in the %s format" -msgstr "用 %2$s 格式阅读 %1$s" +msgstr "阅读 %s（%s 格式）" #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:648 msgid "Get" diff --git a/src/calibre/utils/wordcount.py b/src/calibre/utils/wordcount.py new file mode 100644 index 0000000000..cd0058fb2f --- /dev/null +++ b/src/calibre/utils/wordcount.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +""" +Get word, character, and Asian character counts + +1. Get a word count as a dictionary: + wc = get_wordcount(text) + words = wc['words'] # etc. + +2. Get a word count as an object + wc = get_wordcount_obj(text) + words = wc.words # etc. + +properties counted: + * characters + * chars_no_spaces + * asian_chars + * non_asian_words + * words + +Sourced from: +http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/ +http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/ +""" +__version__ = 0.1 +__author__ = "Ryan Ginstrom" + +IDEOGRAPHIC_SPACE = 0x3000 + +def is_asian(char): + """Is the character Asian?""" + + # 0x3000 is ideographic space (i.e. 
double-byte space) + # Anything over is an Asian character + return ord(char) > IDEOGRAPHIC_SPACE + +def filter_jchars(c): + """Filters Asian characters to spaces""" + if is_asian(c): + return ' ' + return c + +def nonj_len(word): + u"""Returns number of non-Asian words in {word} + - 日本語AアジアンB -> 2 + - hello -> 1 + @param word: A word, possibly containing Asian characters + """ + # Here are the steps: + # 本spam日eggs + # -> [' ', 's', 'p', 'a', 'm', ' ', 'e', 'g', 'g', 's'] + # -> ' spam eggs' + # -> ['spam', 'eggs'] + # The length of which is 2! + chars = [filter_jchars(c) for c in word] + return len(u''.join(chars).split()) + +def get_wordcount(text): + """Get the word/character count for text + + @param text: The text of the segment + """ + + characters = len(text) + chars_no_spaces = sum([not x.isspace() for x in text]) + asian_chars = sum([is_asian(x) for x in text]) + non_asian_words = nonj_len(text) + words = non_asian_words + asian_chars + + return dict(characters=characters, + chars_no_spaces=chars_no_spaces, + asian_chars=asian_chars, + non_asian_words=non_asian_words, + words=words) + +def dict2obj(dictionary): + """Transform a dictionary into an object""" + class Obj(object): + def __init__(self, dictionary): + self.__dict__.update(dictionary) + return Obj(dictionary) + +def get_wordcount_obj(text): + """Get the wordcount as an object rather than a dictionary""" + return dict2obj(get_wordcount(text))