merge from trunk

This commit is contained in:
Charles Haley 2011-01-07 20:15:41 +00:00
commit 6a745b68c1
30 changed files with 587 additions and 272 deletions

View File

@ -4,6 +4,100 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.38
date: 2011-01-07
new features:
- title: "Reduce startup time when using a composite custom column"
- title: "Template language: Add a list_item function for use with tags like columns. See User Manual for details"
- title: "TXT Input: Attempt to detect the input encoding when not specified. Auto detect paragraph structure and formatting markup."
- title: "Search & replace: Add ability to manipulate number and boolean columns."
- title: "Add type ahead completion to the advanced search dialog."
tickets: [8035]
- title: "Double click on plugin in Preferences dialog to customize"
tickets: [8175]
- title: "Allow customization of the SONY driver to send thumbnail to the device. Useful with newer SONY readers"
tickets: [8161]
- title: "Smarten punctuation: Convert double dashes to em dashes. Preprocessing: Various tweaks"
bug fixes:
- title: "Fix regression causing the template formatter to intepret a missing format letter as ERROR instead of 's'."
- title: "Fix regression that broke conversion of PNG images in PDF files on OS X."
tickets: [8215]
- title: "Content server: Fix improper XML escaping of category titles in the OPDS feeds"
tickets: [8225]
- title: "When decoding XML if the XML starts with a UTF-8 BOM decode as UTF-8. Fixes parsing of FB2 files with UTF-8 BOMs"
- title: "E-book viewer: When scrolling to a bookmark and the content is wider than the window, do not scroll in the horizontal direction"
- title: "E-book viewer: Fix next page skipping the bottom of chapters when the content is wider than the window."
tickets: [8153]
- title: " FB2 Output: Insert covers."
tickets: [8172]
- title: "Content server: When serving OPDS feeds handle html descriptions that have namespaced attributes."
tickets: [7938]
- title: "When downloading metadata from isbndb.com, download a maximum of 30 results rather than 1000"
- title: "Fix sorting of tags column"
- title: "Change search/replace to show commas instead of vertical bars as the separator for multiple authors"
- title: "Template language: Make all column names case insensitive"
- title: "Fix bug that prevent the Disabled option for Tag Browser partiotining from working in the Preferences dialog"
- title: "Fix bug when using tags like custom column in the template language"
- title: "Fix bug where composite custom columns using general_program_mode fields are not evaluated correctly when used in a template."
- title: "ImageMagick interface: Don't crash when asked to open empty image files"
- title: "Kobo driver: Add TXT,CBZ,CBR to supported formats list"
tickets: [8124]
- title: "Don't uneccessarily scroll the book list horizontally when re-selcting previously selected rows."
new recipes:
- title: "New London Day"
author: "Being"
- title: "Walla"
author: "marbs"
- title: "New Journal of Physics"
author: "Chema Cortes"
- title: "The Baltimore Sun"
author: "Josh Hall"
- title: "Arabian Business and Sunday Times (UK)"
author: "Darko Miletic"
- title: "Deia"
author: "Gerardo Diez"
- title: "Smarter Planet"
author: "Jack Mason"
improved recipes:
- The Atlantic
- Danas
- Ledevoir
- version: 0.7.37
date: 2011-01-02

View File

@ -0,0 +1,74 @@
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1294342201(BasicNewsRecipe):
title = u'New London Day'
__author__ = 'Being'
description = 'State, local and business news from New London, CT'
language = 'en_GB'
oldest_article = 1
max_articles_per_feed = 200
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(name=['script', 'noscript', 'style'])]
remove_tags_after = [ {'class':['photo_article',]} ]
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
dict(name='font',attrs={'id':["cr-other-headlines"]})]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
feeds = [
(u'All News', u'http://www.theday.com/section/rss'),
(u'Breaking News', u'http://www.theday.com/section/rss01'),
(u'Police and Courts', u'http://www.theday.com/section/rss02'),
(u'State News', u'http://www.theday.com/section/rss03'),
(u'Local Business', u'http://www.theday.com/section/rss04'),
(u'Entertainment', u'http://www.theday.com/section/rss05'),
(u'Opinion', u'http://www.theday.com/section/rss06'),
(u'Casinos', u'http://www.theday.com/section/rss12'),
(u'Defense and Military', u'http://www.theday.com/section/rss14'),
(u'Ann Baldelli Ruminations', u'http://www.theday.com/section/rss20'),
(u'Paul Choiniere Ruminations', u'http://www.theday.com/section/rss21'),
(u'Michael Costanza Omnivore', u'http://www.theday.com/section/rss23'),
(u'Rebecca Dangelo Reel Life', u'http://www.theday.com/section/rss25'),]
def print_version(self, url):
return url.replace('/index.html', '/print.html')
def get_article_url(self, article):
return article.get('feedburner_origlink', article.get('guid', article.get('link')))
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
for tag in soup.findAll('form', dict(attrs={'name':["comments_form"]})):
tag.extract()
for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})):
tag.extract()
return soup
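The URL handling in this recipe is simple enough to check by hand; a minimal sketch with an invented sample URL and feed entry, exercising the same expressions used by print_version and get_article_url above:

# Hypothetical inputs, for illustration only
url = 'http://www.theday.com/article/20110107/NWS01/301079999/index.html'
print url.replace('/index.html', '/print.html')   # the print-friendly variant the recipe fetches
article = {'guid': 'http://www.theday.com/article/sample'}
# feedburner_origlink is preferred, then guid, then link
print article.get('feedburner_origlink', article.get('guid', article.get('link')))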

View File

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = 'Chema Cortés - 2011-01-05'
__copyright__ = u'Chema Cort\xe9s - 2011-01-05'
__version__ = 'v0.01'
__date__ = '2011-01-05'
'''

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
description = 'The WallaNews.'
cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/rsPhoto/sz_5/rsz_220_220_logo_walla.gif'
title = u'Walla'
language = 'he'
__author__ = 'marbs'
extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
simultaneous_downloads = 5
# remove_javascript = True
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 100
# remove_attributes = ['width']
keep_only_tags =dict(name='div', attrs={'class':'wp-0-b w3'})
remove_tags = [dict(name='div', attrs={'class':'tagsContainer'})]
max_articles_per_feed = 100
# preprocess_regexps = [
# (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: '')
# ]
feeds = [(u'חדשות', u'http://rss.walla.co.il/?w=/1/0/1/@rss'),
(u'עסקים', u'http://rss.walla.co.il/?w=/2/3/1/@rss'),
(u'תרבות', u'http://rss.walla.co.il/?w=/4/249/1/@rss'),
(u'בריאות', u'http://rss.walla.co.il/?w=/5/18/1/@rss'),
(u'TECH', u'http://rss.walla.co.il/?w=/6/4/1/@rss'),
(u'אסטרולוגיה', u'http://rss.walla.co.il/?w=/8/3307/1/@rss'),
(u'בעלי חיים', u'http://rss.walla.co.il/?w=/59/5703/1/@rss'),
(u'רכב', u'http://rss.walla.co.il/?w=/31/4700/1/@rss'),
(u'סלבס', u'http://rss.walla.co.il/?w=/22/3600/1/@rss'),
(u'אוכל', u'http://rss.walla.co.il/?w=/9/903/1/@rss'),
(u'אופנה', u'http://rss.walla.co.il/?w=/24/2120/1/@rss'),
(u'ברנזה', u'http://rss.walla.co.il/?w=/27/3900/1/@rss'),
(u'ZONE', u'http://rss.walla.co.il/?w=/18/500/1/@rss'),
(u'ספורט', u'http://rss.walla.co.il/?w=/3/7/1/@rss')]
def print_version(self, url):
print_url = url + '/@@/item/printer'
return print_url

View File

@ -41,6 +41,7 @@ function scroll_to_bookmark(bookmark) {
$.scrollTo($(bm[0]), 1000,
{
over:ratio,
axis: 'y', // Do not scroll in the x direction
onAfter:function(){window.py_bridge.animated_scroll_done()}
}
);

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.37'
__version__ = '0.7.38'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -18,7 +18,7 @@
__version__ = "1.0"
import re
import re, codecs
def detect(aBuf):
import calibre.ebooks.chardet.universaldetector as universaldetector
@ -83,9 +83,11 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
if not raw:
return u'', encoding
if not isinstance(raw, unicode):
if raw.startswith('\xff\xfe'):
if raw.startswith(codecs.BOM_UTF8):
raw, encoding = raw.decode('utf-8')[1:], 'utf-8'
elif raw.startswith(codecs.BOM_UTF16_LE):
raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le'
elif raw.startswith('\xfe\xff'):
elif raw.startswith(codecs.BOM_UTF16_BE):
raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be'
if not isinstance(raw, unicode):
for pat in ENCODING_PATS:
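The hunk above replaces hard-coded BOM byte strings with the named constants from the codecs module. A minimal standalone sketch of the same detection order (UTF-8 first, then UTF-16 LE/BE), independent of calibre's xml_to_unicode:

import codecs

def decode_with_bom(raw, default='utf-8'):
    # Try the UTF-8 BOM before the UTF-16 ones, mirroring the order above;
    # slicing off the first character drops the decoded BOM.
    for bom, enc in ((codecs.BOM_UTF8, 'utf-8'),
                     (codecs.BOM_UTF16_LE, 'utf-16-le'),
                     (codecs.BOM_UTF16_BE, 'utf-16-be')):
        if raw.startswith(bom):
            return raw.decode(enc)[1:], enc
    return raw.decode(default), default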

View File

@ -51,16 +51,16 @@ def chap_head(match):
chap = match.group('chap')
title = match.group('title')
if not title:
return '<h1>'+chap+'</h1><br/>\n'
return '<h1>'+chap+'</h1><br/>\n'
else:
return '<h1>'+chap+'</h1>\n<h3>'+title+'</h3>\n'
return '<h1>'+chap+'</h1>\n<h3>'+title+'</h3>\n'
def wrap_lines(match):
ital = match.group('ital')
if not ital:
return ' '
return ' '
else:
return ital+' '
return ital+' '
class DocAnalysis(object):
'''
@ -191,7 +191,7 @@ class Dehyphenator(object):
dehyphenated = unicode(firsthalf) + unicode(secondhalf)
lookupword = self.removesuffixes.sub('', dehyphenated)
if self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword)
lookupword = self.removeprefix.sub('', lookupword)
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
try:
searchresult = self.html.find(lookupword.lower())
@ -353,7 +353,7 @@ class HTMLPreProcessor(object):
(re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
# Center separator lines
(re.compile(u'<br>\s*(?P<break>([*#•]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
(re.compile(u'<br>\s*(?P<break>([*#•]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
# Remove page links
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
@ -363,13 +363,11 @@ class HTMLPreProcessor(object):
# Remove gray background
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
# Detect Chapters to match default XPATH in GUI
(re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head),
# Cover the case where every letter in a chapter title is separated by a space
(re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head),
# Convert line breaks to paragraphs
(re.compile(r'<br[^>]*>\s*'), lambda match : '</p>\n<p>'),
(re.compile(r'<body[^>]*>\s*'), lambda match : '<body>\n<p>'),
(re.compile(r'\s*</body>'), lambda match : '</p>\n</body>'),
# Have paragraphs show better
(re.compile(r'<br.*?>'), lambda match : '<p>'),
# Clean up spaces
(re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
# Add space before and after italics
@ -455,9 +453,9 @@ class HTMLPreProcessor(object):
# delete soft hyphens - moved here so it's executed after header/footer removal
if is_pdftohtml:
# unwrap/delete soft hyphens
end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
end_rules.append((re.compile(u'[­](</p>\s*<p>\s*)+\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens with formatting
end_rules.append((re.compile(u'[­]\s*(</(i|u|b)>)+(\s*<p>)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))
end_rules.append((re.compile(u'[­]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))
# Make the more aggressive chapter marking regex optional with the preprocess option to
# reduce false positives and move after header/footer removal
@ -475,7 +473,7 @@ class HTMLPreProcessor(object):
end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
end_rules.append(
# Unwrap using punctuation
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężı,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:
@ -508,7 +506,15 @@ class HTMLPreProcessor(object):
if is_pdftohtml and length > -1:
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'pdf', length)
html = dehyphenator(html,'html', length)
if is_pdftohtml:
from calibre.ebooks.conversion.utils import PreProcessor
pdf_markup = PreProcessor(self.extra_opts, None)
totalwords = 0
totalwords = pdf_markup.get_word_count(html)
if totalwords > 7000:
html = pdf_markup.markup_chapters(html, totalwords, True)
#dump(html, 'post-preprocess')
@ -554,5 +560,9 @@ class HTMLPreProcessor(object):
html = smartyPants(html)
html = html.replace(start, '<!--')
html = html.replace(stop, '-->')
# convert ellipsis to entities to prevent wrapping
html = re.sub('(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', html)
# convert double dashes to em-dash
html = re.sub('\s--\s', u'\u2014', html)
return substitute_entites(html)
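The two substitutions appended to the smarten-punctuation pass are easy to verify in isolation; a small sketch with an invented sentence:

import re
html = 'He paused . . . then left -- quickly.'
html = re.sub('(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', html)  # spaced ellipsis -> entity
html = re.sub('\s--\s', u'\u2014', html)                       # ' -- ' -> em dash
print repr(html)   # u'He paused&hellip; then left\u2014quickly.'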

View File

@ -6,8 +6,10 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from math import ceil
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.utils.logging import default_log
from calibre.utils.wordcount import get_wordcount_obj
class PreProcessor(object):
@ -17,6 +19,9 @@ class PreProcessor(object):
self.found_indents = 0
self.extra_opts = extra_opts
def is_pdftohtml(self, src):
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
def chapter_head(self, match):
chap = match.group('chap')
title = match.group('title')
@ -64,7 +69,7 @@ class PreProcessor(object):
inspect. Percent is the minimum percent of line endings which should
be marked up to return true.
'''
htm_end_ere = re.compile('</p>', re.DOTALL)
htm_end_ere = re.compile('</(p|div)>', re.DOTALL)
line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
htm_end = htm_end_ere.findall(raw)
line_end = line_end_ere.findall(raw)
@ -101,36 +106,125 @@ class PreProcessor(object):
with open(os.path.join(odir, name), 'wb') as f:
f.write(raw.encode('utf-8'))
def get_word_count(self, html):
word_count_text = re.sub(r'(?s)<head[^>]*>.*?</head>', '', html)
word_count_text = re.sub(r'<[^>]*>', '', word_count_text)
wordcount = get_wordcount_obj(word_count_text)
return wordcount.words
def markup_chapters(self, html, wordcount, blanks_between_paragraphs):
# Typical chapters are between 2000 and 7000 words; use the larger number to decide the
# minimum number of chapters to search for
self.min_chapters = 1
if wordcount > 7000:
self.min_chapters = int(ceil(wordcount / 7000.))
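# For example, an (invented) 21,000 word book gives int(ceil(21000 / 7000.)) == 3,
# so at least three headings must be found before the loop below stops trying
# progressively more aggressive patterns.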
#print "minimum chapters required are: "+str(self.min_chapters)
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
# Build the Regular Expressions in pieces
init_lookahead = "(?=<(p|div))"
chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
chapter_header_open = r"(?P<chap>"
title_header_open = r"(?P<title>"
chapter_header_close = ")\s*"
title_header_close = ")"
chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
is_pdftohtml = self.is_pdftohtml(html)
if is_pdftohtml:
chapter_line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
chapter_line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
title_line_open = "<(?P<outer2>p)[^>]*>\s*"
title_line_close = "\s*</(?P=outer2)>"
if blanks_between_paragraphs:
blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
else:
blank_lines = ""
opt_title_open = "("
opt_title_close = ")?"
n_lookahead_open = "\s+(?!"
n_lookahead_close = ")"
default_title = r"(<[ibu][^>]*>)?\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
chapter_types = [
[r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication|Preface)\s*([\d\w-]+\:?\s*){0,4}", True, "Searching for common Chapter Headings"],
[r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines
[r"[^'\"]?(\d+(\.|:)|CHAPTER)\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"], # Numeric Chapters
[r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, "Searching for letter spaced headings"], # Spaced Lettering
[r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles
[r"[^'\"]?(\d+|CHAPTER)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, "Searching for simple numeric chapter headings"], # Numeric Chapters, no dot or colon
[r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters
]
# Start with most typical chapter headings, get more aggressive until one works
for [chapter_type, lookahead_ignorecase, log_message] in chapter_types:
if self.html_preprocess_sections >= self.min_chapters:
break
full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
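# The substitution above renames the named groups (outer, inner1-3, chap) in the
# copied pattern, presumably so the negative lookahead built from it does not
# redefine group names already present in full_chapter_line.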
self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
if lookahead_ignorecase:
chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
else:
chapter_marker = init_lookahead+full_chapter_line+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close+n_lookahead_open+n_lookahead+n_lookahead_close
chapdetect = re.compile(r'%s' % chapter_marker, re.UNICODE)
html = chapdetect.sub(self.chapter_head, html)
words_per_chptr = wordcount
if words_per_chptr > 0 and self.html_preprocess_sections > 0:
words_per_chptr = wordcount / self.html_preprocess_sections
self.log("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters")
return html
def __call__(self, html):
self.log("********* Preprocessing HTML *********")
# Count the words in the document to estimate how many chapters to look for and whether
# other types of processing are attempted
totalwords = 0
totalwords = self.get_word_count(html)
if totalwords < 20:
self.log("not enough text, not preprocessing")
return html
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = re.sub(r"\s*</p>", "</p>\n", html)
html = re.sub(r"\s*<p(?P<style>[^>]*)>\s*", "\n<p"+"\g<style>"+">", html)
html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\g<tag>"+">\n", html)
html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\g<tag>"+"\g<style>"+">", html)
###### Check Markup ######
#
# some lit files don't have any <p> tags or equivalent (generally just plain text between
# <pre> tags), check and mark up line endings if required before proceeding
if self.no_markup(html, 0.1):
self.log("not enough paragraph markers, adding now")
# check if content is in pre tags, use txt processor to mark up if so
pre = re.compile(r'<pre>', re.IGNORECASE)
if len(pre.findall(html)) == 1:
self.log("Running Text Processing")
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
separate_paragraphs_single_line
outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL)
html = outerhtml.sub('\g<text>', html)
html = separate_paragraphs_single_line(html)
html = preserve_spaces(html)
html = convert_basic(html, epub_split_size_kb=0)
else:
# Add markup naively
# TODO - find out if there are cases where there are more than one <pre> tag or
# other types of unmarked html and handle them in some better fashion
add_markup = re.compile('(?<!>)(\n)')
html = add_markup.sub('</p>\n<p>', html)
self.log("not enough paragraph markers, adding now")
# check if content is in pre tags, use txt processor to mark up if so
pre = re.compile(r'<pre>', re.IGNORECASE)
if len(pre.findall(html)) == 1:
self.log("Running Text Processing")
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
separate_paragraphs_single_line
outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL)
html = outerhtml.sub('\g<text>', html)
html = separate_paragraphs_single_line(html)
html = preserve_spaces(html)
html = convert_basic(html, epub_split_size_kb=0)
else:
# Add markup naively
# TODO - find out if there are cases where there are more than one <pre> tag or
# other types of unmarked html and handle them in some better fashion
add_markup = re.compile('(?<!>)(\n)')
html = add_markup.sub('</p>\n<p>', html)
###### Mark Indents/Cleanup ######
#
@ -141,12 +235,17 @@ class PreProcessor(object):
self.log("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
# remove remaining non-breaking spaces
html = re.sub(ur'\u00a0', ' ', html)
# Get rid of various common microsoft specific tags which can cause issues later
# Get rid of empty <o:p> tags to simplify other processing
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\w+>', '', html)
# Get rid of empty span, bold, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
# ADE doesn't render <br />, change to empty paragraphs
#html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
# If more than 40% of the lines are empty paragraphs and the user has enabled remove
# paragraph spacing then delete blank lines to clean up spacing
@ -164,63 +263,16 @@ class PreProcessor(object):
self.log("deleting blank lines")
html = blankreg.sub('', html)
elif float(len(blanklines)) / float(len(lines)) > 0.40:
blanks_between_paragraphs = True
#print "blanks between paragraphs is marked True"
blanks_between_paragraphs = True
#print "blanks between paragraphs is marked True"
else:
blanks_between_paragraphs = False
#self.dump(html, 'before_chapter_markup')
# detect chapters/sections to match xpath or splitting logic
#
# Build the Regular Expressions in pieces
init_lookahead = "(?=<(p|div))"
chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
chapter_header_open = r"(?P<chap>"
title_header_open = r"(?P<title>"
chapter_header_close = ")\s*"
title_header_close = ")"
chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
if blanks_between_paragraphs:
blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
else:
blank_lines = ""
opt_title_open = "("
opt_title_close = ")?"
n_lookahead_open = "\s+(?!"
n_lookahead_close = ")"
default_title = r"\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(?=<)"
min_chapters = 10
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
chapter_types = [
[r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}", True, "Searching for common Chapter Headings"],
[r"[^'\"]?(\d+\.?|CHAPTER)\s*([\dA-Z\-\'\"\?\.!#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"], # Numeric Chapters
[r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\w#\-*\s]+<)([\w#-*]+\s*){1,5}\s*)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines
[r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles
[r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters
]
# Start with most typical chapter headings, get more aggressive until one works
for [chapter_type, lookahead_ignorecase, log_message] in chapter_types:
if self.html_preprocess_sections >= min_chapters:
break
full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
if lookahead_ignorecase:
chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
else:
chapter_marker = init_lookahead+full_chapter_line+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close+n_lookahead_open+n_lookahead+n_lookahead_close
chapdetect = re.compile(r'%s' % chapter_marker, re.UNICODE)
html = chapdetect.sub(self.chapter_head, html)
html = self.markup_chapters(html, totalwords, blanks_between_paragraphs)
###### Unwrap lines ######
@ -247,7 +299,7 @@ class PreProcessor(object):
# Calculate Length
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
self.log("*** Median line length is " + unicode(length) + ", calculated with " + format + " format ***")
self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
if hardbreaks or unwrap_factor < 0.4:
self.log("Unwrapping required, unwrapping Lines")
@ -260,7 +312,7 @@ class PreProcessor(object):
self.log("Done dehyphenating")
# Unwrap lines using punctuation and line length
#unwrap_quotes = re.compile(u"(?<=.{%i}\"')\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*(?=[a-z])" % length, re.UNICODE)
unwrap = re.compile(u"(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężı,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
unwrap = re.compile(u"(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
html = unwrap.sub(' ', html)
#check any remaining hyphens, but only unwrap if there is a match
dehyphenator = Dehyphenator()
@ -276,7 +328,7 @@ class PreProcessor(object):
html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
# If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < 5:
if self.html_preprocess_sections < self.min_chapters:
self.log("Looking for more split points based on punctuation,"
" currently have " + unicode(self.html_preprocess_sections))
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)

View File

@ -173,7 +173,7 @@ class FB2MLizer(object):
if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
self.oeb_book.spine.insert(0, title_item, True)
# Create xhtml page to reference cover image so it can be used.
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES:

View File

@ -46,15 +46,19 @@ class FB2Input(InputFormatPlugin):
log.debug('Parsing XML...')
raw = stream.read().replace('\0', '')
raw = xml_to_unicode(raw, strip_encoding_pats=True,
assume_utf8=True)[0]
assume_utf8=True, resolve_entities=True)[0]
try:
doc = etree.fromstring(raw)
except etree.XMLSyntaxError:
try:
doc = etree.fromstring(raw, parser=RECOVER_PARSER)
if doc is None:
raise Exception('parse failed')
except:
doc = etree.fromstring(raw.replace('& ', '&amp;'),
parser=RECOVER_PARSER)
if doc is None:
raise ValueError('The FB2 file is not valid XML')
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
css = ''
for s in stylesheets:
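The parsing fallback added above (strict parse, then the recovering parser, then a retry with bare '& ' escaped) can be reproduced with plain lxml. A hedged sketch, where RECOVER_PARSER is assumed to be an etree.XMLParser(recover=True) as in calibre, and raw is markup whose encoding declaration has already been stripped (as xml_to_unicode does):

from lxml import etree

RECOVER_PARSER = etree.XMLParser(recover=True)

def parse_fb2(raw):
    try:
        return etree.fromstring(raw)
    except etree.XMLSyntaxError:
        try:
            doc = etree.fromstring(raw, parser=RECOVER_PARSER)
            if doc is None:
                raise Exception('parse failed')
        except:
            # bare ampersands are a common defect in FB2 files
            doc = etree.fromstring(raw.replace('& ', '&amp;'), parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        return doc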

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
from calibre.ebooks.conversion.utils import PreProcessor
@ -18,30 +18,6 @@ class PDBInput(InputFormatPlugin):
description = 'Convert PDB to HTML'
file_types = set(['pdb'])
options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto',
choices=['auto', 'block', 'single', 'print'],
help=_('Paragraph structure.\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
'* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab '
'starts a paragraph.')),
OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'markdown'],
help=_('Formatting used within the document.'
'* auto: Try to auto detect the document formatting.\n'
'* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
'* markdown: Run the input though the markdown pre-processor. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False,
help=_('Normally extra spaces are condensed into a single space. '
'With this option all spaces will be displayed.')),
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
help=_('Do not insert a Table of Contents into the output text.')),
])
def convert(self, stream, options, file_ext, log,
accelerators):
header = PdbHeaderReader(stream)

View File

@ -19,9 +19,6 @@ class Reader(FormatReader):
self.stream = stream
self.log = log
self.options = options
setattr(self.options, 'new_pdf_engine', False)
setattr(self.options, 'no_images', False)
setattr(self.options, 'unwrap_factor', 0.45)
def extract_content(self, output_dir):
self.log.info('Extracting PDF...')
@ -31,7 +28,12 @@ class Reader(FormatReader):
for x in xrange(self.header.section_count()):
pdf.write(self.header.section_data(x))
from calibre.customize.ui import plugin_for_input_format
pdf.seek(0)
return plugin_for_input_format('pdf').convert(pdf, self.options,
'pdf', self.log, [])
from calibre.customize.ui import plugin_for_input_format
pdf_plugin = plugin_for_input_format('pdf')
for option in pdf_plugin.options:
if not hasattr(self.options, option.option.name):
setattr(self.options, option.name, option.recommended_value)
pdf.seek(0)
return pdf_plugin.convert(pdf, self.options, 'pdf', self.log, {})

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
from cStringIO import StringIO
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.compression.tcr import decompress
class TCRInput(InputFormatPlugin):
@ -16,30 +16,6 @@ class TCRInput(InputFormatPlugin):
description = 'Convert TCR files to HTML'
file_types = set(['tcr'])
options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto',
choices=['auto', 'block', 'single', 'print'],
help=_('Paragraph structure.\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
'* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab '
'starts a paragraph.')),
OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'markdown'],
help=_('Formatting used within the document.'
'* auto: Try to auto detect the document formatting.\n'
'* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
'* markdown: Run the input though the markdown pre-processor. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False,
help=_('Normally extra spaces are condensed into a single space. '
'With this option all spaces will be displayed.')),
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
help=_('Do not insert a Table of Contents into the output text.')),
])
def convert(self, stream, options, file_ext, log, accelerators):
log.info('Decompressing text...')
raw_txt = decompress(stream)

View File

@ -256,8 +256,10 @@ class BookInfo(QWebView):
% (left_pane, right_pane)))
def mouseDoubleClickEvent(self, ev):
if self.width() - ev.x() < 25 or \
self.height() - ev.y() < 25:
swidth = self.page().mainFrame().scrollBarGeometry(Qt.Vertical).width()
sheight = self.page().mainFrame().scrollBarGeometry(Qt.Horizontal).height()
if self.width() - ev.x() < swidth or \
self.height() - ev.y() < sheight:
# Filter out double clicks on the scroll bar
ev.accept()
else:

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap
import textwrap, codecs
from functools import partial
from PyQt4.Qt import QWidget, QSpinBox, QDoubleSpinBox, QLineEdit, QTextEdit, \
@ -128,6 +128,7 @@ class Widget(QWidget):
def get_value(self, g):
from calibre.gui2.convert.xpath_wizard import XPathEdit
from calibre.gui2.convert.regex_builder import RegexEdit
from calibre.gui2.widgets import EncodingComboBox
ret = self.get_value_handler(g)
if ret != 'this is a dummy return value, xcswx1avcx4x':
return ret
@ -139,6 +140,13 @@ class Widget(QWidget):
if not ans:
ans = None
return ans
elif isinstance(g, EncodingComboBox):
ans = unicode(g.currentText()).strip()
try:
codecs.lookup(ans)
except:
ans = ''
return ans
elif isinstance(g, QComboBox):
return unicode(g.currentText())
elif isinstance(g, QCheckBox):
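The new EncodingComboBox branch validates whatever the user typed by looking the name up in Python's codec registry; the same check works on its own (a small sketch, not calibre API):

import codecs

def valid_encoding(name):
    # codecs.lookup raises LookupError for names Python does not know,
    # which is what the widget above uses to fall back to an empty value
    try:
        codecs.lookup(name.strip())
        return name.strip()
    except LookupError:
        return ''

print valid_encoding('UTF-8')        # UTF-8
print valid_encoding('not-a-codec')  # (empty string)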

View File

@ -1,25 +0,0 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.convert.txt_input_ui import Ui_Form
from calibre.gui2.convert import Widget
class PluginWidget(Widget, Ui_Form):
TITLE = _('PDB Input')
HELP = _('Options specific to')+' PDB '+_('input')
COMMIT_NAME = 'pdb_input'
ICON = I('mimetypes/txt.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
self.db, self.book_id = db, book_id
for x in get_option('paragraph_type').option.choices:
self.opt_paragraph_type.addItem(x)
for x in get_option('formatting_type').option.choices:
self.opt_formatting_type.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -1,25 +0,0 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.convert.txt_input_ui import Ui_Form
from calibre.gui2.convert import Widget
class PluginWidget(Widget, Ui_Form):
TITLE = _('TCR Input')
HELP = _('Options specific to')+' TCR '+_('input')
COMMIT_NAME = 'tcr_input'
ICON = I('mimetypes/txt.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
self.db, self.book_id = db, book_id
for x in get_option('paragraph_type').option.choices:
self.opt_paragraph_type.addItem(x)
for x in get_option('formatting_type').option.choices:
self.opt_formatting_type.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -61,7 +61,8 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format
dtitle = unicode(mi.title)
except:
dtitle = repr(mi.title)
desc = _('Convert book %d of %d (%s)') % (i + 1, total, dtitle)
desc = _('Convert book %(num)d of %(total)d (%(title)s)') % \
{'num':i + 1, 'total':total, 'title':dtitle}
recs = cPickle.loads(d.recommendations)
if d.opf_file is not None:
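The change from positional %d/%s to named %(num)d-style placeholders matters for translation: with a mapping the translator can reorder the values, which positional formatting cannot express (the zh_CN catalogue further down had to drop exactly such a reordering). A small illustration outside of calibre's _() machinery:

msg = 'Convert book %(num)d of %(total)d (%(title)s)'
# A translation is free to move the placeholders around:
msg_reordered = '(%(title)s) book %(num)d/%(total)d conversion'
values = {'num': 3, 'total': 10, 'title': 'Example'}
print msg % values            # Convert book 3 of 10 (Example)
print msg_reordered % values  # (Example) book 3/10 conversion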

View File

@ -449,7 +449,7 @@ class Document(QWebPage): # {{{
return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
def set_bottom_padding(self, amount):
s = QSize(-1, -1) if amount == 0 else QSize(self.width,
s = QSize(-1, -1) if amount == 0 else QSize(self.viewportSize().width(),
self.height+amount)
self.setPreferredContentsSize(s)
@ -820,6 +820,7 @@ class DocumentView(QWebView): # {{{
self.flipper.initialize(self.current_page_image())
self.manager.next_document()
return
#oheight = self.document.height
lower_limit = opos + delta_y # Max value of top y co-ord after scrolling
max_y = self.document.height - window_height # The maximum possible top y co-ord
if max_y < lower_limit:
@ -835,6 +836,7 @@ class DocumentView(QWebView): # {{{
if epf:
self.flipper.initialize(self.current_page_image())
#print 'Document height:', self.document.height
#print 'Height change:', (self.document.height - oheight)
max_y = self.document.height - window_height
lower_limit = min(max_y, lower_limit)
#print 'Scroll to:', lower_limit
@ -842,6 +844,7 @@ class DocumentView(QWebView): # {{{
self.document.scroll_to(self.document.xpos, lower_limit)
actually_scrolled = self.document.ypos - opos
#print 'After scroll pos:', self.document.ypos
#print 'Scrolled by:', self.document.ypos - opos
self.find_next_blank_line(window_height - actually_scrolled)
#print 'After blank line pos:', self.document.ypos
if epf:

View File

@ -640,8 +640,8 @@ class BrowseServer(object):
if fmt:
href = self.opts.url_prefix + '/get/%s/%s_%d.%s'%(
fmt, fname, id_, fmt)
rt = xml(_('Read %s in the %s format')%(args['title'],
fmt.upper()), True)
rt = xml(_('Read %(title)s in the %(fmt)s format')% \
{'title':args['title'], 'fmt':fmt.upper()}, True)
args['get_button'] = \
'<a href="%s" class="read" title="%s">%s</a>' % \

View File

@ -128,9 +128,9 @@ def CATALOG_ENTRY(item, item_kind, base_href, version, updated,
count = ''
if item.category == 'authors' and \
tweaks['categories_use_field_for_author_name'] == 'author_sort':
name = xml(item.sort)
name = item.sort
else:
name = xml(item.name)
name = item.name
return E.entry(
TITLE(name + ('' if not add_kind else ' (%s)'%item_kind)),
ID(id_),
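The fix above stops escaping the category name before handing it to the entry builder, because the builder escapes text itself; escaping twice turns '&' into a visible '&amp;' in the rendered OPDS feed. A quick demonstration of the double-escaping effect with the standard library (illustrative, not the calibre helpers):

from xml.sax.saxutils import escape

name = 'Science & Nature'
print escape(name)          # Science &amp; Nature
print escape(escape(name))  # Science &amp;amp; Nature  (roughly what the feed showed before)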

View File

@ -8,13 +8,13 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-01-02 23:55+0000\n"
"PO-Revision-Date: 2011-01-04 08:51+0000\n"
"Last-Translator: TomVal <Unknown>\n"
"PO-Revision-Date: 2011-01-06 11:10+0000\n"
"Last-Translator: schunka <Unknown>\n"
"Language-Team: Czech <cs@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-01-05 04:43+0000\n"
"X-Launchpad-Export-Date: 2011-01-07 04:57+0000\n"
"X-Generator: Launchpad (build Unknown)\n"
#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:43
@ -799,7 +799,7 @@ msgstr "Spojit se s Sanda Bambook eBook čtečkou"
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:25
msgid "Li Fanxi"
msgstr ""
msgstr "Li Fanxi"
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:41
msgid "Device IP Address (restart calibre after changing)"
@ -1126,11 +1126,11 @@ msgstr "Komunikovat se zařízením Trekstor"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:251
msgid "Communicate with the EEE Reader"
msgstr ""
msgstr "Probíhá spojení se čtečkou EEE Reader."
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:271
msgid "Communicate with the Nextbook Reader"
msgstr ""
msgstr "Probíhá spojení se čtečkou Nextbook Reader."
#: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:17
msgid "Communicate with the Nokia 770 internet tablet."
@ -1174,11 +1174,11 @@ msgstr "Spojit se se Sony eBook reader"
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61
msgid "All by title"
msgstr ""
msgstr "Vše podle názvu"
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62
msgid "All by author"
msgstr ""
msgstr "Vše podle autora"
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:64
msgid ""
@ -1226,7 +1226,7 @@ msgstr "Spojit se se Sovos reader."
#: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:78
msgid "Communicate with the Sunstech EB700 reader."
msgstr ""
msgstr "Probíhá spojení se čtečkou Sunstech EB700."
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:258
msgid "Unable to detect the %s disk drive. Try rebooting."

View File

@ -8,13 +8,13 @@ msgstr ""
"Project-Id-Version: de\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2011-01-02 23:55+0000\n"
"PO-Revision-Date: 2011-01-01 21:21+0000\n"
"Last-Translator: Kovid Goyal <Unknown>\n"
"PO-Revision-Date: 2011-01-07 02:17+0000\n"
"Last-Translator: heinz beck <Unknown>\n"
"Language-Team: American English <kde-i18n-doc@lists.kde.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-01-04 13:52+0000\n"
"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n"
"X-Generator: Launchpad (build Unknown)\n"
"Generated-By: pygettext.py 1.5\n"
@ -943,7 +943,7 @@ msgstr "Kommunikation mit dem PocketBook 301 Reader."
#: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:233
msgid "Communicate with the PocketBook 602/603/902/903 reader."
msgstr ""
msgstr "verbinden mit PocketBook 602/603/902/903"
#: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:252
msgid "Communicate with the PocketBook 701"
@ -1186,7 +1186,7 @@ msgstr "Kommunikation mit allen Sony eBook Readern."
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61
msgid "All by title"
msgstr ""
msgstr "nach Titel"
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62
msgid "All by author"

View File

@ -8,13 +8,13 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-01-02 23:55+0000\n"
"PO-Revision-Date: 2011-01-02 13:21+0000\n"
"Last-Translator: Calidonia Hibernia <Unknown>\n"
"PO-Revision-Date: 2011-01-06 14:46+0000\n"
"Last-Translator: Antón Méixome <meixome@gmail.com>\n"
"Language-Team: dev@gl.openoffice.org\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-01-04 13:52+0000\n"
"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n"
"X-Generator: Launchpad (build Unknown)\n"
"Language: gl\n"
@ -5749,7 +5749,7 @@ msgstr "Tamaño da mensaxe para a descrición das miniaturas de portada"
#: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:330
msgid " inch"
msgstr ""
msgstr " polgada"
#: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:331
msgid "&Description note"
@ -10645,15 +10645,15 @@ msgstr "Nunca"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:60
msgid "By first letter"
msgstr ""
msgstr "Pola primeira letra"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:60
msgid "Disabled"
msgstr ""
msgstr "Desactivado"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:61
msgid "Partitioned"
msgstr ""
msgstr "Particionado"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:140
msgid "User Interface &layout (needs restart):"
@ -10709,7 +10709,7 @@ msgstr "Buscar mentres se escribe"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:152
msgid "Tags browser category partitioning method:"
msgstr ""
msgstr "Método de particionado con categorías de etiquetas de navegación:"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:153
msgid ""
@ -10719,10 +10719,15 @@ msgid ""
"have a list of fixed-sized groups. Set to disabled\n"
"if you never want subcategories"
msgstr ""
"Escoller como as subcategorías de etiquetas de navegación se amosan cando\n"
"hai máis ítems que os do límite. Seleccione por primeira\n"
"letra para ver unha lista A, B, C. Escolla particionado para\n"
"ter unha lista de grupos de tamaño fixo. Escolla desactivado\n"
"se non vai querer nunca subcategorías"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:158
msgid "Collapse when more items than:"
msgstr ""
msgstr "Colapsar cando os ítems son máis de:"
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:159
msgid ""
@ -10730,6 +10735,10 @@ msgid ""
"up into sub-categories. If the partition method is set to disable, this "
"value is ignored."
msgstr ""
"Se unha categoría de etiquetas de navegación ten máis ca este número de "
"ítems, divídese\n"
"en subcategorías. Se o método de partición se pon como desactivado, "
"ignorarase este valor."
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:161
msgid "&Toolbar"
@ -11494,7 +11503,7 @@ msgstr "Mostrar todas as categorías"
#: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:300
msgid "Change sub-categorization scheme"
msgstr ""
msgstr "Cambiar o esquema de subcategorización"
#: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:625
msgid ""

View File

@ -9,13 +9,13 @@ msgstr ""
"Project-Id-Version: calibre_calibre-it\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2011-01-02 23:55+0000\n"
"PO-Revision-Date: 2011-01-02 22:45+0000\n"
"Last-Translator: Marco Ciampa <ciampix@libero.it>\n"
"PO-Revision-Date: 2011-01-06 15:33+0000\n"
"Last-Translator: Francesco Pasa <Unknown>\n"
"Language-Team: italiano\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-01-04 13:53+0000\n"
"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n"
"X-Generator: Launchpad (build Unknown)\n"
"X-Poedit-Bookmarks: -1,-1,-1,-1,-1,1105,-1,1312,-1,-1\n"
"Generated-By: pygettext.py 1.5\n"
@ -5694,7 +5694,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:330
msgid " inch"
msgstr ""
msgstr " pollice"
#: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:331
msgid "&Description note"

View File

@ -8,13 +8,13 @@ msgstr ""
"Project-Id-Version: nds\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2011-01-02 23:55+0000\n"
"PO-Revision-Date: 2010-10-18 00:57+0000\n"
"Last-Translator: Nils-Christoph Fiedler <ncfiedler@gnome.org>\n"
"PO-Revision-Date: 2011-01-07 02:48+0000\n"
"Last-Translator: heinz beck <Unknown>\n"
"Language-Team: German\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-01-04 13:55+0000\n"
"X-Launchpad-Export-Date: 2011-01-07 04:59+0000\n"
"X-Generator: Launchpad (build Unknown)\n"
"X-Poedit-Country: GERMANY\n"
"X-Poedit-Language: German\n"

View File

@ -8,13 +8,13 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-01-02 23:55+0000\n"
"PO-Revision-Date: 2010-12-18 05:47+0000\n"
"Last-Translator: Kovid Goyal <Unknown>\n"
"PO-Revision-Date: 2011-01-06 13:01+0000\n"
"Last-Translator: MoroniGranja <Unknown>\n"
"Language-Team: American English <kde-i18n-doc@kde.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-01-04 14:00+0000\n"
"X-Launchpad-Export-Date: 2011-01-07 04:59+0000\n"
"X-Generator: Launchpad (build Unknown)\n"
#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:43
@ -172,7 +172,7 @@ msgstr "Leitor de metadados"
#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:266
msgid "Metadata writer"
msgstr ""
msgstr "Escritor de metadata"
#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:296
msgid "Catalog generator"
@ -589,6 +589,8 @@ msgid ""
"Intended for the Samsung Galaxy and similar tablet devices with a resolution "
"of 600x1280"
msgstr ""
"Planejado para o Samsung Galaxy e tablets similares com uma resolução "
"de600x1280"
#: /home/kovid/work/calibre/src/calibre/customize/profiles.py:471
msgid "This profile is intended for the Kobo Reader."
@ -695,7 +697,7 @@ msgstr "Desabilitar a extensão com nome"
#: /home/kovid/work/calibre/src/calibre/debug.py:148
msgid "Debug log"
msgstr ""
msgstr "Log de Debug"
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:13
msgid "Communicate with Android phones."
@ -808,7 +810,7 @@ msgstr "Comunicar com iTunes."
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:24
msgid "Communicate with the Sanda Bambook eBook reader."
msgstr ""
msgstr "Comunicar com o leitor de eBooks Sanda Bambook"
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:25
msgid "Li Fanxi"
@ -817,17 +819,22 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:41
msgid "Device IP Address (restart calibre after changing)"
msgstr ""
"Endereço IP do dispositivo (é necessário reiniciar calibre após modificar)"
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:46
msgid ""
"Unable to add book to library directly from Bambook. Please save the book to "
"disk and add the file to library from disk."
msgstr ""
"Impossível adicionar livro a biblioteca diretamente do Bambook. Favor salvar "
"o livro no disco e adicionar o arquivo do disco a biblioteca."
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:66
msgid ""
"Unable to connect to Bambook, you need to install Bambook library first."
msgstr ""
"Não foi possível conectar ao Bambook, é necessário instalar a biblioteca "
"Bambook."
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:74
msgid ""
@ -835,10 +842,13 @@ msgid ""
"If you are trying to connect via Wi-Fi, please make sure the IP address of "
"Bambook has been correctly configured."
msgstr ""
"Não foi possível conectar ao Bambook. \n"
"Se você está tentando conectar por Wi-Fi, favor confirmar se o endereço IP "
"do Bambook foi configurado corretamente."
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:111
msgid "Bambook"
msgstr ""
msgstr "Bambook"
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:217
#: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:233
@ -899,7 +909,7 @@ msgstr "Enviando metadados ao dispositivo..."
#: /home/kovid/work/calibre/src/calibre/devices/bambook/libbambookcore.py:132
msgid "Bambook SDK has not been installed."
msgstr ""
msgstr "Bambook SDK não foi instalado."
#: /home/kovid/work/calibre/src/calibre/devices/binatone/driver.py:17
msgid "Communicate with the Binatone Readme eBook reader."
@ -938,11 +948,11 @@ msgstr "Comunica-se com o leitor PocketBook 301"
#: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:233
msgid "Communicate with the PocketBook 602/603/902/903 reader."
msgstr ""
msgstr "Comunicar-se com o PocketBook 602/603/902/903 reader."
#: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:252
msgid "Communicate with the PocketBook 701"
msgstr ""
msgstr "Comunicar-se com o PocketBook 701"
#: /home/kovid/work/calibre/src/calibre/devices/edge/driver.py:17
msgid "Entourage Edge"
@ -1069,6 +1079,8 @@ msgid ""
"The Kobo supports only one collection currently: the \"Im_Reading\" list. "
"Create a tag called \"Im_Reading\" "
msgstr ""
"O Kobo aceita apenas uma coleção atualmente: a lista \"Estou_Lendo\". Crie "
"uma tag chamada \"Estou_Lendo\" "
#: /home/kovid/work/calibre/src/calibre/devices/kobo/driver.py:446
#: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:279
@ -1097,7 +1109,7 @@ msgstr "Comunicar com o Sweex MM300"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:79
msgid "Communicate with the Digma Q600"
msgstr ""
msgstr "Comunicar-se com o Digma Q600"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:88
msgid "Communicate with the Kogan"
@ -1110,7 +1122,7 @@ msgstr "Comunicar com o Pandigital Novel"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:142
msgid "Communicate with the VelocityMicro"
msgstr ""
msgstr "Comunicar-se com o VelocityMicro"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:160
msgid "Communicate with the GM2000"
@ -1118,23 +1130,23 @@ msgstr "Comunicar com o GM2000"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:180
msgid "Communicate with the Acer Lumiread"
msgstr ""
msgstr "Comunicar-se com o Acer Lumiread"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:211
msgid "Communicate with the Aluratek Color"
msgstr ""
msgstr "Comunicar-se com o Acer Lumiread"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:231
msgid "Communicate with the Trekstor"
msgstr ""
msgstr "Comunicar-se com o Trekstor"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:251
msgid "Communicate with the EEE Reader"
msgstr ""
msgstr "Comunicar-se com o EEE Reader"
#: /home/kovid/work/calibre/src/calibre/devices/misc.py:271
msgid "Communicate with the Nextbook Reader"
msgstr ""
msgstr "Comunicar-se com o Nextbook Reader"
#: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:17
msgid "Communicate with the Nokia 770 internet tablet."
@ -1142,7 +1154,7 @@ msgstr "Comunica-se com o Nokia 770 Internet Tablet."
#: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:40
msgid "Communicate with the Nokia 810/900 internet tablet."
msgstr ""
msgstr "Comunicar-se com o internet tablet Nokia 810/900."
#: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:74
msgid "Communicate with the Nokia E52"
@ -1158,11 +1170,11 @@ msgstr "Comunica-se com o leitor Nook."
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:85
msgid "Nook Color"
msgstr ""
msgstr "Nook Color"
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:86
msgid "Communicate with the Nook Color eBook reader."
msgstr ""
msgstr "Comunicar-se com o Nook Color."
#: /home/kovid/work/calibre/src/calibre/devices/nuut2/driver.py:17
msgid "Communicate with the Nuut2 eBook reader."
@ -1178,11 +1190,11 @@ msgstr "Comunica-se com todos os leitores da Sony."
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61
msgid "All by title"
msgstr ""
msgstr "Todos por título"
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62
msgid "All by author"
msgstr ""
msgstr "Todos por autor"
#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:64
msgid ""
@ -1198,6 +1210,9 @@ msgid ""
"to the list to enable them. The collections will be given the name provided "
"after the \":\" character."
msgstr ""
". Duas coleções especiais estão disponíveis: %s:%s e %s:%s. Adicione estes "
"valores à lista para habilita-los. As coleções receberão o nome após os dois "
"pontos (\":\")."
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:190
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/structure.py:68
@ -1218,7 +1233,7 @@ msgstr "Comunica-se com o leitor Newsmy."
#: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:47
msgid "Communicate with the Pico reader."
msgstr ""
msgstr "Comunicar-se com o Pico reader."
#: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:57
msgid "Communicate with the iPapyrus reader."
@ -1230,7 +1245,7 @@ msgstr "Comunicar com o leitor Sovos."
#: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:78
msgid "Communicate with the Sunstech EB700 reader."
msgstr ""
msgstr "Comunicar-se com o Sunstech EB700 reader."
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:258
msgid "Unable to detect the %s disk drive. Try rebooting."
@ -1254,6 +1269,8 @@ msgid ""
"Unable to detect the %s disk drive. Either the device has already been "
"ejected, or your kernel is exporting a deprecated version of SYSFS."
msgstr ""
"Não foi possível detectar o disco %s. O dispositivo já foi ejetado, ou o seu "
"kernel está exportando uma versão deprecada do SYSFS."
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:617
msgid "Unable to mount main memory (Error code: %d)"
@ -1264,6 +1281,8 @@ msgid ""
"The main memory of %s is read only. This usually happens because of file "
"system errors."
msgstr ""
"A memória principal de %s é somente leitura. Isto normalmente acontece "
"devido a erros no sistema de arquivos."
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:816
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:818
@ -1693,6 +1712,9 @@ msgid ""
"is: %default. Links are only added to the TOC if less than the threshold "
"number of chapters were detected."
msgstr ""
"Número máximo de links para inserir no sumário. Use 0 para desabilitar. O "
"padrão é: %default. Links serão adicionados ao sumário somente se o número "
"encontrado for menor que o limite máximo de capítulos."
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:256
msgid ""
@ -2076,7 +2098,7 @@ msgstr "Você deve especificar um arquivo do tipo epub"
#: /home/kovid/work/calibre/src/calibre/ebooks/epub/fix/unmanifested.py:17
msgid "Fix unmanifested files"
msgstr ""
msgstr "Conserte arquivos sem manifesto."
#: /home/kovid/work/calibre/src/calibre/ebooks/epub/fix/unmanifested.py:21
msgid ""

View File

@ -12904,7 +12904,7 @@ msgstr "其它格式"
#: /home/kovid/work/calibre/src/calibre/library/server/browse.py:643
msgid "Read %s in the %s format"
msgstr "用 %2$s 格式阅读 %1$s"
msgstr "用 %s 格式阅读 %s"
#: /home/kovid/work/calibre/src/calibre/library/server/browse.py:648
msgid "Get"

View File

@ -0,0 +1,85 @@
#!/usr/bin/python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
"""
Get word, character, and Asian character counts
1. Get a word count as a dictionary:
wc = get_wordcount(text)
words = wc['words'] # etc.
2. Get a word count as an object
wc = get_wordcount_obj(text)
words = wc.words # etc.
properties counted:
* characters
* chars_no_spaces
* asian_chars
* non_asian_words
* words
Sourced from:
http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/
http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/
"""
__version__ = 0.1
__author__ = "Ryan Ginstrom"
IDEOGRAPHIC_SPACE = 0x3000
def is_asian(char):
"""Is the character Asian?"""
# 0x3000 is ideographic space (i.e. double-byte space)
# Anything over is an Asian character
return ord(char) > IDEOGRAPHIC_SPACE
def filter_jchars(c):
"""Filters Asian characters to spaces"""
if is_asian(c):
return ' '
return c
def nonj_len(word):
u"""Returns number of non-Asian words in {word}
- 日本語AアジアンB -> 2
- hello -> 1
@param word: A word, possibly containing Asian characters
"""
# Here are the steps:
# 本spam日eggs
# -> [' ', 's', 'p', 'a', 'm', ' ', 'e', 'g', 'g', 's']
# -> ' spam eggs'
# -> ['spam', 'eggs']
# The length of which is 2!
chars = [filter_jchars(c) for c in word]
return len(u''.join(chars).split())
def get_wordcount(text):
"""Get the word/character count for text
@param text: The text of the segment
"""
characters = len(text)
chars_no_spaces = sum([not x.isspace() for x in text])
asian_chars = sum([is_asian(x) for x in text])
non_asian_words = nonj_len(text)
words = non_asian_words + asian_chars
return dict(characters=characters,
chars_no_spaces=chars_no_spaces,
asian_chars=asian_chars,
non_asian_words=non_asian_words,
words=words)
def dict2obj(dictionary):
"""Transform a dictionary into an object"""
class Obj(object):
def __init__(self, dictionary):
self.__dict__.update(dictionary)
return Obj(dictionary)
def get_wordcount_obj(text):
"""Get the wordcount as an object rather than a dictionary"""
return dict2obj(get_wordcount(text))
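# Usage sketch (sample string invented for illustration); both interfaces give
# the same counts, e.g. two Latin words plus three CJK characters count as 5 words.
if __name__ == '__main__':
    sample = u'Hello world \u65e5\u672c\u8a9e'
    wc = get_wordcount(sample)
    print wc['words'], wc['asian_chars'], wc['non_asian_words']   # -> 5 3 2
    obj = get_wordcount_obj(sample)
    print obj.characters, obj.chars_no_spaces                     # -> 15 13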