From da679b885faf07c3218946d072b529259c5e6955 Mon Sep 17 00:00:00 2001 From: ldolse Date: Fri, 7 Jan 2011 11:26:45 +0800 Subject: [PATCH 01/26] chapter heading tweaks --- src/calibre/ebooks/conversion/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 4bb96ac088..2090cff12d 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -153,7 +153,7 @@ class PreProcessor(object): default_title = r"(<[ibu][^>]*>)?\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(]*>)?(?=<)" chapter_types = [ - [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication|Preface)\s*([\d\w-]+\:?\s*){0,4}", True, "Searching for common Chapter Headings"], + [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication|Preface)\s*([\d\w-]+\:?\'?\s*){0,5}", True, "Searching for common Chapter Headings"], [r"]*>\s*(]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)()?\s*", True, "Searching for emphasized lines"], # Emphasized lines [r"[^'\"]?(\d+(\.|:)|CHAPTER)\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"], # Numeric Chapters [r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, "Searching for letter spaced headings"], # Spaced Lettering From dd96c645f020cd57682bbeba8501c21b8b77b0b9 Mon Sep 17 00:00:00 2001 From: ldolse Date: Fri, 7 Jan 2011 14:19:12 +0800 Subject: [PATCH 02/26] tied line histogram into txt paragraph structure detection --- src/calibre/ebooks/conversion/preprocess.py | 2 ++ src/calibre/ebooks/txt/input.py | 2 +- src/calibre/ebooks/txt/processor.py | 31 ++++++++++++++------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 29006ffd9b..97aaa653a9 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -78,6 +78,8 @@ class DocAnalysis(object): linere = re.compile('(?<=
)(?!\s*
).*?(?=
)', re.DOTALL) elif format == 'spanned_html': linere = re.compile('(?<=)', re.DOTALL) + elif format == 'txt': + linere = re.compile('.*?\n', re.DOTALL) self.lines = linere.findall(raw) def line_length(self, percent): diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 47e92a45a9..7fb22755de 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -90,7 +90,7 @@ class TXTInput(InputFormatPlugin): # We don't check for block because the processor assumes block. # single and print at transformed to block for processing. - if options.paragraph_type == 'single': + if options.paragraph_type == 'single' or 'unformatted': txt = separate_paragraphs_single_line(txt) elif options.paragraph_type == 'print': txt = separate_paragraphs_print_formatted(txt) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index f6d628e7c5..53935584d2 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -9,6 +9,7 @@ import os, re from calibre import prepare_string_for_xml, isbytestring from calibre.ebooks.markdown import markdown from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.conversion.preprocess import DocAnalysis __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' @@ -102,26 +103,36 @@ def detect_paragraph_type(txt): print: Each paragraph starts with a 2+ spaces or a tab and ends when a new paragraph is reached. markdown: Markdown formatting is in the document. + unformatted: most lines have hard line breaks, few/no spaces or indents - returns block, single, print, markdown + returns block, single, print, markdown, unformatted ''' txt = txt.replace('\r\n', '\n') txt = txt.replace('\r', '\n') txt_line_count = len(re.findall('(?mu)^\s*.+$', txt)) - # Check for print - tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) - if tab_line_count / float(txt_line_count) >= .25: - return 'print' + # Check for hard line breaks - true if 55% of the doc breaks in the same region + docanalysis = DocAnalysis('txt', txt) + hardbreaks = docanalysis.line_histogram(.55) - # Check for block - empty_line_count = len(re.findall('(?mu)^\s*$', txt)) - if empty_line_count / float(txt_line_count) >= .25: - return 'block' + if hardbreaks: + # Check for print + tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) + if tab_line_count / float(txt_line_count) >= .25: + return 'print' + + # Check for block + empty_line_count = len(re.findall('(?mu)^\s*$', txt)) + if empty_line_count / float(txt_line_count) >= .25: + return 'block' + + # Assume unformatted text with hardbreaks if nothing else matches + return 'unformatted' - # Nothing else matched to assume single. + # return single if hardbreaks is false return 'single' + def detect_formatting_type(txt): # Check for markdown # Headings From 633ad8c1f91e686a810f646ed3aaf777a5e2a1f0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 7 Jan 2011 13:55:51 -0700 Subject: [PATCH 03/26] IGN:Tag release --- src/calibre/translations/calibre.pot | 553 ++++++++++++++------------- 1 file changed, 285 insertions(+), 268 deletions(-) diff --git a/src/calibre/translations/calibre.pot b/src/calibre/translations/calibre.pot index f889e85bea..fdf44d7b08 100644 --- a/src/calibre/translations/calibre.pot +++ b/src/calibre/translations/calibre.pot @@ -4,9 +4,9 @@ # msgid "" msgstr "" -"Project-Id-Version: calibre 0.7.37\n" -"POT-Creation-Date: 2011-01-02 15:47+MST\n" -"PO-Revision-Date: 2011-01-02 15:47+MST\n" +"Project-Id-Version: calibre 0.7.38\n" +"POT-Creation-Date: 2011-01-07 13:12+MST\n" +"PO-Revision-Date: 2011-01-07 13:12+MST\n" "Last-Translator: Automatically generated\n" "Language-Team: LANGUAGE\n" "MIME-Version: 1.0\n" @@ -36,20 +36,20 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/ebooks/chm/metadata.py:56 #: /home/kovid/work/calibre/src/calibre/ebooks/comic/input.py:407 #: /home/kovid/work/calibre/src/calibre/ebooks/epub/periodical.py:127 -#: /home/kovid/work/calibre/src/calibre/ebooks/fb2/input.py:96 -#: /home/kovid/work/calibre/src/calibre/ebooks/fb2/input.py:98 +#: /home/kovid/work/calibre/src/calibre/ebooks/fb2/input.py:100 +#: /home/kovid/work/calibre/src/calibre/ebooks/fb2/input.py:102 #: /home/kovid/work/calibre/src/calibre/ebooks/html/input.py:332 #: /home/kovid/work/calibre/src/calibre/ebooks/html/input.py:335 #: /home/kovid/work/calibre/src/calibre/ebooks/lrf/html/convert_from.py:1894 #: /home/kovid/work/calibre/src/calibre/ebooks/lrf/html/convert_from.py:1896 #: /home/kovid/work/calibre/src/calibre/ebooks/lrf/output.py:24 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/__init__.py:235 -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:30 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:31 -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:71 -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:378 -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:383 -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:615 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:32 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:73 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:380 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:385 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:617 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/ereader.py:36 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/ereader.py:61 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/fb2.py:54 @@ -87,7 +87,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:118 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:173 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:174 -#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/input.py:40 +#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/input.py:27 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/palmdoc/writer.py:29 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ztxt/writer.py:27 #: /home/kovid/work/calibre/src/calibre/ebooks/pdf/manipulate/crop.py:82 @@ -144,8 +144,8 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/library/database2.py:2421 #: /home/kovid/work/calibre/src/calibre/library/database2.py:2552 #: /home/kovid/work/calibre/src/calibre/library/server/mobile.py:229 -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:146 -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:149 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:158 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:161 #: /home/kovid/work/calibre/src/calibre/library/server/xml.py:79 #: /home/kovid/work/calibre/src/calibre/utils/localization.py:118 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:46 @@ -160,27 +160,31 @@ msgstr "" msgid "Base" msgstr "" -#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:200 +#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:130 +msgid "Customize" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:294 msgid "File type" msgstr "" -#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:236 +#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:330 msgid "Metadata reader" msgstr "" -#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:266 +#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:360 msgid "Metadata writer" msgstr "" -#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:296 +#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:390 msgid "Catalog generator" msgstr "" -#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:405 +#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:499 msgid "User Interface Action" msgstr "" -#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:431 +#: /home/kovid/work/calibre/src/calibre/customize/__init__.py:525 #: /home/kovid/work/calibre/src/calibre/gui2/actions/preferences.py:18 #: /home/kovid/work/calibre/src/calibre/gui2/actions/preferences.py:23 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/main.py:189 @@ -618,11 +622,11 @@ msgstr "" msgid "Communicate with Android phones." msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:57 +#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:58 msgid "Comma separated list of directories to send e-books to on the device. The first one that exists will be used" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:101 +#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:102 msgid "Communicate with S60 phones." msgstr "" @@ -699,7 +703,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/devices/apple/driver.py:2554 #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi.py:63 -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:598 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:599 #: /home/kovid/work/calibre/src/calibre/library/database2.py:2246 #: /home/kovid/work/calibre/src/calibre/library/database2.py:2264 msgid "Catalog" @@ -1073,14 +1077,22 @@ msgstr "" msgid "All by author" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:64 +#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:65 msgid "Comma separated list of metadata fields to turn into collections on the device. Possibilities include: " msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:67 +#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:68 msgid ". Two special collections are available: %s:%s and %s:%s. Add these values to the list to enable them. The collections will be given the name provided after the \":\" character." msgstr "" +#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:72 +msgid "Upload separate cover thumbnails for books (newer readers)" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:73 +msgid "Normally, the SONY readers get the cover image from the ebook file itself. With this option, calibre will send a separate cover image to the reader, useful if you are sending DRMed books in which you cannot change the cover. WARNING: This option should only be used with newer SONY readers: 350, 650, 950 and newer." +msgstr "" + #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:190 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/structure.py:68 msgid "Unnamed" @@ -1164,33 +1176,32 @@ msgstr "" msgid "Configure Device" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:37 +#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:51 msgid "settings for device drivers" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:39 +#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:53 msgid "Ordered list of formats the device will accept" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:41 +#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:55 msgid "Place files in sub directories if the device supports them" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:43 -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:86 +#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:57 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:81 msgid "Read metadata from files on device" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:45 +#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:59 msgid "Use author sort instead of author" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:47 +#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:61 msgid "Template to control how books are saved" msgstr "" -#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:50 -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:89 +#: /home/kovid/work/calibre/src/calibre/devices/usbms/deviceconfig.py:64 msgid "Extra customization" msgstr "" @@ -2000,23 +2011,23 @@ msgstr "" msgid "Extract common e-book formats from archives (zip/rar) files. Also try to autodetect if they are actually cbz/cbr files." msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:114 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:116 msgid "TEMPLATE ERROR" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:540 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:542 #: /home/kovid/work/calibre/src/calibre/gui2/custom_column_widgets.py:62 #: /home/kovid/work/calibre/src/calibre/gui2/custom_column_widgets.py:494 msgid "No" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:540 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:542 #: /home/kovid/work/calibre/src/calibre/gui2/custom_column_widgets.py:62 #: /home/kovid/work/calibre/src/calibre/gui2/custom_column_widgets.py:494 msgid "Yes" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:614 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:616 #: /home/kovid/work/calibre/src/calibre/ebooks/pdf/manipulate/info.py:45 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info.py:112 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info.py:113 @@ -2026,11 +2037,11 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:361 #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:907 #: /home/kovid/work/calibre/src/calibre/library/field_metadata.py:304 -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:578 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:590 msgid "Title" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:615 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:617 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:61 #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:67 #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:366 @@ -2038,18 +2049,18 @@ msgstr "" msgid "Author(s)" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:616 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:618 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:63 #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:72 msgid "Publisher" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:617 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:619 #: /home/kovid/work/calibre/src/calibre/ebooks/pdf/manipulate/info.py:49 msgid "Producer" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:618 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:620 #: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:40 #: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:214 #: /home/kovid/work/calibre/src/calibre/gui2/convert/metadata_ui.py:189 @@ -2061,7 +2072,7 @@ msgstr "" msgid "Comments" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:620 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:622 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:166 #: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:30 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/tag_categories.py:60 @@ -2073,7 +2084,7 @@ msgstr "" msgid "Tags" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:622 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:624 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:164 #: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:29 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/tag_categories.py:60 @@ -2084,16 +2095,16 @@ msgstr "" msgid "Series" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:623 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:625 msgid "Language" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:625 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:627 #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:1102 msgid "Timestamp" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:627 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:629 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:163 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:65 #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:70 @@ -2101,7 +2112,7 @@ msgstr "" msgid "Published" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:629 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/book/base.py:631 msgid "Rights" msgstr "" @@ -2611,18 +2622,6 @@ msgstr "" msgid "Sidebar" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/input.py:23 -#: /home/kovid/work/calibre/src/calibre/ebooks/tcr/input.py:23 -#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:24 -msgid "Normally calibre treats blank lines as paragraph markers. With this option it will assume that every line represents a paragraph instead." -msgstr "" - -#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/input.py:27 -#: /home/kovid/work/calibre/src/calibre/ebooks/tcr/input.py:27 -#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:28 -msgid "Normally calibre treats blank lines as paragraph markers. With this option it will assume that every line starting with an indent (either a tab or 2+ spaces) represents a paragraph. Paragraphs end when the next line that starts with an indent is reached." -msgstr "" - #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/output.py:23 msgid "Format to use inside the pdb container. Choices are:" msgstr "" @@ -2906,15 +2905,28 @@ msgstr "" msgid " (Preface)" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:34 +#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:26 +msgid "" +"Paragraph structure.\n" +"choices are ['auto', 'block', 'single', 'print', 'markdown']\n" +"* auto: Try to auto detect paragraph type.\n" +"* block: Treat a blank line as a paragraph break.\n" +"* single: Assume every line is a paragraph.\n" +"* print: Assume every line starting with 2+ spaces or a tab starts a paragraph." +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:35 +msgid "" +"Formatting used within the document.* auto: Try to auto detect the document formatting.\n" +"* none: Do not modify the paragraph formatting. Everything is a paragraph.\n" +"* markdown: Run the input though the markdown pre-processor. To learn more about markdown see" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:41 msgid "Normally extra spaces are condensed into a single space. With this option all spaces will be displayed." msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:37 -msgid "Run the text input through the markdown pre-processor. To learn more about markdown see" -msgstr "" - -#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:40 +#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:44 msgid "Do not insert a Table of Contents into the output text." msgstr "" @@ -3338,7 +3350,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/actions/choose_library.py:152 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/toolbar.py:51 #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:167 -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:114 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:126 msgid "%d books" msgstr "" @@ -3544,7 +3556,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/actions/copy_to_library.py:150 #: /home/kovid/work/calibre/src/calibre/gui2/device.py:680 -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:816 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:813 #: /home/kovid/work/calibre/src/calibre/gui2/metadata.py:190 msgid "Failed" msgstr "" @@ -4198,11 +4210,11 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/add_wizard/welcome_ui.py:71 #: /home/kovid/work/calibre/src/calibre/gui2/convert/debug_ui.py:57 #: /home/kovid/work/calibre/src/calibre/gui2/convert/debug_ui.py:58 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:142 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:143 #: /home/kovid/work/calibre/src/calibre/gui2/convert/metadata_ui.py:176 #: /home/kovid/work/calibre/src/calibre/gui2/convert/xexp_edit_ui.py:58 -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:84 -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:85 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:79 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:80 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/choose_library_ui.py:86 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single_ui.py:426 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single_ui.py:428 @@ -4264,7 +4276,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:26 #: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:53 #: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:62 -#: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:432 +#: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:434 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info.py:130 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info.py:131 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info.py:132 @@ -4311,7 +4323,7 @@ msgstr "" msgid "None" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:431 +#: /home/kovid/work/calibre/src/calibre/gui2/book_details.py:433 msgid "Double-click to open Book Details window" msgstr "" @@ -4328,7 +4340,6 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/convert/fb2_output.py:15 #: /home/kovid/work/calibre/src/calibre/gui2/convert/lrf_output.py:20 #: /home/kovid/work/calibre/src/calibre/gui2/convert/mobi_output.py:20 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input.py:13 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output.py:17 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdf_input.py:13 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdf_output.py:18 @@ -4365,25 +4376,25 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/convert/epub_output_ui.py:56 #: /home/kovid/work/calibre/src/calibre/gui2/convert/fb2_input_ui.py:33 #: /home/kovid/work/calibre/src/calibre/gui2/convert/fb2_output_ui.py:38 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:136 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:137 #: /home/kovid/work/calibre/src/calibre/gui2/convert/lrf_output_ui.py:120 #: /home/kovid/work/calibre/src/calibre/gui2/convert/metadata_ui.py:171 #: /home/kovid/work/calibre/src/calibre/gui2/convert/mobi_output_ui.py:74 #: /home/kovid/work/calibre/src/calibre/gui2/convert/page_setup_ui.py:120 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:36 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:46 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:47 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdf_input_ui.py:43 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdf_output_ui.py:47 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:42 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:46 #: /home/kovid/work/calibre/src/calibre/gui2/convert/rb_output_ui.py:33 #: /home/kovid/work/calibre/src/calibre/gui2/convert/snb_output_ui.py:42 #: /home/kovid/work/calibre/src/calibre/gui2/convert/structure_detection_ui.py:80 #: /home/kovid/work/calibre/src/calibre/gui2/convert/toc_ui.py:67 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:51 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:65 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:58 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:66 #: /home/kovid/work/calibre/src/calibre/gui2/convert/xexp_edit_ui.py:55 #: /home/kovid/work/calibre/src/calibre/gui2/convert/xpath_wizard_ui.py:72 -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:82 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:77 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/search_item_ui.py:40 #: /home/kovid/work/calibre/src/calibre/gui2/filename_pattern_ui.py:111 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/adding_ui.py:48 @@ -4859,7 +4870,6 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/convert/comic_input.py:16 #: /home/kovid/work/calibre/src/calibre/gui2/convert/fb2_input.py:13 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input.py:13 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdf_input.py:13 #: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input.py:13 msgid "input" @@ -5026,15 +5036,15 @@ msgid "&Base font size:" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/convert/font_key_ui.py:110 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:140 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:141 msgid "Font size &key:" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/convert/font_key_ui.py:111 #: /home/kovid/work/calibre/src/calibre/gui2/convert/font_key_ui.py:115 #: /home/kovid/work/calibre/src/calibre/gui2/convert/font_key_ui.py:117 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:139 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:144 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:140 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:145 #: /home/kovid/work/calibre/src/calibre/gui2/convert/lrf_output_ui.py:123 #: /home/kovid/work/calibre/src/calibre/gui2/convert/lrf_output_ui.py:125 #: /home/kovid/work/calibre/src/calibre/gui2/convert/lrf_output_ui.py:130 @@ -5085,75 +5095,75 @@ msgstr "" msgid "Justify text" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:137 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:138 msgid "&Disable font size rescaling" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:138 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:139 msgid "Base &font size:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:141 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:142 msgid "Wizard to help you choose an appropriate font size key" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:143 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:144 msgid "Line &height:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:145 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:146 msgid "Input character &encoding:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:146 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:147 msgid "Remove &spacing between paragraphs" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:147 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:148 msgid "Indent size:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:148 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:149 msgid "

When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent." msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:149 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:150 msgid " em" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:150 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:151 msgid "Text justification:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:151 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:152 msgid "&Linearize tables" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:152 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:153 msgid "Extra &CSS" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:153 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:154 msgid "&Transliterate unicode characters to ASCII" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:154 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:155 msgid "Insert &blank line" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:155 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:156 msgid "Keep &ligatures" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:156 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:157 msgid "Smarten &punctuation" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:157 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:158 msgid "Minimum &line height:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:158 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/look_and_feel_ui.py:159 msgid " %" msgstr "" @@ -5410,17 +5420,11 @@ msgstr "" msgid "&Bottom:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input.py:12 -msgid "PDB Input" -msgstr "" - #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:37 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:52 msgid "Treat each &line as a paragraph" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:38 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:53 msgid "Assume print formatting" msgstr "" @@ -5428,20 +5432,20 @@ msgstr "" msgid "PDB Output" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:47 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:48 msgid "&Format:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:48 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:43 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:49 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:47 #: /home/kovid/work/calibre/src/calibre/gui2/convert/rb_output_ui.py:34 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:67 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:68 msgid "&Inline TOC" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:49 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:45 -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:73 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_output_ui.py:50 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:49 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:74 msgid "Output Encoding:" msgstr "" @@ -5477,7 +5481,7 @@ msgstr "" msgid "PMLZ Output" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:44 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/pmlz_output_ui.py:48 msgid "Do not reduce image size and depth" msgstr "" @@ -5684,47 +5688,55 @@ msgstr "" msgid "TXT Input" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:54 -msgid "Process using markdown" +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:59 +msgid "Paragraph style:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:55 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:60 +msgid "Preserve &spaces" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:61 +msgid "Formatting style:" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:62 +msgid "Markdown Options" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:63 msgid "

Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit markdown." msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:56 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:64 msgid "Do not insert Table of Contents into output text when using markdown" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:57 -msgid "Preserve &spaces" -msgstr "" - #: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output.py:16 msgid "TXT Output" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:66 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:67 msgid "&Line ending style:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:68 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:69 msgid "&Maximum line length:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:69 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:70 msgid "Force maximum line length" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:70 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:71 msgid "Apply Markdown formatting to text" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:71 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:72 msgid "Do not remove links ( tags) before processing" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:72 +#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:73 msgid "Do not remove image references before processing" msgstr "" @@ -6051,37 +6063,39 @@ msgstr "" msgid "

Cannot upload books to device there is no more free space available " msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget.py:89 +#: +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget.py:118 #: /home/kovid/work/calibre/src/calibre/gui2/library/delegates.py:388 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugboard.py:234 -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template.py:57 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template.py:61 msgid "Invalid template" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget.py:90 +#: +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget.py:119 #: /home/kovid/work/calibre/src/calibre/gui2/library/delegates.py:389 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugboard.py:235 -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template.py:58 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template.py:62 msgid "The template %s is invalid:" msgstr "" #: -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:83 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:78 msgid "Select available formats and their order for this device" msgstr "" #: -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:87 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:82 msgid "Use sub directories" msgstr "" #: -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:88 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:83 msgid "Use author sort for author" msgstr "" #: -#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:90 +#: /home/kovid/work/calibre/src/calibre/gui2/device_drivers/configwidget_ui.py:84 msgid "Save &template:" msgstr "" @@ -6118,7 +6132,7 @@ msgid "My Books" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/catalog_ui.py:80 -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:308 +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:309 msgid "Generate catalog" msgstr "" @@ -6325,7 +6339,7 @@ msgstr "" #: #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/confirm_delete_location_ui.py:63 #: /home/kovid/work/calibre/src/calibre/gui2/layout.py:63 -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:218 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:230 msgid "Library" msgstr "" @@ -6364,7 +6378,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/library/models.py:909 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/create_custom_column.py:33 #: /home/kovid/work/calibre/src/calibre/library/field_metadata.py:295 -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:577 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:589 msgid "Date" msgstr "" @@ -6584,49 +6598,49 @@ msgstr "" msgid "Immediately make all changes without closing the dialog. This operation cannot be canceled or undone" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:338 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:339 msgid "Book %d:" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:353 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:354 msgid "You can destroy your library using this feature. Changes are permanent. There is no undo function. You are strongly encouraged to back up your library before proceeding.

Search and replace in text fields using character matching or regular expressions. " msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:361 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:362 msgid "In character mode, the field is searched for the entered search text. The text is replaced by the specified replacement text everywhere it is found in the specified field. After replacement is finished, the text can be changed to upper-case, lower-case, or title-case. If the case-sensitive check box is checked, the search text must match exactly. If it is unchecked, the search text will match both upper- and lower-case letters" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:372 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:373 msgid "In regular expression mode, the search text is an arbitrary python-compatible regular expression. The replacement text can contain backreferences to parenthesized expressions in the pattern. The search is not anchored, and can match and replace multiple times on the same string. The modification functions (lower-case etc) are applied to the matched text, not to the field as a whole. The destination box specifies the field where the result after matching and replacement is to be assigned. You can replace the text in the field, or prepend or append the matched text. See this reference for more information on python's regular expressions, and in particular the 'sub' function." msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:427 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:428 msgid "S/R TEMPLATE ERROR" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:545 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:548 msgid "You must specify a destination when source is a composite field" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:654 -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:662 -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:757 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:651 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:659 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:754 msgid "Search/replace invalid" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:655 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:652 msgid "Authors cannot be set to the empty string. Book title %s not processed" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:663 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:660 msgid "Title cannot be set to the empty string. Book title %s not processed" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:758 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:755 msgid "Search pattern is invalid: %s" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:802 +#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:799 msgid "" "Applying changes to %d books.\n" "Phase {0} {1}%%." @@ -7448,7 +7462,7 @@ msgid "Advanced Search" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/search_ui.py:199 -msgid "What kind of match to use:" +msgid "&What kind of match to use:" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/search_ui.py:200 @@ -7927,7 +7941,7 @@ msgid "Attached, you will find the e-book" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/email.py:247 -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:107 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:106 msgid "by" msgstr "" @@ -9263,57 +9277,53 @@ msgstr "" msgid "Delete plugboard" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:100 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:99 msgid "%(plugin_type)s %(plugins)s" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:101 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:100 msgid "plugins" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:110 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:109 msgid "" "\n" "Customization: " msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:156 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:160 msgid "No valid plugin path" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:157 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:161 msgid "%s is not a valid plugin path" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:160 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:164 msgid "Choose plugin" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:172 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:176 msgid "Plugin cannot be disabled" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:173 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:177 msgid "The plugin: %s cannot be disabled" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:183 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:187 msgid "Plugin not customizable" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:184 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:188 msgid "Plugin: %s does not need customization" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:194 -msgid "Customize" -msgstr "" - -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:237 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:199 msgid "Cannot remove builtin plugin" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:238 +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:200 msgid " cannot be removed. It is a builtin plugin. Try disabling it instead." msgstr "" @@ -9353,6 +9363,14 @@ msgstr "" msgid "The lookup name of any custom field. These names begin with \"#\")" msgstr "" +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template.py:57 +msgid "Constant template" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template.py:58 +msgid "The template contains no {fields}, so all books will have the same name. Is this OK?" +msgstr "" + #: #: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template_ui.py:47 msgid "Save &template" @@ -9436,7 +9454,7 @@ msgid "Here you can control how calibre will save your books when you click the msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:75 -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:340 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:341 msgid "Failed to start content server" msgstr "" @@ -9836,101 +9854,104 @@ msgid "Add your own categories to the Tag Browser" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/tools.py:64 -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:183 -msgid "Convert book %d of %d (%s)" -msgstr "" - -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:91 -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:203 -msgid "Could not convert some books" +msgid "Convert book %(num)d of %(total)d (%(title)s)" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/tools.py:92 #: /home/kovid/work/calibre/src/calibre/gui2/tools.py:204 +msgid "Could not convert some books" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:93 +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:205 msgid "Could not convert %d of %d books, because no suitable source format was found." msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:121 +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:122 msgid "Queueing books for bulk conversion" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:182 +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:183 msgid "Queueing " msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:250 +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:184 +msgid "Convert book %d of %d (%s)" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:251 msgid "Fetch news from " msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:320 +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:321 msgid "Convert existing" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:321 +#: /home/kovid/work/calibre/src/calibre/gui2/tools.py:322 msgid "The following books have already been converted to %s format. Do you wish to reconvert them?" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:171 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:172 msgid "&Restore" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:173 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:174 msgid "&Donate to support calibre" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:177 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:178 msgid "&Eject connected device" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:218 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:219 msgid "Calibre Quick Start Guide" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:266 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:267 msgid "Debug mode" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:267 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:268 msgid "You have started calibre in debug mode. After you quit calibre, the debug log will be available in the file: %s

The log will be displayed automatically." msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:451 -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:462 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:452 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:463 msgid "Conversion Error" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:463 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:464 msgid "

Could not convert: %s

It is a DRMed book. You must first remove the DRM using third party tools." msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:477 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:478 msgid "Recipe Disabled" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:493 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:494 msgid "Failed" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:530 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:531 msgid "is the result of the efforts of many volunteers from all over the world. If you find it useful, please consider donating to support its development. Your donation helps keep calibre development going." msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:556 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:557 msgid "There are active jobs. Are you sure you want to quit?" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:559 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:560 msgid "" " is communicating with the device!
\n" " Quitting may cause corruption on the device.
\n" " Are you sure you want to quit?" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:563 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:564 msgid "WARNING: Active jobs" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:638 +#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:639 msgid "will keep running in the system tray. To close it, choose Quit in the context menu of the system tray." msgstr "" @@ -10462,19 +10483,19 @@ msgstr "" msgid "Swap Case" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:868 +#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:893 msgid "Drag to resize" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:903 +#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:928 msgid "Show" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:910 +#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:935 msgid "Hide" msgstr "" -#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:947 +#: /home/kovid/work/calibre/src/calibre/gui2/widgets.py:972 msgid "Toggle" msgstr "" @@ -10741,54 +10762,54 @@ msgstr "" msgid "Turn on the &content server" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:264 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:297 msgid "today" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:267 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:300 msgid "yesterday" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:270 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:303 msgid "thismonth" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:273 -#: /home/kovid/work/calibre/src/calibre/library/caches.py:274 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:306 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:307 msgid "daysago" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:453 -#: /home/kovid/work/calibre/src/calibre/library/caches.py:463 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:486 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:496 msgid "unchecked" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:453 -#: /home/kovid/work/calibre/src/calibre/library/caches.py:463 -#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:183 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:486 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:496 +#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:185 msgid "no" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:456 -#: /home/kovid/work/calibre/src/calibre/library/caches.py:466 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:489 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:499 msgid "checked" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:456 -#: /home/kovid/work/calibre/src/calibre/library/caches.py:466 -#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:183 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:489 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:499 +#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:185 msgid "yes" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:460 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:493 msgid "blank" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/caches.py:460 +#: /home/kovid/work/calibre/src/calibre/library/caches.py:493 msgid "empty" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:52 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:53 msgid "" "The fields to output when cataloging books in the database. Should be a comma-separated list of fields.\n" "Available fields: %s,\n" @@ -10797,7 +10818,7 @@ msgid "" "Applies to: CSV, XML output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:63 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:64 msgid "" "Output field to sort on.\n" "Available fields: author_sort, id, rating, size, timestamp, title.\n" @@ -10805,7 +10826,7 @@ msgid "" "Applies to: CSV, XML output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:230 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:231 msgid "" "The fields to output when cataloging books in the database. Should be a comma-separated list of fields.\n" "Available fields: %s.\n" @@ -10813,7 +10834,7 @@ msgid "" "Applies to: BIBTEX output format" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:240 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:241 msgid "" "Output field to sort on.\n" "Available fields: author_sort, id, rating, size, timestamp, title.\n" @@ -10821,7 +10842,7 @@ msgid "" "Applies to: BIBTEX output format" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:249 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:250 msgid "" "Create a citation for BibTeX entries.\n" "Boolean value: True, False\n" @@ -10829,7 +10850,7 @@ msgid "" "Applies to: BIBTEX output format" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:258 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:259 msgid "" "The template for citation creation from database fields.\n" " Should be a template with {} enclosed fields.\n" @@ -10838,7 +10859,7 @@ msgid "" "Applies to: BIBTEX output format" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:268 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:269 msgid "" "BibTeX file encoding output.\n" "Available types: utf8, cp1252, ascii.\n" @@ -10846,7 +10867,7 @@ msgid "" "Applies to: BIBTEX output format" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:277 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:278 msgid "" "BibTeX file encoding flag.\n" "Available types: strict, replace, ignore, backslashreplace.\n" @@ -10854,7 +10875,7 @@ msgid "" "Applies to: BIBTEX output format" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:286 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:287 msgid "" "Entry type for BibTeX catalog.\n" "Available types: book, misc, mixed.\n" @@ -10862,35 +10883,35 @@ msgid "" "Applies to: BIBTEX output format" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:571 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:572 msgid "" "Title of generated catalog used as title in metadata.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:578 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:579 msgid "" "Save the output from different stages of the conversion pipeline to the specified directory. Useful if you are unsure at which stage of the conversion process a bug is occurring.\n" "Default: '%default'None\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:588 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:589 msgid "" "field:pattern specifying custom field/contents indicating book should be excluded.\n" "Default: '%default'\n" "Applies to ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:595 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:596 msgid "" "Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[]'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:601 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:602 msgid "" "Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n" "--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n" @@ -10898,49 +10919,49 @@ msgid "" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:609 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:610 msgid "" "Include book descriptions in catalog.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:616 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:617 msgid "" "Include 'Genres' section in catalog.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:623 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:624 msgid "" "Include 'Titles' section in catalog.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:630 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:631 msgid "" "Include 'Series' section in catalog.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:637 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:638 msgid "" "Include 'Recently Added' section in catalog.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:644 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:645 msgid "" "Custom field containing note text to insert in Description header.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:651 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:652 msgid "" ":[before|after]:[True|False] specifying:\n" " Custom field containing notes to merge with Comments\n" @@ -10950,21 +10971,21 @@ msgid "" "Applies to ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:661 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:662 msgid "" "Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n" "Default: '%default'\n" "Applies to: ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:668 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:669 msgid "" "field:pattern indicating book has been read.\n" "Default: '%default'\n" "Applies to ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:674 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:675 msgid "" "Size hint (in inches) for book covers in catalog.\n" "Range: 1.0 - 2.0\n" @@ -10972,7 +10993,7 @@ msgid "" "Applies to ePub, MOBI output formats" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/catalog.py:682 +#: /home/kovid/work/calibre/src/calibre/library/catalog.py:683 msgid "" "Tag indicating book to be displayed as wishlist item.\n" "Default: '%default'\n" @@ -11472,11 +11493,11 @@ msgid "" "For help on an individual command: %%prog command --help\n" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/custom_columns.py:557 +#: /home/kovid/work/calibre/src/calibre/library/custom_columns.py:573 msgid "No label was provided" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/custom_columns.py:559 +#: /home/kovid/work/calibre/src/calibre/library/custom_columns.py:575 msgid "The label must contain only lower case letters, digits and underscores, and start with a letter" msgstr "" @@ -11621,8 +11642,8 @@ msgstr "" msgid "Replace whitespace with underscores." msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:352 -#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:376 +#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:354 +#: /home/kovid/work/calibre/src/calibre/library/save_to_disk.py:378 msgid "Requested formats not available" msgstr "" @@ -11719,7 +11740,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:337 #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:547 -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:577 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:589 msgid "Newest" msgstr "" @@ -11757,7 +11778,7 @@ msgid "Other formats" msgstr "" #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:643 -msgid "Read %s in the %s format" +msgid "Read %(title)s in the %(fmt)s format" msgstr "" #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:648 @@ -11813,35 +11834,35 @@ msgstr "" msgid "Auto reload server when source code changes. May not work in all environments." msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:114 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:126 msgid "%d book" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:138 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:150 msgid "%d items" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:156 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:168 msgid "RATING: %s
" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:159 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:171 msgid "TAGS: %s
" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:164 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:176 msgid "SERIES: %s [%s]
" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:257 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:269 msgid "Books in your library" msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:263 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:275 msgid "By " msgstr "" -#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:264 +#: /home/kovid/work/calibre/src/calibre/library/server/opds.py:276 msgid "Books sorted by " msgstr "" @@ -11958,19 +11979,15 @@ msgstr "" msgid "switch requires an odd number of arguments" msgstr "" -#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:312 +#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:323 msgid "format: type {0} requires an integer value, got {1}" msgstr "" -#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:318 +#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:329 msgid "format: type {0} requires a decimal (float) value, got {1}" msgstr "" -#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:320 -msgid "format: unknown format type letter {0}" -msgstr "" - -#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:471 +#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:481 msgid "No such variable " msgstr "" From 0d5c69cd64efeb5e15f938a17920d26b2891aa51 Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 7 Jan 2011 18:46:14 -0500 Subject: [PATCH 04/26] Fix bug #8231. --- src/calibre/ebooks/pdb/palmdoc/reader.py | 6 +++--- src/calibre/ebooks/pdb/pdf/reader.py | 6 +++--- src/calibre/ebooks/pdb/ztxt/reader.py | 6 +++--- src/calibre/ebooks/tcr/input.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py index 945e31559a..439492ba0c 100644 --- a/src/calibre/ebooks/pdb/palmdoc/reader.py +++ b/src/calibre/ebooks/pdb/palmdoc/reader.py @@ -65,9 +65,9 @@ class Reader(FormatReader): from calibre.customize.ui import plugin_for_input_format txt_plugin = plugin_for_input_format('txt') - for option in txt_plugin.options: - if not hasattr(self.options, option.option.name): - setattr(self.options, option.name, option.recommended_value) + for opt in txt_plugin.options: + if not hasattr(self.options, opt.option.name): + setattr(self.options, opt.option.name, opt.recommended_value) stream.seek(0) return txt_plugin.convert(stream, self.options, 'txt', self.log, {}) diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py index 30b0c4c57c..2a9636b083 100644 --- a/src/calibre/ebooks/pdb/pdf/reader.py +++ b/src/calibre/ebooks/pdb/pdf/reader.py @@ -31,9 +31,9 @@ class Reader(FormatReader): from calibre.customize.ui import plugin_for_input_format pdf_plugin = plugin_for_input_format('pdf') - for option in pdf_plugin.options: - if not hasattr(self.options, option.option.name): - setattr(self.options, option.name, option.recommended_value) + for opt in pdf_plugin.options: + if not hasattr(self.options, opt.option.name): + setattr(self.options, opt.option.name, opt.recommended_value) pdf.seek(0) return pdf_plugin.convert(pdf, self.options, 'pdf', self.log, {}) diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py index 6e7f5dd923..cff7382754 100644 --- a/src/calibre/ebooks/pdb/ztxt/reader.py +++ b/src/calibre/ebooks/pdb/ztxt/reader.py @@ -83,9 +83,9 @@ class Reader(FormatReader): from calibre.customize.ui import plugin_for_input_format txt_plugin = plugin_for_input_format('txt') - for option in txt_plugin.options: - if not hasattr(self.options, option.option.name): - setattr(self.options, option.name, option.recommended_value) + for opt in txt_plugin.options: + if not hasattr(self.options, opt.option.name): + setattr(self.options, opt.option.name, opt.recommended_value) stream.seek(0) return txt_plugin.convert(stream, self.options, 'txt', self.log, {}) diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/tcr/input.py index aac72da7a8..4d15fd0923 100644 --- a/src/calibre/ebooks/tcr/input.py +++ b/src/calibre/ebooks/tcr/input.py @@ -26,9 +26,9 @@ class TCRInput(InputFormatPlugin): from calibre.customize.ui import plugin_for_input_format txt_plugin = plugin_for_input_format('txt') - for option in txt_plugin.options: - if not hasattr(options, option.option.name): - setattr(options, option.name, option.recommended_value) + for opt in txt_plugin.options: + if not hasattr(self.options, opt.option.name): + setattr(self.options, opt.option.name, opt.recommended_value) stream.seek(0) return txt_plugin.convert(stream, options, From 1a0d6c13fb16d925d8ac728a5f216579ea75d7d0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 7 Jan 2011 17:53:43 -0700 Subject: [PATCH 05/26] Fix #8232 (Conversion bug in HTML to MOBI) --- src/calibre/ebooks/oeb/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index c015868992..e11f6b45be 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1892,7 +1892,7 @@ class OEBBook(object): return fix_data(data.decode(bom_enc)) except UnicodeDecodeError: pass - if self.input_encoding is not None: + if self.input_encoding: try: return fix_data(data.decode(self.input_encoding, 'replace')) except UnicodeDecodeError: From 3f30de95a34dc2c3d418a962355f89f43f6c32db Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 7 Jan 2011 18:18:43 -0700 Subject: [PATCH 06/26] ... --- src/calibre/ebooks/conversion/plumber.py | 2 ++ src/calibre/ebooks/html/input.py | 2 +- src/calibre/ebooks/mobi/reader.py | 2 +- src/calibre/gui2/convert/__init__.py | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index f5beba375d..b1d760ea2d 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -977,6 +977,8 @@ def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None, from calibre.ebooks.oeb.base import OEBBook html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html, opts.preprocess_html, opts) + if not encoding: + encoding = None oeb = OEBBook(log, html_preprocessor, pretty_print=opts.pretty_print, input_encoding=encoding) if not populate: diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 6f875ae803..1f07f4ca41 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -119,7 +119,7 @@ class HTMLFile(object): self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096])) if not self.is_binary: - if encoding is None: + if not encoding: encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1] self.encoding = encoding else: diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 14e3ed11c3..57f32e7131 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -139,7 +139,7 @@ class BookHeader(object): 65001: 'utf-8', }[self.codepage] except (IndexError, KeyError): - self.codec = 'cp1252' if user_encoding is None else user_encoding + self.codec = 'cp1252' if not user_encoding else user_encoding log.warn('Unknown codepage %d. Assuming %s' % (self.codepage, self.codec)) if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length \ diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py index ea7a24510a..925fecd693 100644 --- a/src/calibre/gui2/convert/__init__.py +++ b/src/calibre/gui2/convert/__init__.py @@ -146,6 +146,8 @@ class Widget(QWidget): codecs.lookup(ans) except: ans = '' + if not ans: + ans = None return ans elif isinstance(g, QComboBox): return unicode(g.currentText()) From 90177a42053f29c302faf7483de6dd3fc455d400 Mon Sep 17 00:00:00 2001 From: ldolse Date: Sat, 8 Jan 2011 09:23:32 +0800 Subject: [PATCH 07/26] tweaked threshold for preprocess --- src/calibre/ebooks/conversion/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index f367aa02d7..5db920b01d 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -194,7 +194,7 @@ class PreProcessor(object): totalwords = 0 totalwords = self.get_word_count(html) - if totalwords < 20: + if totalwords < 50: self.log("not enough text, not preprocessing") return html From 5854f5308e46d1be747cf85d789d9ca9de78e80b Mon Sep 17 00:00:00 2001 From: ldolse Date: Sat, 8 Jan 2011 19:45:51 +0800 Subject: [PATCH 08/26] moved punctuation unwrap into a function, tied to txt input --- src/calibre/ebooks/conversion/utils.py | 20 +++++++++++++++++--- src/calibre/ebooks/txt/input.py | 10 ++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 5db920b01d..27dacdf5fb 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -184,7 +184,22 @@ class PreProcessor(object): self.log("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters") return html - + def punctuation_unwrap(self, length, content, format): + # define the pieces of the regex + lookahead = "(?<=.{"+str(length)+"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?\s*()?" + blanklines = "\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*\s*)\s*){0,3}\s*" + line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" + txt_line_wrap = u"(\u0020|\u0009)*\n" + + unwrap_regex = lookahead+line_ending+blanklines+line_opening + if format == 'txt': + unwrap_regex = lookahead+txt_line_wrap + + unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE) + content = unwrap.sub(' ', content) + return content + def __call__(self, html): self.log("********* Preprocessing HTML *********") @@ -312,8 +327,7 @@ class PreProcessor(object): self.log("Done dehyphenating") # Unwrap lines using punctation and line length #unwrap_quotes = re.compile(u"(?<=.{%i}\"')\s*\s*()?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*\s*)\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*(?=[a-z])" % length, re.UNICODE) - unwrap = re.compile(u"(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?\s*()?\s*(?P<(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*\s*)\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - html = unwrap.sub(' ', html) + html = self.punctuation_unwrap(length, html, 'html') #check any remaining hyphens, but only unwrap if there is a match dehyphenator = Dehyphenator() html = dehyphenator(html,'html_cleanup', length) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 7fb22755de..98756c5fa1 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -95,6 +95,16 @@ class TXTInput(InputFormatPlugin): elif options.paragraph_type == 'print': txt = separate_paragraphs_print_formatted(txt) + if options.paragraph_type == 'unformatted': + from calibre.ebooks.conversion.utils import PreProcessor + from calibre.ebooks.conversion.preprocess import DocAnalysis + # get length + docanalysis = DocAnalysis('txt', txt) + length = docanalysis.line_length(.5) + # unwrap lines based on punctuation + preprocessor = PreProcessor(options, log=getattr(self, 'log', None)) + txt = preprocessor.punctuation_unwrap(length, txt, 'txt') + flow_size = getattr(options, 'flow_size', 0) html = convert_basic(txt, epub_split_size_kb=flow_size) From f88045c16266474ed625a0e38b0a9fa12aded75d Mon Sep 17 00:00:00 2001 From: ldolse Date: Sat, 8 Jan 2011 20:35:19 +0800 Subject: [PATCH 09/26] fixed comments --- src/calibre/ebooks/txt/processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 53935584d2..c6cf1078cd 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -102,10 +102,9 @@ def detect_paragraph_type(txt): single: Each line is a paragraph. print: Each paragraph starts with a 2+ spaces or a tab and ends when a new paragraph is reached. - markdown: Markdown formatting is in the document. unformatted: most lines have hard line breaks, few/no spaces or indents - returns block, single, print, markdown, unformatted + returns block, single, print, unformatted ''' txt = txt.replace('\r\n', '\n') txt = txt.replace('\r', '\n') From 09ff8524214cc51091f8ec8dca616e2675e40789 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 06:53:24 -0700 Subject: [PATCH 10/26] El Publico by Gerardo Diez. Fixes #405 (New news feed) --- resources/recipes/deia.recipe | 2 +- resources/recipes/el_publico.recipe | 43 +++++++++++++++++++++++++ resources/recipes/elpais_impreso.recipe | 8 ++--- 3 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 resources/recipes/el_publico.recipe diff --git a/resources/recipes/deia.recipe b/resources/recipes/deia.recipe index 980d59d3d1..5d39be9a10 100644 --- a/resources/recipes/deia.recipe +++ b/resources/recipes/deia.recipe @@ -22,7 +22,7 @@ class Deia(BasicNewsRecipe): cover_url ='http://2.bp.blogspot.com/_RjrWzC6tI14/TM6jrPLaBZI/AAAAAAAAFaI/ayffwxidFEY/s1600/2009-10-13-logo-deia.jpg' timefmt ='[%a, %d %b, %Y]' encoding ='utf8' - language ='es_ES' + language ='es' remove_javascript =True remove_tags_after =dict(id='Texto') remove_tags_before =dict(id='Texto') diff --git a/resources/recipes/el_publico.recipe b/resources/recipes/el_publico.recipe new file mode 100644 index 0000000000..d0da739b03 --- /dev/null +++ b/resources/recipes/el_publico.recipe @@ -0,0 +1,43 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Gerardo Diez' +__copyright__ = 'Gerardo Diez' +description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)' +__docformat__ = 'restructuredtext en' + +''' +publico.es +''' +from calibre.web.feeds.recipes import BasicNewsRecipe +class Publico(BasicNewsRecipe): + title =u'Publico.es' + __author__ ='Gerardo Diez' + publisher =u'Mediapubli Sociedad de Publicaciones y Ediciones S.L.' + category ='news, politics, finances, world, spain, science, catalunya' + oldest_article =1 + max_articles_per_feed =100 + simultaneous_downloads =10 + cover_url =u'http://imagenes.publico.es/css/img/logo_publico.gif' + timefmt ='[%a, %d %b, %Y]' + encoding ='utf8' + language ='es' + remove_javascript =True + no_stylesheets =True + keep_only_tags =dict(id='main') + remove_tags =[ + dict(name='div', attrs={'class':['Noticias_642x50', 'contInfo ancho']}), + dict(name='ul', attrs={'class':['navComentarios', 'comentarios']}), + dict(name='div', attrs={'id':['commentsContext', 'toolbar', 'comentarios']}), + dict(name='h5', attrs={'id':'comentarios'}) + ] + feeds =[(u'Internacional', u'http://www.publico.es/estaticos/rss/internacional'), + (u'Espa\xf1a', u'http://www.publico.es/estaticos/rss/espana'), + (u'Dinero', u'http://www.publico.es/estaticos/rss/dinero'), + (u'Ciencias', u'http://www.publico.es/estaticos/rss/ciencias'), + (u'Culturas', u'http://www.publico.es/estaticos/rss/culturas'), + (u'Deportes', u'http://www.publico.es/estaticos/rss/deportes'), + (u'Televisi\xf3n y Gente', u'http://www.publico.es/estaticos/rss/televisionygente'), + (u'Catalu\xf1a', u'http://www.publico.es/estaticos/rss/catalunya'), + (u'Viajes', u'http://www.publico.es/estaticos/rss/viajes')] + + diff --git a/resources/recipes/elpais_impreso.recipe b/resources/recipes/elpais_impreso.recipe index 130013286c..b22a41dcec 100644 --- a/resources/recipes/elpais_impreso.recipe +++ b/resources/recipes/elpais_impreso.recipe @@ -17,7 +17,7 @@ class ElPais_RSS(BasicNewsRecipe): no_stylesheets = True encoding = 'cp1252' use_embedded_content = False - language = 'es_ES' + language = 'es' remove_empty_feeds = True publication_type = 'newspaper' masthead_url = 'http://www.elpais.com/im/tit_logo.gif' @@ -57,14 +57,14 @@ class ElPais_RSS(BasicNewsRecipe): ,(u'Madrid' , u'http://www.elpais.com/rss/feed.html?feedId=1016' ) ,(u'Pais Vasco' , u'http://www.elpais.com/rss/feed.html?feedId=17062') ,(u'Galicia' , u'http://www.elpais.com/rss/feed.html?feedId=17063') - ,(u'Opinion' , u'http://www.elpais.com/rss/feed.html?feedId=1003' ) - ,(u'Sociedad' , u'http://www.elpais.com/rss/feed.html?feedId=1004' ) + ,(u'Opinion' , u'http://www.elpais.com/rss/feed.html?feedId=1003' ) + ,(u'Sociedad' , u'http://www.elpais.com/rss/feed.html?feedId=1004' ) ,(u'Deportes' , u'http://www.elpais.com/rss/feed.html?feedId=1007' ) ,(u'Cultura' , u'http://www.elpais.com/rss/feed.html?feedId=1008' ) ,(u'Cine' , u'http://www.elpais.com/rss/feed.html?feedId=17052') ,(u'Literatura' , u'http://www.elpais.com/rss/feed.html?feedId=17053') ,(u'Musica' , u'http://www.elpais.com/rss/feed.html?feedId=17051') - ,(u'Arte' , u'http://www.elpais.com/rss/feed.html?feedId=17060') + ,(u'Arte' , u'http://www.elpais.com/rss/feed.html?feedId=17060') ,(u'Tecnologia' , u'http://www.elpais.com/rss/feed.html?feedId=1005' ) ,(u'Economia' , u'http://www.elpais.com/rss/feed.html?feedId=1006' ) ,(u'Ciencia' , u'http://www.elpais.com/rss/feed.html?feedId=17068') From 823cdcc4373bc523a0ba584e0eb82febb7d1f231 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 07:27:08 -0700 Subject: [PATCH 11/26] ... --- src/calibre/manual/conversion.rst | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index 3a7ae16598..a5aad9b450 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -533,17 +533,22 @@ PDF documents are one of the worst formats to convert from. They are a fixed pag Meaning, it is very difficult to determine where one paragraph ends and another begins. |app| will try to unwrap paragraphs using a configurable, :guilabel:`Line Un-Wrapping Factor`. This is a scale used to determine the length at which a line should be unwrapped. Valid values are a decimal -between 0 and 1. The default is 0.5, this is the median line length. Lower this value to include more -text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under PDF Input. +between 0 and 1. The default is 0.45, just under the median line length. Lower this value to include more +text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`. Also, they often have headers and footers as part of the document that will become included with the text. Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not removed from the text it can throw off the paragraph unwrapping. -Some limitations of PDF input is complex, multi-column, and image based documents are not supported. -Extraction of vector images and tables from within the document is also not supported. Some PDFs use special glyphs to -represent double ll or doubfle ff or fi,etc. Conversion of these may or may not work depending on jusy how they are -represented internally in the PDF. +Some limitations of PDF input are: + + * Complex, multi-column, and image based documents are not supported. + * Extraction of vector images and tables from within the document is also not supported. + * Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF. + * Some PDFs store their images upside down with a rotation instruction, |app| currently doesn't support that instruction, so the images will be rotated in the output as well. + +To re-iterate **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an +output ranging anywhere from decent to unusable, depending on the input PDF. Comic Book Collections ~~~~~~~~~~~~~~~~~~~~~~~~~ From 8ac2dd0a65776aafcb8132aca5f256c9fcb4acd4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 07:46:55 -0700 Subject: [PATCH 12/26] Email settings: Before displaying the email test dialog warn the user that it will expose their email password --- src/calibre/gui2/wizard/send_email.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/wizard/send_email.py b/src/calibre/gui2/wizard/send_email.py index b9b65dc940..5785f52276 100644 --- a/src/calibre/gui2/wizard/send_email.py +++ b/src/calibre/gui2/wizard/send_email.py @@ -16,7 +16,7 @@ from PyQt4.Qt import QWidget, pyqtSignal, QDialog, Qt, QLabel, \ from calibre.gui2.wizard.send_email_ui import Ui_Form from calibre.utils.smtp import config as smtp_prefs from calibre.gui2.dialogs.test_email_ui import Ui_Dialog as TE_Dialog -from calibre.gui2 import error_dialog +from calibre.gui2 import error_dialog, question_dialog class TestEmail(QDialog, TE_Dialog): @@ -92,7 +92,10 @@ class SendEmail(QWidget, Ui_Form): pa = self.preferred_to_address() to_set = pa is not None if self.set_email_settings(to_set): - TestEmail(pa, self).exec_() + if question_dialog(self, _('OK to proceed?'), + _('This will display your email password on the screen' + '. Is it OK to proceed?'), show_copy_button=False): + TestEmail(pa, self).exec_() def test_email_settings(self, to): opts = smtp_prefs().parse() From 4abfeed6accf655c8f61f05bc7027de6b8ecad27 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 08:29:40 -0700 Subject: [PATCH 13/26] ... --- src/calibre/manual/conversion.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index a5aad9b450..4b2b169d72 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -538,7 +538,8 @@ text in the unwrapping. Increase to include less. You can adjust this value in t Also, they often have headers and footers as part of the document that will become included with the text. Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not -removed from the text it can throw off the paragraph unwrapping. +removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read +:ref:`regexptutorial`. Some limitations of PDF input are: From 8f7d8c1022533ef5fd07f6162b03672cadafcb92 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 10:17:36 -0700 Subject: [PATCH 14/26] Fix #8241 (Updated recipe for Exiled online) --- resources/images/news/exiled.png | Bin 0 -> 1352 bytes resources/recipes/exiled.recipe | 37 ++++++++++++++++--------------- 2 files changed, 19 insertions(+), 18 deletions(-) create mode 100644 resources/images/news/exiled.png diff --git a/resources/images/news/exiled.png b/resources/images/news/exiled.png new file mode 100644 index 0000000000000000000000000000000000000000..c233aaf132d07704afa1841db6ddb886d0a76593 GIT binary patch literal 1352 zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1|y3`~05O_bCBhr!<(hh#fNN8|y#?;i*Ri#D1>tiAti^fsE znLO;1CULxY`}VbRR`%=HuivV!x<22w`rn-M!iFDA61jr27g^-x=H{xNO3Aw~C#krp zgF&;?=kwp+{PG<|JAeM%xic+kU8hI(J6{1FzIm3;Dyjbd`TFeUJW7XGTwN8q$S1An z(^=_7)5MNzPLAYcySQ}tZA zWfzx)-@<}75(&3$ZCjZHj%#b%*A+OeooyC7+iF4SB!(cblT&RfKYh^*Sf6fG|Cg1e zF_Vi?rBk4damTxTwXv}dBDH@k%-Jo@?JQ0|&(ycMT>t<7FPU|69zts$&(OX<=ZAU2 z7SVd8kX08soEOCGtNVLORH4h~vcV2zCx*Zy;X&)m%ii7LwD7C0w&TC@`tjrSafOPW zmwf)DKi$vxKtAl%43%xMdd%BmHDqkLJWkH(P`a$bB*pvg%&%WpS6^Sp=ykMEhS~Gb zoq$!G4lDn&D!gq8@?`q)@Amfh_ZY8cMFYWSttBb1D|g0ZYd`qCKHlH2h_Ud`eFmO; zNr|4i+`RpsS_&>fUY<*)q#Ek-HlP2T=iU9DopIv(`@2kyoR);Fe6s7!{8wJ9IHRXM z-jTQTkjKu{m`ba--eZp*P1kRDEMu{u-u{6FGxKtP=Im^7Jsw^|OWtN{C6;*3$t4vP z6%rGScxwO0TWr|&ta7UqbHJ*Z7M@;9=i5h$`^^PLUNbwtEYqWk%$K*#6+FX$fZ?`X z(URS>za|+^D5BImr+W_=WE&hJM@y{FGAe*aro`0-=k zgMnUNOB;H7J3Bi$4}9KqGiMs(hCG7_@%zi(ym-NIsGzE)an3BS zr$0VAF?K9!ZT(ppcFBMxqqMZ7q(o%H+Z?m`>{(lV)Fv~UndzL~#Be{|b8Se`&K$9b zD65(syQMjrT3cILAG{71Q@DQi`R&^?g_m)%G$zhS3!ZejV%yx)Pd^`c^XBjG@^=hM z9Cr13VvK92Pv2jkZ@2zsHQR^Z)Akj48BX7x{~id`KmASG7@@LB=ltsH+ly@HPAFOG zzfr&?@XFP#>HHV(uW?q{)1Y8p3M`3KOI#yLQW8s2t&)pUffR$0fuW(U0T5Y)7#dg^ znp&Bf>l&C_85nFmXte}ILvDUbW?ChR1`{g-3oAnlhz6(dLvMi^7(8A5T-G@yGywq1 Cm_*M2 literal 0 HcmV?d00001 diff --git a/resources/recipes/exiled.recipe b/resources/recipes/exiled.recipe index 72dfc02e8b..6a65e22edc 100644 --- a/resources/recipes/exiled.recipe +++ b/resources/recipes/exiled.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2011, Darko Miletic ' ''' exiledonline.com ''' @@ -20,18 +18,20 @@ class Exiled(BasicNewsRecipe): use_embedded_content = False encoding = 'utf8' remove_javascript = True - language = 'en' - - cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif' - - html2lrf_options = [ - '--comment' , description - , '--base-font-size', '10' - , '--category' , category - , '--publisher' , publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + language = 'en' + publication_type = 'newsblog' + masthead_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif' + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif} + #topslug{font-size: xx-large; font-weight: bold; color: red} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } keep_only_tags = [dict(name='div', attrs={'id':'main'})] @@ -47,12 +47,13 @@ class Exiled(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] - mtag = '\n\n\n' - soup.head.insert(0,mtag) + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) return soup def get_article_url(self, article): raw = article.get('link', None) final = raw + 'all/1/' return final - From 611c0373573a6ad74cc0ba5b4d4b8a5788760651 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 10:52:29 -0700 Subject: [PATCH 15/26] ... --- src/calibre/ebooks/conversion/preprocess.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 97aaa653a9..ae111355e4 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -563,8 +563,8 @@ class HTMLPreProcessor(object): html = html.replace(start, '') # convert ellipsis to entities to prevent wrapping - html = re.sub('(?u)(?<=\w)\s?(\.\s?){2}\.', '…', html) + html = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '…', html) # convert double dashes to em-dash - html = re.sub('\s--\s', u'\u2014', html) + html = re.sub(r'\s--\s', u'\u2014', html) return substitute_entites(html) From 843e1f2068cf1707f7f002be7c05c37282e9fa36 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 8 Jan 2011 13:17:32 -0500 Subject: [PATCH 16/26] TXT Input: Basic heuristic processor. --- src/calibre/ebooks/txt/heuristicprocessor.py | 88 ++++++++++++++++++++ src/calibre/ebooks/txt/input.py | 12 ++- src/calibre/ebooks/txt/processor.py | 23 ++++- 3 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 src/calibre/ebooks/txt/heuristicprocessor.py diff --git a/src/calibre/ebooks/txt/heuristicprocessor.py b/src/calibre/ebooks/txt/heuristicprocessor.py new file mode 100644 index 0000000000..cbfa33a96a --- /dev/null +++ b/src/calibre/ebooks/txt/heuristicprocessor.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL 3' +__copyright__ = '2011, John Schember ' +__docformat__ = 'restructuredtext en' + +import re +import string + +from calibre import prepare_string_for_xml +from calibre.ebooks.unidecode.unidecoder import Unidecoder + +class TXTHeuristicProcessor(object): + + def __init__(self): + self.ITALICIZE_WORDS = [ + 'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.', + 'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetra', 'n.b.', 'N.b.', + 'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.', + 'Mlle.', 'Mons.', 'PS.', 'PPS.', + ] + self.ITALICIZE_STYLE_PATS = [ + r'(?msu)_(?P.+?)_', + r'(?msu)/(?P.+?)/', + r'(?msu)~~(?P.+?)~~', + r'(?msu)\*(?P.+?)\*', + r'(?msu)~(?P.+?)~', + r'(?msu)_/(?P.+?)/_', + r'(?msu)_\*(?P.+?)\*_', + r'(?msu)\*/(?P.+?)/\*', + r'(?msu)_\*/(?P.+?)/\*_', + r'(?msu)/:(?P.+?):/', + r'(?msu)\|:(?P.+?):\|', + ] + + def del_maketrans(self, deletechars): + return dict([(ord(x), u'') for x in deletechars]) + + def is_heading(self, line): + if not line: + return False + if len(line) > 40: + return False + + line = Unidecoder().decode(line) + + # punctuation. + if line.translate(self.del_maketrans(string.letters + string.digits + ' :-')): + return False + + # All upper case. + #if line.isupper(): + # return True + # Roman numerals. + #if not line.translate(self.del_maketrans('IVXYCivxyc ')): + # return True + + return True + + def process_paragraph(self, paragraph): + for word in self.ITALICIZE_WORDS: + paragraph = paragraph.replace(word, '%s' % word) + for pat in self.ITALICIZE_STYLE_PATS: + paragraph = re.sub(pat, lambda mo: '%s' % mo.group('words'), paragraph) + return paragraph + + def convert(self, txt, title='', epub_split_size_kb=0): + from calibre.ebooks.txt.processor import clean_txt, split_txt, HTML_TEMPLATE + txt = clean_txt(txt) + txt = split_txt(txt, epub_split_size_kb) + + processed = [] + last_was_heading = False + for line in txt.split('\n\n'): + if self.is_heading(line): + if not last_was_heading: + processed.append(u'

%s

' % prepare_string_for_xml(line.replace('\n', ' '))) + else: + processed.append(u'

%s

' % prepare_string_for_xml(line.replace('\n', ' '))) + last_was_heading = True + else: + processed.append(u'

%s

' % self.process_paragraph(prepare_string_for_xml(line.replace('\n', ' ')))) + last_was_heading = False + + txt = u'\n'.join(processed) + txt = re.sub('[ ]{2,}', ' ', txt) + + return HTML_TEMPLATE % (title, txt) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 47e92a45a9..fd805f8ce8 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -10,7 +10,8 @@ from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ebooks.chardet import detect from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ - preserve_spaces, detect_paragraph_type, detect_formatting_type + preserve_spaces, detect_paragraph_type, detect_formatting_type, \ + convert_heuristic from calibre import _ent_pat, xml_entity_to_unicode class TXTInput(InputFormatPlugin): @@ -31,7 +32,7 @@ class TXTInput(InputFormatPlugin): '* print: Assume every line starting with 2+ spaces or a tab ' 'starts a paragraph.')), OptionRecommendation(name='formatting_type', recommended_value='auto', - choices=['auto', 'none', 'markdown'], + choices=['auto', 'none', 'heuristic', 'markdown'], help=_('Formatting used within the document.' '* auto: Try to auto detect the document formatting.\n' '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n' @@ -96,7 +97,12 @@ class TXTInput(InputFormatPlugin): txt = separate_paragraphs_print_formatted(txt) flow_size = getattr(options, 'flow_size', 0) - html = convert_basic(txt, epub_split_size_kb=flow_size) + + if options.formatting_type == 'heuristic': + html = convert_heuristic(txt, epub_split_size_kb=flow_size) + else: + html = convert_basic(txt, epub_split_size_kb=flow_size) + from calibre.customize.ui import plugin_for_input_format html_input = plugin_for_input_format('html') diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index f6d628e7c5..79eee79c29 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -9,6 +9,7 @@ import os, re from calibre import prepare_string_for_xml, isbytestring from calibre.ebooks.markdown import markdown from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' @@ -16,7 +17,7 @@ __docformat__ = 'restructuredtext en' HTML_TEMPLATE = u'%s\n%s\n' -def convert_basic(txt, title='', epub_split_size_kb=0): +def clean_txt(txt): if isbytestring(txt): txt = txt.decode('utf-8', 'replace') # Strip whitespace from the beginning and end of the line. Also replace @@ -35,6 +36,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0): chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) illegal_chars = re.compile(u'|'.join(map(unichr, chars))) txt = illegal_chars.sub('', txt) + + return txt + +def split_txt(txt, epub_split_size_kb=0): #Takes care if there is no point to split if epub_split_size_kb > 0: if isinstance(txt, unicode): @@ -49,6 +54,12 @@ def convert_basic(txt, title='', epub_split_size_kb=0): if isbytestring(txt): txt = txt.decode('utf-8') + return txt + +def convert_basic(txt, title='', epub_split_size_kb=0): + txt = clean_txt(txt) + txt = split_txt(txt, epub_split_size_kb) + lines = [] # Split into paragraphs based on having a blank line between text. for line in txt.split('\n\n'): @@ -57,6 +68,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0): return HTML_TEMPLATE % (title, u'\n'.join(lines)) +def convert_heuristic(txt, title='', epub_split_size_kb=0): + tp = TXTHeuristicProcessor() + return tp.convert(txt, title, epub_split_size_kb) + def convert_markdown(txt, title='', disable_toc=False): md = markdown.Markdown( extensions=['footnotes', 'tables', 'toc'], @@ -111,12 +126,12 @@ def detect_paragraph_type(txt): # Check for print tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) - if tab_line_count / float(txt_line_count) >= .25: + if tab_line_count / float(txt_line_count) >= .15: return 'print' # Check for block empty_line_count = len(re.findall('(?mu)^\s*$', txt)) - if empty_line_count / float(txt_line_count) >= .25: + if empty_line_count / float(txt_line_count) >= .15: return 'block' # Nothing else matched to assume single. @@ -143,4 +158,4 @@ def detect_formatting_type(txt): if txt.count('\\'+c) > 10: return 'markdown' - return 'none' + return 'heuristic' From c5a679a437c7ab52bb0320c83eef4535c151feb5 Mon Sep 17 00:00:00 2001 From: GRiker Date: Sat, 8 Jan 2011 11:42:31 -0700 Subject: [PATCH 17/26] GwR patch for bogus cover data --- src/calibre/library/catalog.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 0a5d5284e2..1af9c3aa58 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -21,7 +21,7 @@ from calibre.utils.config import config_dir from calibre.utils.date import format_date, isoformat, now as nowf from calibre.utils.logging import default_log as log from calibre.utils.zipfile import ZipFile, ZipInfo -from calibre.utils.magick.draw import thumbnail +from calibre.utils.magick.draw import identify_data, thumbnail FIELDS = ['all', 'author_sort', 'authors', 'comments', 'cover', 'formats', 'id', 'isbn', 'ondevice', 'pubdate', 'publisher', 'rating', @@ -2861,11 +2861,19 @@ class EPUB_MOBI(CatalogPlugin): self.updateProgressMicroStep("Thumbnail %d of %d" % \ (i,len(self.booksByTitle)), i/float(len(self.booksByTitle))) - # Check to see if source file exists - if 'cover' in title and os.path.isfile(title['cover']): + + # Confirm existence, integrity of cover image + valid_cover = True + try: + _w, _h, _fmt = identify_data(open(title['cover'], 'rb').read()) + except: + valid_cover = False + + if valid_cover: # Add the thumb spec to thumbs[] thumbs.append("thumbnail_%d.jpg" % int(title['id'])) - + self.generateThumbnail(title, image_dir, thumb_file) + ''' # Check to see if thumbnail exists thumb_fp = "%s/thumbnail_%d.jpg" % (image_dir,int(title['id'])) thumb_file = 'thumbnail_%d.jpg' % int(title['id']) @@ -2879,6 +2887,7 @@ class EPUB_MOBI(CatalogPlugin): self.generateThumbnail(title, image_dir, thumb_file) else: self.generateThumbnail(title, image_dir, thumb_file) + ''' else: # Use default cover if False and self.verbose: From 8a44bf07edf1b3282a65edd044421b963d4dd794 Mon Sep 17 00:00:00 2001 From: GRiker Date: Sat, 8 Jan 2011 11:48:41 -0700 Subject: [PATCH 18/26] GwR patch for bogus cover data --- src/calibre/library/catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 1af9c3aa58..df1341fc38 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -2862,7 +2862,7 @@ class EPUB_MOBI(CatalogPlugin): (i,len(self.booksByTitle)), i/float(len(self.booksByTitle))) - # Confirm existence, integrity of cover image + thumb_file = 'thumbnail_%d.jpg' % int(title['id']) valid_cover = True try: _w, _h, _fmt = identify_data(open(title['cover'], 'rb').read()) From f593b2163154bcd61e21b0e06f8cf0e29514af86 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 8 Jan 2011 13:53:32 -0500 Subject: [PATCH 19/26] TXT Input: Tweak Heuristic italicizing. --- src/calibre/ebooks/txt/heuristicprocessor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/txt/heuristicprocessor.py b/src/calibre/ebooks/txt/heuristicprocessor.py index cbfa33a96a..b0bbd49961 100644 --- a/src/calibre/ebooks/txt/heuristicprocessor.py +++ b/src/calibre/ebooks/txt/heuristicprocessor.py @@ -21,15 +21,15 @@ class TXTHeuristicProcessor(object): ] self.ITALICIZE_STYLE_PATS = [ r'(?msu)_(?P.+?)_', - r'(?msu)/(?P.+?)/', + r'(?msu)/(?P[^<>]+?)/', r'(?msu)~~(?P.+?)~~', r'(?msu)\*(?P.+?)\*', r'(?msu)~(?P.+?)~', - r'(?msu)_/(?P.+?)/_', + r'(?msu)_/(?P[^<>]+?)/_', r'(?msu)_\*(?P.+?)\*_', - r'(?msu)\*/(?P.+?)/\*', - r'(?msu)_\*/(?P.+?)/\*_', - r'(?msu)/:(?P.+?):/', + r'(?msu)\*/(?P[^<>]+?)/\*', + r'(?msu)_\*/(?P[^<>]+?)/\*_', + r'(?msu)/:(?P[^<>]+?):/', r'(?msu)\|:(?P.+?):\|', ] @@ -84,5 +84,6 @@ class TXTHeuristicProcessor(object): txt = u'\n'.join(processed) txt = re.sub('[ ]{2,}', ' ', txt) + print txt return HTML_TEMPLATE % (title, txt) From c8f18ff02e32f56220f83872f4def00cca58e73d Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 8 Jan 2011 15:49:10 -0500 Subject: [PATCH 20/26] TXT Input: Heuristic processor, use PreProcessor to mark chapter headings. --- src/calibre/ebooks/txt/heuristicprocessor.py | 43 ++++---------------- src/calibre/ebooks/txt/processor.py | 3 -- 2 files changed, 7 insertions(+), 39 deletions(-) diff --git a/src/calibre/ebooks/txt/heuristicprocessor.py b/src/calibre/ebooks/txt/heuristicprocessor.py index b0bbd49961..c4489badc5 100644 --- a/src/calibre/ebooks/txt/heuristicprocessor.py +++ b/src/calibre/ebooks/txt/heuristicprocessor.py @@ -33,30 +33,6 @@ class TXTHeuristicProcessor(object): r'(?msu)\|:(?P.+?):\|', ] - def del_maketrans(self, deletechars): - return dict([(ord(x), u'') for x in deletechars]) - - def is_heading(self, line): - if not line: - return False - if len(line) > 40: - return False - - line = Unidecoder().decode(line) - - # punctuation. - if line.translate(self.del_maketrans(string.letters + string.digits + ' :-')): - return False - - # All upper case. - #if line.isupper(): - # return True - # Roman numerals. - #if not line.translate(self.del_maketrans('IVXYCivxyc ')): - # return True - - return True - def process_paragraph(self, paragraph): for word in self.ITALICIZE_WORDS: paragraph = paragraph.replace(word, '%s' % word) @@ -70,20 +46,15 @@ class TXTHeuristicProcessor(object): txt = split_txt(txt, epub_split_size_kb) processed = [] - last_was_heading = False for line in txt.split('\n\n'): - if self.is_heading(line): - if not last_was_heading: - processed.append(u'

%s

' % prepare_string_for_xml(line.replace('\n', ' '))) - else: - processed.append(u'

%s

' % prepare_string_for_xml(line.replace('\n', ' '))) - last_was_heading = True - else: - processed.append(u'

%s

' % self.process_paragraph(prepare_string_for_xml(line.replace('\n', ' ')))) - last_was_heading = False + processed.append(u'

%s

' % self.process_paragraph(prepare_string_for_xml(line.replace('\n', ' ')))) txt = u'\n'.join(processed) txt = re.sub('[ ]{2,}', ' ', txt) - print txt + html = HTML_TEMPLATE % (title, txt) + + from calibre.ebooks.conversion.utils import PreProcessor + pp = PreProcessor() + html = pp.markup_chapters(html, pp.get_word_count(html), False) - return HTML_TEMPLATE % (title, txt) + return html diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 1e67caccc6..9dc29e45dd 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -9,11 +9,8 @@ import os, re from calibre import prepare_string_for_xml, isbytestring from calibre.ebooks.markdown import markdown from calibre.ebooks.metadata.opf2 import OPFCreator -<<<<<<< TREE from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor -======= from calibre.ebooks.conversion.preprocess import DocAnalysis ->>>>>>> MERGE-SOURCE __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' From bd14205637cbf71fe4aad655de50f4f0fea98a60 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 8 Jan 2011 15:53:51 -0500 Subject: [PATCH 21/26] ... --- src/calibre/ebooks/txt/heuristicprocessor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/ebooks/txt/heuristicprocessor.py b/src/calibre/ebooks/txt/heuristicprocessor.py index c4489badc5..c4c6a56123 100644 --- a/src/calibre/ebooks/txt/heuristicprocessor.py +++ b/src/calibre/ebooks/txt/heuristicprocessor.py @@ -5,7 +5,6 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' import re -import string from calibre import prepare_string_for_xml from calibre.ebooks.unidecode.unidecoder import Unidecoder @@ -48,7 +47,7 @@ class TXTHeuristicProcessor(object): processed = [] for line in txt.split('\n\n'): processed.append(u'

%s

' % self.process_paragraph(prepare_string_for_xml(line.replace('\n', ' ')))) - + txt = u'\n'.join(processed) txt = re.sub('[ ]{2,}', ' ', txt) html = HTML_TEMPLATE % (title, txt) From 831ee1fc81b50d9ccd7c771161db322715fa3092 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 8 Jan 2011 16:53:54 -0500 Subject: [PATCH 22/26] TXT Input: Add documentation for the heuristic formatting option to the option help. --- src/calibre/ebooks/txt/input.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 5060e124ff..c8ce389574 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -37,6 +37,8 @@ class TXTInput(InputFormatPlugin): help=_('Formatting used within the document.' '* auto: Try to auto detect the document formatting.\n' '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n' + '* heuristic: Try to detect formatting for elements such as chapter headings ' + 'and style the elements appropriately.\n' '* markdown: Run the input though the markdown pre-processor. ' 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), OptionRecommendation(name='preserve_spaces', recommended_value=False, From 12cbaa2304db610ccf101bbd4abe13ff58f68fee Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 8 Jan 2011 17:26:32 -0500 Subject: [PATCH 23/26] TXT Input: Make formatting_type options easier to understand. --- src/calibre/ebooks/txt/input.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index c8ce389574..e782cd0cd9 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -35,11 +35,12 @@ class TXTInput(InputFormatPlugin): OptionRecommendation(name='formatting_type', recommended_value='auto', choices=['auto', 'none', 'heuristic', 'markdown'], help=_('Formatting used within the document.' - '* auto: Try to auto detect the document formatting.\n' - '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n' - '* heuristic: Try to detect formatting for elements such as chapter headings ' - 'and style the elements appropriately.\n' - '* markdown: Run the input though the markdown pre-processor. ' + '* auto: Automatically decide which formatting processor to use.\n' + '* none: Do not process the document formatting. Everything is a ' + 'paragraph and no styling is applied.\n' + '* heuristic: Process using heuristics to determine formatting such ' + 'as chapter headings and italic text.\n' + '* markdown: Processing using markdown formatting. ' 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), OptionRecommendation(name='preserve_spaces', recommended_value=False, help=_('Normally extra spaces are condensed into a single space. ' From 8bcdb0fed79c7b0f0b9fbb80d9b3a5b0c683c5d0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 19:17:39 -0700 Subject: [PATCH 24/26] Fix #8007 (Search performance on multiple words) --- src/calibre/library/caches.py | 28 +++++--- src/calibre/utils/search_query_parser.py | 83 ++++++++++++++++++------ 2 files changed, 81 insertions(+), 30 deletions(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 980c9f1fa9..7caeeabda8 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{ self.search_restriction = '' self.field_metadata = field_metadata self.all_search_locations = field_metadata.get_search_terms() - SearchQueryParser.__init__(self, self.all_search_locations) + SearchQueryParser.__init__(self, self.all_search_locations, optimize=True) self.build_date_relop_dict() self.build_numeric_relop_dict() @@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{ '<=':[2, relop_le] } - def get_dates_matches(self, location, query): + def get_dates_matches(self, location, query, candidates): matches = set([]) if len(query) < 2: return matches @@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{ loc = self.field_metadata[location]['rec_index'] if query == 'false': - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if item[loc] is None or item[loc] <= UNDEFINED_DATE: matches.add(item[0]) return matches if query == 'true': - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if item[loc] is not None and item[loc] > UNDEFINED_DATE: matches.add(item[0]) @@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{ field_count = query.count('-') + 1 else: field_count = query.count('/') + 1 - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None or item[loc] is None: continue if relop(item[loc], qd, field_count): matches.add(item[0]) @@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{ '<=':[2, lambda r, q: r <= q] } - def get_numeric_matches(self, location, query, val_func = None): + def get_numeric_matches(self, location, query, candidates, val_func = None): matches = set([]) if len(query) == 0: return matches @@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{ except: return matches - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue v = val_func(item) @@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{ matches.add(item[0]) return matches - def get_matches(self, location, query, allow_recursion=True): + def get_matches(self, location, query, allow_recursion=True, candidates=None): matches = set([]) + if candidates is None: + candidates = self.universal_set() + if len(candidates) == 0: + return matches + if query and query.strip(): # get metadata key associated with the search term. Eliminates # dealing with plurals and other aliases @@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{ else: q = query - for item in self._data: + for id_ in candidates: + item = self._data[id] if item is None: continue if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py index db7c7bde5f..447ff8cd14 100644 --- a/src/calibre/utils/search_query_parser.py +++ b/src/calibre/utils/search_query_parser.py @@ -118,8 +118,9 @@ class SearchQueryParser(object): failed.append(test[0]) return failed - def __init__(self, locations, test=False): + def __init__(self, locations, test=False, optimize=False): self._tests_failed = False + self.optimize = optimize # Define a token standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'), locations) @@ -182,38 +183,52 @@ class SearchQueryParser(object): # empty the list of searches used for recursion testing self.recurse_level = 0 self.searches_seen = set([]) - return self._parse(query) + candidates = self.universal_set() + return self._parse(query, candidates) # this parse is used internally because it doesn't clear the # recursive search test list. However, we permit seeing the # same search a few times because the search might appear within # another search. - def _parse(self, query): + def _parse(self, query, candidates=None): self.recurse_level += 1 res = self._parser.parseString(query)[0] - t = self.evaluate(res) + if candidates is None: + candidates = self.universal_set() + t = self.evaluate(res, candidates) self.recurse_level -= 1 return t def method(self, group_name): return getattr(self, 'evaluate_'+group_name) - def evaluate(self, parse_result): - return self.method(parse_result.getName())(parse_result) + def evaluate(self, parse_result, candidates): + return self.method(parse_result.getName())(parse_result, candidates) - def evaluate_and(self, argument): - return self.evaluate(argument[0]).intersection(self.evaluate(argument[1])) + def evaluate_and(self, argument, candidates): + # RHS checks only those items matched by LHS + # returns result of RHS check: RHmatches(LHmatches(c)) + # return self.evaluate(argument[0]).intersection(self.evaluate(argument[1])) + l = self.evaluate(argument[0], candidates) + return l.intersection(self.evaluate(argument[1], l)) - def evaluate_or(self, argument): - return self.evaluate(argument[0]).union(self.evaluate(argument[1])) + def evaluate_or(self, argument, candidates): + # RHS checks only those elements not matched by LHS + # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c)) + # return self.evaluate(argument[0]).union(self.evaluate(argument[1])) + l = self.evaluate(argument[0], candidates) + return l.union(self.evaluate(argument[1], candidates.difference(l))) - def evaluate_not(self, argument): - return self.universal_set().difference(self.evaluate(argument[0])) + def evaluate_not(self, argument, candidates): + # unary op checks only candidates. Result: list of items matching + # returns: c - matches(c) + # return self.universal_set().difference(self.evaluate(argument[0])) + return candidates.difference(self.evaluate(argument[0], candidates)) - def evaluate_parenthesis(self, argument): - return self.evaluate(argument[0]) + def evaluate_parenthesis(self, argument, candidates): + return self.evaluate(argument[0], candidates) - def evaluate_token(self, argument): + def evaluate_token(self, argument, candidates): location = argument[0] query = argument[1] if location.lower() == 'search': @@ -224,17 +239,27 @@ class SearchQueryParser(object): raise ParseException(query, len(query), 'undefined saved search', self) if self.recurse_level > 5: self.searches_seen.add(query) - return self._parse(saved_searches().lookup(query)) + return self._parse(saved_searches().lookup(query), candidates) except: # convert all exceptions (e.g., missing key) to a parse error raise ParseException(query, len(query), 'undefined saved search', self) - return self.get_matches(location, query) + return self._get_matches(location, query, candidates) - def get_matches(self, location, query): + def _get_matches(self, location, query, candidates): + if self.optimize: + return self.get_matches(location, query, candidates=candidates) + else: + return self.get_matches(location, query) + + def get_matches(self, location, query, candidates=None): ''' Should return the set of matches for :param:'location` and :param:`query`. + The search must be performed over all entries is :param:`candidates` is + None otherwise only over the items in candidates. + :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`. :param:`query` is a string literal. + :param: None or a subset of the set returned by :meth:`universal_set`. ''' return set([]) @@ -561,7 +586,7 @@ class Tester(SearchQueryParser): def universal_set(self): return self._universal_set - def get_matches(self, location, query): + def get_matches(self, location, query, candidates=None): location = location.lower() if location in self.fields.keys(): getter = operator.itemgetter(self.fields[location]) @@ -573,8 +598,13 @@ class Tester(SearchQueryParser): if not query: return set([]) query = query.lower() - return set(key for key, val in self.texts.items() \ - if query and query in getattr(getter(val), 'lower', lambda : '')()) + if candidates: + return set(key for key, val in self.texts.items() \ + if key in candidates and query and query + in getattr(getter(val), 'lower', lambda : '')()) + else: + return set(key for key, val in self.texts.items() \ + if query and query in getattr(getter(val), 'lower', lambda : '')()) @@ -592,6 +622,7 @@ class Tester(SearchQueryParser): def main(args=sys.argv): + print 'testing unoptimized' tester = Tester(['authors', 'author', 'series', 'formats', 'format', 'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover', 'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read', @@ -601,6 +632,16 @@ def main(args=sys.argv): print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<' return 1 + print '\n\ntesting optimized' + tester = Tester(['authors', 'author', 'series', 'formats', 'format', + 'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover', + 'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read', + 'all', 'search'], test=True, optimize=True) + failed = tester.run_tests() + if tester._tests_failed or failed: + print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<' + return 1 + return 0 if __name__ == '__main__': From 5b8ea643214d2db4665614bc1046f8aa3db5c9cb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Jan 2011 19:19:28 -0700 Subject: [PATCH 25/26] And another droid --- src/calibre/devices/android/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index b7e2f0fd2e..73c930778e 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -29,7 +29,7 @@ class ANDROID(USBMS): # Motorola 0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100], 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216], - 0x4286 : [0x216] }, + 0x4286 : [0x216], 0x42b3 : [0x216] }, # Sony Ericsson 0xfce : { 0xd12e : [0x0100]}, From f5a6195ceb697e6638bb7460ab9b2f1949a5342b Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 9 Jan 2011 00:02:24 -0500 Subject: [PATCH 26/26] TXT Output: Clean up and produce consistant output. Spacing around headings. Headings are not indented when using the remove paragraph spacing option. --- src/calibre/ebooks/txt/txtml.py | 73 +++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py index a3b4ed7afe..786f50824d 100644 --- a/src/calibre/ebooks/txt/txtml.py +++ b/src/calibre/ebooks/txt/txtml.py @@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en' Transform OEB content into plain text ''' -import os import re from lxml import etree @@ -33,6 +32,15 @@ BLOCK_STYLES = [ 'block', ] +HEADING_TAGS = [ + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', +] + SPACE_TAGS = [ 'td', 'br', @@ -47,6 +55,10 @@ class TXTMLizer(object): self.log.info('Converting XHTML to TXT...') self.oeb_book = oeb_book self.opts = opts + self.toc_ids = [] + self.last_was_heading = False + + self.create_flat_toc(self.oeb_book.toc) return self.mlize_spine() @@ -58,8 +70,11 @@ class TXTMLizer(object): stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode)) content = self.remove_newlines(content) - output += self.dump_text(etree.fromstring(content), stylizer) - output = self.cleanup_text(u''.join(output)) + output += self.dump_text(etree.fromstring(content), stylizer, item) + output += '\n\n\n\n\n\n' + output = u''.join(output) + output = u'\n'.join(l.rstrip() for l in output.splitlines()) + output = self.cleanup_text(output) return output @@ -68,6 +83,8 @@ class TXTMLizer(object): text = text.replace('\r\n', ' ') text = text.replace('\n', ' ') text = text.replace('\r', ' ') + # Condense redundant spaces created by replacing newlines with spaces. + text = re.sub(r'[ ]{2,}', ' ', text) return text @@ -80,6 +97,14 @@ class TXTMLizer(object): toc.append(u'* %s\n\n' % item.title) return ''.join(toc) + def create_flat_toc(self, nodes): + ''' + Turns a hierarchical list of TOC href's into a flat list. + ''' + for item in nodes: + self.toc_ids.append(item.href) + self.create_flat_toc(item.nodes) + def cleanup_text(self, text): self.log.debug('\tClean up text...') # Replace bad characters. @@ -92,7 +117,7 @@ class TXTMLizer(object): text = text.replace('\f+', ' ') # Single line paragraph. - text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text) + text = re.sub('(?<=.)\n(?=.)', ' ', text) # Remove multiple spaces. text = re.sub('[ ]{2,}', ' ', text) @@ -101,13 +126,19 @@ class TXTMLizer(object): text = re.sub('\n[ ]+\n', '\n\n', text) if self.opts.remove_paragraph_spacing: text = re.sub('\n{2,}', '\n', text) - text = re.sub('(?imu)^(?=.)', '\t', text) + text = re.sub(r'(?msu)^(?P[^\t\n]+?)$', lambda mo: u'%s\n\n' % mo.group('t'), text) + text = re.sub(r'(?msu)(?P[^\n])\n+(?P[^\t\n]+?)(?=\n)', lambda mo: '%s\n\n\n\n\n\n%s' % (mo.group('b'), mo.group('t')), text) else: - text = re.sub('\n{3,}', '\n\n', text) + text = re.sub('\n{7,}', '\n\n\n\n\n\n', text) # Replace spaces at the beginning and end of lines + # We don't replace tabs because those are only added + # when remove paragraph spacing is enabled. text = re.sub('(?imu)^[ ]+', '', text) text = re.sub('(?imu)[ ]+$', '', text) + + # Remove empty space and newlines at the beginning of the document. + text = re.sub(r'(?u)^[ \n]+', '', text) if self.opts.max_line_length: max_length = self.opts.max_line_length @@ -145,13 +176,11 @@ class TXTMLizer(object): return text - def dump_text(self, elem, stylizer, end=''): + def dump_text(self, elem, stylizer, page): ''' @elem: The element in the etree that we are working on. @stylizer: The style information attached to the element. - @end: The last two characters of the text from the previous element. - This is used to determine if a blank line is needed when starting - a new block element. + @page: OEB page used to determine absolute urls. ''' if not isinstance(elem.tag, basestring) \ @@ -170,13 +199,22 @@ class TXTMLizer(object): return [''] tag = barename(elem.tag) + tag_id = elem.attrib.get('id', None) in_block = False + in_heading = False + + # Are we in a heading? + # This can either be a heading tag or a TOC item. + if tag in HEADING_TAGS or '%s#%s' % (page.href, tag_id) in self.toc_ids: + in_heading = True + if not self.last_was_heading: + text.append('\n\n\n\n\n\n') # Are we in a paragraph block? if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES: + if self.opts.remove_paragraph_spacing and not in_heading: + text.append(u'\t') in_block = True - if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text: - text.append(u'\n\n') if tag in SPACE_TAGS: text.append(u' ') @@ -185,14 +223,17 @@ class TXTMLizer(object): if hasattr(elem, 'text') and elem.text: text.append(elem.text) + # Recurse down into tags within the tag we are in. for item in elem: - en = u'' - if len(text) >= 2: - en = text[-1][-2:] - text += self.dump_text(item, stylizer, en) + text += self.dump_text(item, stylizer, page) if in_block: text.append(u'\n\n') + if in_heading: + text.append(u'\n') + self.last_was_heading = True + else: + self.last_was_heading = False if hasattr(elem, 'tail') and elem.tail: text.append(elem.tail)