diff --git a/resources/calibre-portable.bat b/resources/calibre-portable.bat index fb3444e34e..473cdc4236 100644 --- a/resources/calibre-portable.bat +++ b/resources/calibre-portable.bat @@ -1,6 +1,4 @@ @echo OFF -REM CalibreRun.bat -REM ~~~~~~~~~~~~~~ REM Batch File to start a Calibre configuration on Windows REM giving explicit control of the location of: REM - Calibe Program Files @@ -24,7 +22,10 @@ REM ------------------------------------- REM Set up Calibre Config folder REM ------------------------------------- -If EXIST CalibreConfig SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig +IF EXIST CalibreConfig ( + SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig + ECHO CONFIG=%cd%\CalibreConfig +) REM -------------------------------------------------------------- @@ -38,24 +39,53 @@ REM drive letter of the USB stick. REM Comment out any of the following that are not to be used REM -------------------------------------------------------------- -SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary -IF EXIST CalibreLibrary SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary -IF EXIST CalibreBooks SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks +IF EXIST U:\eBooks\CalibreLibrary ( + SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary + ECHO LIBRARY=U:\eBOOKS\CalibreLibrary +) +IF EXIST CalibreLibrary ( + SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary + ECHO LIBRARY=%cd%\CalibreLibrary +) +IF EXIST CalibreBooks ( + SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks + ECHO LIBRARY=%cd%\CalibreBooks +) REM -------------------------------------------------------------- -REM Specify Location of metadata database (optional) +REM Specify Location of metadata database (optional) REM REM Location where the metadata.db file is located. If not set REM the same location as Books files will be assumed. This. REM options is used to get better performance when the Library is REM on a (slow) network drive. 
Putting the metadata.db file -REM locally gives a big performance improvement. +REM locally gives a big performance improvement. +REM +REM NOTE. If you use this option, then the ability to switch +REM libraries within Calibre will be disabled. Therefore +REM you do not want to set it if the metadata.db file +REM is at the same location as the book files. REM -------------------------------------------------------------- -IF EXIST CalibreBooks SET SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db -IF EXIST CalibreMetadata SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db - +IF EXIST CalibreBooks ( + IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreBooks" ( + SET SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db + ECHO DATABASE=%cd%\CalibreBooks\metadata.db + ECHO ' + ECHO ***CAUTION*** Library Switching will be disabled + ECHO ' + ) +) +IF EXIST CalibreMetadata ( + IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" ( + SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db + ECHO DATABASE=%cd%\CalibreMetadata\metadata.db + ECHO ' + ECHO ***CAUTION*** Library Switching will be disabled + ECHO ' + ) +) REM -------------------------------------------------------------- REM Specify Location of source (optional) @@ -63,13 +93,20 @@ REM REM It is easy to run Calibre from source REM Just set the environment variable to where the source is located REM When running from source the GUI will have a '*' after the version. +REM number that is displayed at the bottom of the Calibre main screen. 
REM -------------------------------------------------------------- -IF EXIST Calibre\src SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src - +IF EXIST Calibre\src ( + SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src + ECHO SOURCE=%cd%\Calibre\src +) +IF EXIST D:\Calibre\Calibre\src ( + SET CALIBRE_DEVELOP_FROM=D:\Calibre\Calibre\src + ECHO SOURCE=D:\Calibre\Calibre\src +) REM -------------------------------------------------------------- -REM Specify Location of calibre binaries (optinal) +REM Specify Location of calibre binaries (optional) REM REM To avoid needing Calibre to be set in the search path, ensure REM that Calibre Program Files is current directory when starting. @@ -78,21 +115,15 @@ REM This folder can be populated by cpying the Calibre2 folder from REM an existing isntallation or by isntalling direct to here. REM -------------------------------------------------------------- -IF EXIST Calibre2 CD Calibre2 - - -REM -------------------------------------------- -REM Display settings that will be used -REM -------------------------------------------- - -echo PROGRAMS=%cd% -echo SOURCE=%CALIBRE_DEVELOP_FROM% -echo CONFIG=%CALIBRE_CONFIG_DIRECTORY% -echo LIBRARY=%CALIBRE_LIBRARY_DIRECTORY% -echo DATABASE=%CALIBRE_OVERRIDE_DATABASE_PATH% +IF EXIST Calibre2 ( + Calibre2 CD Calibre2 + ECHO PROGRAMS=%cd% +) +REM ---------------------------------------------------------- REM The following gives a chance to check the settings before REM starting Calibre. It can be commented out if not wanted. 
+REM ---------------------------------------------------------- echo "Press CTRL-C if you do not want to continue" pause @@ -111,4 +142,4 @@ REM Use with /WAIT to wait until Calibre completes to run a task on exit REM -------------------------------------------------------- echo "Starting up Calibre" -START /belownormal Calibre --with-library %CALIBRE_LIBRARY_DIRECTORY% +START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%" diff --git a/resources/catalog/section_list_templates.py b/resources/catalog/section_list_templates.py index de73147fcf..7f92fad6ac 100644 --- a/resources/catalog/section_list_templates.py +++ b/resources/catalog/section_list_templates.py @@ -6,6 +6,8 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' ''' + These templates control the content of titles displayed in the various sections + Available fields: {title} Title of the book {series} Series name @@ -14,6 +16,7 @@ __docformat__ = 'restructuredtext en' {rating_parens} Rating, in parentheses {pubyear} Year the book was published {pubyear_parens} Year the book was published, in parentheses + ''' # Books by Author by_authors_normal_title_template = '{title} {pubyear_parens}' diff --git a/resources/images/heuristics.png b/resources/images/heuristics.png new file mode 100644 index 0000000000..92c53ae8ff Binary files /dev/null and b/resources/images/heuristics.png differ diff --git a/resources/recipes/el_pais.recipe b/resources/recipes/el_pais.recipe index 2e358060b8..4da3384093 100644 --- a/resources/recipes/el_pais.recipe +++ b/resources/recipes/el_pais.recipe @@ -9,13 +9,14 @@ __docformat__ = 'restructuredtext en' elpais.es ''' +from time import strftime + from calibre.web.feeds.news import BasicNewsRecipe class ElPais(BasicNewsRecipe): __author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells' description = 'Main daily newspaper from Spain' - cover_url = 'http://www.elpais.com/im/tit_logo_global.gif' title = u'El Pais' publisher = u'Ediciones 
El Pa\xeds SL' category = 'News, politics, culture, economy, general interest' @@ -62,6 +63,6 @@ class ElPais(BasicNewsRecipe): (u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058') ] -def print_version(self, url): - url = url+'?print=1' - return url + def get_cover_url(self): + return 'http://img5.kiosko.net/' + strftime("%Y/%m/%d") + '/es/elpais.750.jpg' + diff --git a/resources/recipes/ihned.recipe b/resources/recipes/ihned.recipe index daf63e19ed..a74f9e5649 100644 --- a/resources/recipes/ihned.recipe +++ b/resources/recipes/ihned.recipe @@ -5,7 +5,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe class IHNed(BasicNewsRecipe): - stahnout_vsechny = False + stahnout_vsechny = True #True = stahuje vsechny z homepage #False = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten) diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index 8f92852237..2424113e31 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' @@ -23,6 +24,10 @@ class NYTimes(BasicNewsRecipe): webEdition = False oldest_article = 7 + # replace paid Kindle Version: the name will be changed to "The New York Times" to cause + # previous paid versions of the new york times to be sent to the back issues folder on the kindle + replaceKindleVersion = False + # includeSections: List of sections to include. If empty, all sections found will be included. # Otherwise, only the sections named will be included. 
For example, # @@ -94,6 +99,10 @@ class NYTimes(BasicNewsRecipe): title='New York Times (Web)' description = 'New York Times on the Web' needs_subscription = True + elif replaceKindleVersion: + title='The New York Times' + description = 'Today\'s New York Times' + needs_subscription = True else: title='New York Times' description = 'Today\'s New York Times' @@ -150,6 +159,11 @@ class NYTimes(BasicNewsRecipe): 'relatedSearchesModule', 'side_tool', 'singleAd', + 'entry entry-utility', #added for DealBook + 'entry-tags', #added for DealBook + 'footer promos clearfix', #added for DealBook + 'footer links clearfix', #added for DealBook + 'inlineImage module', #added for DealBook re.compile('^subNavigation'), re.compile('^leaderboard'), re.compile('^module'), @@ -183,6 +197,9 @@ class NYTimes(BasicNewsRecipe): 'side_index', 'side_tool', 'toolsRight', + 'skybox', #added for DealBook + 'TopAd', #added for DealBook + 'related-content', #added for DealBook ]), dict(name=['script', 'noscript', 'style','form','hr'])] no_stylesheets = True @@ -237,7 +254,7 @@ class NYTimes(BasicNewsRecipe): def exclude_url(self,url): if not url.startswith("http"): return True - if not url.endswith(".html"): + if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook return True if 'nytimes.com' not in url: return True @@ -560,7 +577,6 @@ class NYTimes(BasicNewsRecipe): def preprocess_html(self, soup): - if self.webEdition & (self.oldest_article>0): date_tag = soup.find(True,attrs={'class': ['dateline','date']}) if date_tag: @@ -583,128 +599,168 @@ class NYTimes(BasicNewsRecipe): img_div = soup.find('div','inlineImage module') if img_div: img_div.extract() + + return self.strip_anchors(soup) def postprocess_html(self,soup, True): - try: - if self.one_picture_per_article: - # Remove all images after first - largeImg = soup.find(True, {'class':'articleSpanImage'}) - inlineImgs = soup.findAll(True, {'class':'inlineImage module'}) - if largeImg: - for inlineImg in 
inlineImgs: - inlineImg.extract() - else: - if inlineImgs: - firstImg = inlineImgs[0] - for inlineImg in inlineImgs[1:]: - inlineImg.extract() - # Move firstImg before article body - cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')}) - if cgFirst: - # Strip all sibling NavigableStrings: noise - navstrings = cgFirst.findAll(text=True, recursive=False) - [ns.extract() for ns in navstrings] - headline_found = False - tag = cgFirst.find(True) - insertLoc = 0 - while True: - insertLoc += 1 - if hasattr(tag,'class') and tag['class'] == 'articleHeadline': - headline_found = True - break - tag = tag.nextSibling - if not tag: - headline_found = False - break - if headline_found: - cgFirst.insert(insertLoc,firstImg) - else: - self.log(">>> No class:'columnGroup first' found <<<") - except: - self.log("ERROR: One picture per article in postprocess_html") - - try: - # Change captions to italic - for caption in soup.findAll(True, {'class':'caption'}) : - if caption and len(caption) > 0: - cTag = Tag(soup, "p", [("class", "caption")]) - c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() - mp_off = c.find("More Photos") - if mp_off >= 0: - c = c[:mp_off] - cTag.insert(0, c) - caption.replaceWith(cTag) - except: - self.log("ERROR: Problem in change captions to italic") - - try: - # Change to

- h1 = soup.find('h1') - if h1: - headline = h1.find("nyt_headline") - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - h1.replaceWith(tag) - else: - # Blog entry - replace headline, remove
tags - headline = soup.find('title') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - soup.insert(0, tag) - hrs = soup.findAll('hr') - for hr in hrs: - hr.extract() - except: - self.log("ERROR: Problem in Change to

") - try: - # Change

to

- used in editorial blogs - masthead = soup.find("h1") - if masthead: - # Nuke the href - if masthead.a: - del(masthead.a['href']) - tag = Tag(soup, "h3") - tag.insert(0, self.fixChars(masthead.contents[0])) - masthead.replaceWith(tag) - except: - self.log("ERROR: Problem in Change

to

- used in editorial blogs") + try: + if self.one_picture_per_article: + # Remove all images after first + largeImg = soup.find(True, {'class':'articleSpanImage'}) + inlineImgs = soup.findAll(True, {'class':'inlineImage module'}) + if largeImg: + for inlineImg in inlineImgs: + inlineImg.extract() + else: + if inlineImgs: + firstImg = inlineImgs[0] + for inlineImg in inlineImgs[1:]: + inlineImg.extract() + # Move firstImg before article body + cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')}) + if cgFirst: + # Strip all sibling NavigableStrings: noise + navstrings = cgFirst.findAll(text=True, recursive=False) + [ns.extract() for ns in navstrings] + headline_found = False + tag = cgFirst.find(True) + insertLoc = 0 + while True: + insertLoc += 1 + if hasattr(tag,'class') and tag['class'] == 'articleHeadline': + headline_found = True + break + tag = tag.nextSibling + if not tag: + headline_found = False + break + if headline_found: + cgFirst.insert(insertLoc,firstImg) + else: + self.log(">>> No class:'columnGroup first' found <<<") + except: + self.log("ERROR: One picture per article in postprocess_html") - try: - # Change to - for subhead in soup.findAll(True, {'class':'bold'}) : - if subhead.contents: - bTag = Tag(soup, "b") - bTag.insert(0, subhead.contents[0]) - subhead.replaceWith(bTag) - except: - self.log("ERROR: Problem in Change

to

- used in editorial blogs") - - try: - divTag = soup.find('div',attrs={'id':'articleBody'}) - if divTag: - divTag['class'] = divTag['id'] - except: - self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") - - try: - # Add class="authorId" to
so we can format with CSS - divTag = soup.find('div',attrs={'id':'authorId'}) - if divTag and divTag.contents[0]: - tag = Tag(soup, "p") - tag['class'] = "authorId" - tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], - use_alt=False))) - divTag.replaceWith(tag) - except: - self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") - - return soup + try: + # Change captions to italic + for caption in soup.findAll(True, {'class':'caption'}) : + if caption and len(caption) > 0: + cTag = Tag(soup, "p", [("class", "caption")]) + c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() + mp_off = c.find("More Photos") + if mp_off >= 0: + c = c[:mp_off] + cTag.insert(0, c) + caption.replaceWith(cTag) + except: + self.log("ERROR: Problem in change captions to italic") + + try: + # Change to

+ h1 = soup.find('h1') + blogheadline = str(h1) #added for dealbook + if h1: + headline = h1.find("nyt_headline") + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(headline.contents[0])) + h1.replaceWith(tag) + elif blogheadline.find('entry-title'):#added for dealbook + tag = Tag(soup, "h2")#added for dealbook + tag['class'] = "headline"#added for dealbook + tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook + h1.replaceWith(tag)#added for dealbook + + else: + # Blog entry - replace headline, remove
tags - BCC I think this is no longer functional 1-18-2011 + headline = soup.find('title') + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(headline.renderContents())) + soup.insert(0, tag) + hrs = soup.findAll('hr') + for hr in hrs: + hr.extract() + except: + self.log("ERROR: Problem in Change to

") + + try: + #if this is from a blog (dealbook, fix the byline format + bylineauthor = soup.find('address',attrs={'class':'byline author vcard'}) + if bylineauthor: + tag = Tag(soup, "h6") + tag['class'] = "byline" + tag.insert(0, self.fixChars(bylineauthor.renderContents())) + bylineauthor.replaceWith(tag) + except: + self.log("ERROR: fixing byline author format") + + try: + #if this is a blog (dealbook) fix the credit style for the pictures + blogcredit = soup.find('div',attrs={'class':'credit'}) + if blogcredit: + tag = Tag(soup, "h6") + tag['class'] = "credit" + tag.insert(0, self.fixChars(blogcredit.renderContents())) + blogcredit.replaceWith(tag) + except: + self.log("ERROR: fixing credit format") + + + try: + # Change

to

- used in editorial blogs + masthead = soup.find("h1") + if masthead: + # Nuke the href + if masthead.a: + del(masthead.a['href']) + tag = Tag(soup, "h3") + tag.insert(0, self.fixChars(masthead.contents[0])) + masthead.replaceWith(tag) + except: + self.log("ERROR: Problem in Change

to

- used in editorial blogs") + + try: + # Change to + for subhead in soup.findAll(True, {'class':'bold'}) : + if subhead.contents: + bTag = Tag(soup, "b") + bTag.insert(0, subhead.contents[0]) + subhead.replaceWith(bTag) + except: + self.log("ERROR: Problem in Change

to

- used in editorial blogs") + try: + #remove the update tag + blogupdated = soup.find('span', {'class':'update'}) + if blogupdated: + blogupdated.replaceWith("") + except: + self.log("ERROR: Removing strong tag") + + try: + divTag = soup.find('div',attrs={'id':'articleBody'}) + if divTag: + divTag['class'] = divTag['id'] + except: + self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") + + try: + # Add class="authorId" to
so we can format with CSS + divTag = soup.find('div',attrs={'id':'authorId'}) + if divTag and divTag.contents[0]: + tag = Tag(soup, "p") + tag['class'] = "authorId" + tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], + use_alt=False))) + divTag.replaceWith(tag) + except: + self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") + + return soup def populate_article_metadata(self, article, soup, first): shortparagraph = "" try: diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 5a82882dfa..277070020b 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -36,7 +36,7 @@ class ANDROID(USBMS): # Google 0x18d1 : { 0x4e11 : [0x0100, 0x226, 0x227], 0x4e12: [0x0100, 0x226, - 0x227], 0x4e21: [0x0100, 0x226, 0x227]}, + 0x227], 0x4e21: [0x0100, 0x226, 0x227], 0xb058: [0x0222]}, # Samsung 0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400], @@ -64,12 +64,13 @@ class ANDROID(USBMS): EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN) VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', - 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS'] + 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', + 'TELECHIP'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', - 'SGH-T849', '_MB300', 'A70S'] + 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S'] diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index aaf948f25e..9f8dbcb379 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -193,6 +193,9 @@ class LUMIREAD(USBMS): THUMBNAIL_HEIGHT = 200 + VENDOR_NAME = 'ACER' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'LUMIREAD_600' + def upload_cover(self, path, filename, metadata, filepath): if metadata.thumbnail and metadata.thumbnail[-1]: cfilepath = filepath.replace('/', os.sep) diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index 
874fbe4b10..e9329c39c6 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -229,7 +229,11 @@ class PRS505(USBMS): debug_print('PRS505: not uploading cover') return debug_print('PRS505: uploading cover') - self._upload_cover(path, filename, metadata, filepath) + try: + self._upload_cover(path, filename, metadata, filepath) + except: + import traceback + traceback.print_exc() def _upload_cover(self, path, filename, metadata, filepath): if metadata.thumbnail and metadata.thumbnail[-1]: diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 6fdf7ddc68..04ee892c19 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -483,29 +483,29 @@ OptionRecommendation(name='pubdate', OptionRecommendation(name='timestamp', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the book timestamp (used by the date column in calibre).')), - + OptionRecommendation(name='enable_heuristics', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Enable heurisic processing. This option must be set for any ' + help=_('Enable heuristic processing. This option must be set for any ' 'heuristic processing to take place.')), OptionRecommendation(name='markup_chapter_headings', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Detect unformatted chapter headings and sub headings. Change ' + help=_('Detect unformatted chapter headings and sub headings. Change ' 'them to h2 and h3 tags. 
This setting will not create a TOC, ' 'but can be used in conjunction with structure detection to create ' 'one.')), - + OptionRecommendation(name='italicize_common_cases', recommended_value=False, level=OptionRecommendation.LOW, help=_('Look for common words and patterns that denote ' 'italics and italicize them.')), - + OptionRecommendation(name='fix_indents', recommended_value=False, level=OptionRecommendation.LOW, help=_('Turn indentation created from multiple non-breaking space entities ' 'into CSS indents.')), - + OptionRecommendation(name='html_unwrap_factor', recommended_value=0.40, level=OptionRecommendation.LOW, help=_('Scale used to determine the length at which a line should ' @@ -513,31 +513,31 @@ OptionRecommendation(name='html_unwrap_factor', 'default is 0.4, just below the median line length. If only a ' 'few lines in the document require unwrapping this value should ' 'be reduced')), - + OptionRecommendation(name='unwrap_lines', recommended_value=False, level=OptionRecommendation.LOW, help=_('Unwrap lines using punctuation and other formatting clues.')), - + OptionRecommendation(name='delete_blank_paragraphs', recommended_value=False, level=OptionRecommendation.LOW, help=_('Remove empty paragraphs from the document when they exist between ' 'every other paragraph')), - + OptionRecommendation(name='format_scene_breaks', recommended_value=False, level=OptionRecommendation.LOW, - help=_('left aligned scene break markers are center aligned. ' + help=_('Left aligned scene break markers are center aligned. ' 'Replace soft scene breaks that use multiple blank lines with' 'horizontal rules.')), OptionRecommendation(name='dehyphenate', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Analyses hyphenated words throughout the document. The ' + help=_('Analyze hyphenated words throughout the document. 
The ' 'document itself is used as a dictionary to determine whether hyphens ' 'should be retained or removed.')), OptionRecommendation(name='renumber_headings', recommended_value=False, level=OptionRecommendation.LOW, - help=_('Looks for occurences of sequential

or

tags. ' + help=_('Looks for occurrences of sequential

or

tags. ' 'The tags are renumbered to prevent splitting in the middle ' 'of chapter headings.')), @@ -545,10 +545,10 @@ OptionRecommendation(name='sr1_search', recommended_value='', level=OptionRecommendation.LOW, help=_('Search pattern (regular expression) to be replaced with ' 'sr1-replace.')), - + OptionRecommendation(name='sr1_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters to replace the text found with sr1-search.')), + help=_('Replacement to replace the text found with sr1-search.')), OptionRecommendation(name='sr2_search', recommended_value='', level=OptionRecommendation.LOW, @@ -557,7 +557,7 @@ OptionRecommendation(name='sr2_search', OptionRecommendation(name='sr2_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters to replace the text found with sr2-search.')), + help=_('Replacement to replace the text found with sr2-search.')), OptionRecommendation(name='sr3_search', recommended_value='', level=OptionRecommendation.LOW, @@ -566,7 +566,7 @@ OptionRecommendation(name='sr3_search', OptionRecommendation(name='sr3_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters to replace the text found with sr3-search.')), + help=_('Replacement to replace the text found with sr3-search.')), ] # }}} diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index bbd71ede3a..087d8ed486 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' import functools, re -from calibre import entity_to_unicode +from calibre import entity_to_unicode, as_unicode XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>') SVG_NS = 'http://www.w3.org/2000/svg' @@ -463,7 +463,8 @@ class HTMLPreProcessor(object): replace_txt = '' rules.insert(0, (search_re, replace_txt)) except Exception as e: - self.log.error('Failed to parse %s 
regexp because %s' % (search, e)) + self.log.error('Failed to parse %r regexp because %s' % + (search, as_unicode(e))) end_rules = [] # delete soft hyphens - moved here so it's executed after header/footer removal diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 4663eeccdf..aabb1b8bc4 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -54,7 +54,7 @@ class HeuristicProcessor(object): return '<'+styles+' style="page-break-before:always">'+chap def analyze_title_matches(self, match): - chap = match.group('chap') + #chap = match.group('chap') title = match.group('title') if not title: self.chapters_no_title = self.chapters_no_title + 1 @@ -102,8 +102,7 @@ class HeuristicProcessor(object): min_lns = tot_ln_fds * percent #self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup") - if min_lns > tot_htm_ends: - return True + return min_lns > tot_htm_ends def dump(self, raw, where): import os @@ -136,7 +135,7 @@ class HeuristicProcessor(object): 'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.', 'Mlle.', 'Mons.', 'PS.', 'PPS.', ] - + ITALICIZE_STYLE_PATS = [ r'(?msu)(?<=\s)_(?P\S[^_]{0,40}?\S)?_(?=\s)', r'(?msu)(?<=\s)/(?P\S[^/]{0,40}?\S)?/(?=\s)', @@ -150,7 +149,7 @@ class HeuristicProcessor(object): r'(?msu)(?<=\s)/:(?P\S[^:/]{0,40}?\S)?:/(?=\s)', r'(?msu)(?<=\s)\|:(?P\S[^:\|]{0,40}?\S)?:\|(?=\s)', ] - + for word in ITALICIZE_WORDS: html = html.replace(word, '%s' % word) @@ -242,7 +241,7 @@ class HeuristicProcessor(object): lp_title = default_title else: lp_title = simple_title - + if ignorecase: arg_ignorecase = r'(?i)' else: @@ -250,7 +249,7 @@ class HeuristicProcessor(object): if title_req: lp_opt_title_open = '' - lp_opt_title_close = '' + lp_opt_title_close = '' else: lp_opt_title_open = opt_title_open lp_opt_title_close = opt_title_close @@ -399,7 +398,7 @@ class HeuristicProcessor(object): if len(lines) > 1: self.log.debug("There 
are " + unicode(len(blanklines)) + " blank lines. " + unicode(float(len(blanklines)) / float(len(lines))) + " percent blank") - + if float(len(blanklines)) / float(len(lines)) > 0.40: return True else: @@ -460,7 +459,7 @@ class HeuristicProcessor(object): if getattr(self.extra_opts, 'markup_chapter_headings', False): html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs) - if getattr(self.extra_opts, 'italicize_common_cases', False): + if getattr(self.extra_opts, 'italicize_common_cases', False): html = self.markup_italicis(html) # If more than 40% of the lines are empty paragraphs and the user has enabled delete @@ -487,7 +486,7 @@ class HeuristicProcessor(object): unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format") - + ###### Unwrap lines ###### if getattr(self.extra_opts, 'unwrap_lines', False): # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 080faffae6..1599d3c896 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -21,7 +21,7 @@ from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.chardet import xml_to_unicode from calibre.customize.conversion import OptionRecommendation from calibre.constants import islinux, isfreebsd, iswindows -from calibre import unicode_path +from calibre import unicode_path, as_unicode from calibre.utils.localization import get_lang from calibre.utils.filenames import ascii_filename @@ -111,7 +111,7 @@ class HTMLFile(object): with open(self.path, 'rb') as f: src = f.read() except IOError, err: - msg = 'Could not read from file: %s with error: %s'%(self.path, unicode(err)) + msg = 'Could not read from file: %s with error: 
%s'%(self.path, as_unicode(err)) if level == 0: raise IOError(msg) raise IgnoreFile(msg, err.errno) diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index ff8955939e..ff901c3715 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -7,8 +7,6 @@ __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.conversion.utils import HeuristicProcessor - class LITInput(InputFormatPlugin): diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 8188027e01..4ce3618441 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -3,7 +3,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re from calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index 08b4369078..299c77af10 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -199,8 +199,8 @@ class EbookIterator(object): not hasattr(self.pathtoopf, 'manifest'): if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf = write_oebbook(self.pathtoopf, self.base) - self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts, - plumber.input_plugin) + self.pathtoopf = create_oebbook(self.log, self.pathtoopf, + plumber.opts) if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf = write_oebbook(self.pathtoopf, self.base) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index dd14de2d20..5b99b19e74 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -53,6 +53,7 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + self.log = log log.debug('Reading text from file...') txt = 
stream.read() @@ -106,7 +107,7 @@ class TXTInput(InputFormatPlugin): log.debug('Auto detected paragraph type as %s' % options.paragraph_type) # Dehyphenate - dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None)) + dehyphenator = Dehyphenator(options.verbose, log=self.log) txt = dehyphenator(txt,'txt', length) # We don't check for block because the processor assumes block. diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py index 4d0d176fe4..29b3d899bc 100644 --- a/src/calibre/ebooks/txt/output.py +++ b/src/calibre/ebooks/txt/output.py @@ -51,12 +51,12 @@ class TXTOutput(OutputFormatPlugin): recommended_value=False, level=OptionRecommendation.LOW, help=_('Do not remove links within the document. This is only ' \ 'useful when paired with the markdown-format option because' \ - 'links are always removed with plain text output.')), + ' links are always removed with plain text output.')), OptionRecommendation(name='keep_image_references', recommended_value=False, level=OptionRecommendation.LOW, help=_('Do not remove image references within the document. 
This is only ' \ 'useful when paired with the markdown-format option because' \ - 'image references are always removed with plain text output.')), + ' image references are always removed with plain text output.')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): diff --git a/src/calibre/gui2/convert/bulk.py b/src/calibre/gui2/convert/bulk.py index b97ab1a2dc..591ac92b2b 100644 --- a/src/calibre/gui2/convert/bulk.py +++ b/src/calibre/gui2/convert/bulk.py @@ -94,7 +94,7 @@ class BulkConfig(Config): if not c: break self.stack.removeWidget(c) - widgets = [lf, hw, sr, ps, sd, toc] + widgets = [lf, hw, ps, sd, toc, sr] if output_widget is not None: widgets.append(output_widget) for w in widgets: diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py index 6739c199b7..e788888257 100644 --- a/src/calibre/gui2/convert/heuristics.py +++ b/src/calibre/gui2/convert/heuristics.py @@ -11,9 +11,10 @@ from calibre.gui2.convert import Widget class HeuristicsWidget(Widget, Ui_Form): - TITLE = _('Heuristic Processing') + TITLE = _('Heuristic\nProcessing') HELP = _('Modify the document text and structure using common patterns.') COMMIT_NAME = 'heuristics' + ICON = I('heuristics.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, @@ -25,44 +26,29 @@ class HeuristicsWidget(Widget, Ui_Form): ) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) - + self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics) self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap) - + self.enable_heuristics(self.opt_enable_heuristics.checkState()) def break_cycles(self): Widget.break_cycles(self) - + try: self.opt_enable_heuristics.stateChanged.disconnect() self.opt_unwrap_lines.stateChanged.disconnect() except: pass - + def set_value_handler(self, g, val): if val is None and g is self.opt_html_unwrap_factor: g.setValue(0.0) 
return True def enable_heuristics(self, state): - if state == Qt.Checked: - state = True - else: - state = False - self.opt_markup_chapter_headings.setEnabled(state) - self.opt_italicize_common_cases.setEnabled(state) - self.opt_fix_indents.setEnabled(state) - self.opt_delete_blank_paragraphs.setEnabled(state) - self.opt_format_scene_breaks.setEnabled(state) - self.opt_dehyphenate.setEnabled(state) - self.opt_renumber_headings.setEnabled(state) - - self.opt_unwrap_lines.setEnabled(state) - if state and self.opt_unwrap_lines.checkState() == Qt.Checked: - self.opt_html_unwrap_factor.setEnabled(True) - else: - self.opt_html_unwrap_factor.setEnabled(False) + state = state == Qt.Checked + self.heuristic_options.setEnabled(state) def enable_unwrap(self, state): if state == Qt.Checked: diff --git a/src/calibre/gui2/convert/heuristics.ui b/src/calibre/gui2/convert/heuristics.ui index 8048bef204..4358512996 100644 --- a/src/calibre/gui2/convert/heuristics.ui +++ b/src/calibre/gui2/convert/heuristics.ui @@ -6,7 +6,7 @@ 0 0 - 938 + 724 470 @@ -15,114 +15,160 @@ - + - &Preprocess input file to possibly improve structure detection + <b>Heuristic processing</b> means that calibre will scan your book for common patterns and fix them. As the name implies, this involves guesswork, which means that it could end up worsening the result of a conversion, if calibre guesses wrong. Therefore, it is disabled by default. Often, if a conversion does not turn out as you expect, turning on heuristics can improve matters. 
+ + + true - + + + Qt::Vertical + + + QSizePolicy::Fixed + + + + 20 + 15 + + + + + + + + Enable &heuristic processing + + + + + Heuristic Processing - - + + Unwrap lines - - - - Line &un-wrap factor during preprocess: - - - opt_html_unwrap_factor - - + + + + + + Qt::Horizontal + + + QSizePolicy::Fixed + + + + 40 + 20 + + + + + + + + Line &un-wrap factor : + + + opt_html_unwrap_factor + + + + + + + + + + 1.000000000000000 + + + 0.050000000000000 + + + 0.400000000000000 + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + - - - - - - - 1.000000000000000 - - - 0.050000000000000 - - - 0.400000000000000 - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - + Detect and markup unformatted chapter headings and sub headings - + Renumber sequences of <h1> or <h2> tags to prevent splitting - + Delete blank lines between paragraphs - + Ensure scene breaks are consistently formatted - + Remove unnecessary hyphens - + Italicize common words and patterns - + Replace entity indents with CSS indents - + Qt::Vertical diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index c85e4fe414..04a337a4fc 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -12,9 +12,10 @@ from calibre.gui2 import error_dialog class SearchAndReplaceWidget(Widget, Ui_Form): - TITLE = _('Search &\nReplace') + TITLE = _(u'Search\u00a0&\nReplace') HELP = _('Modify the document text and structure using user defined patterns.') COMMIT_NAME = 'search_and_replace' + ICON = I('search.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, @@ -24,19 +25,19 @@ class SearchAndReplaceWidget(Widget, Ui_Form): ) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) - self.opt_sr1_search.set_msg(_('Search Regular Expression')) + self.opt_sr1_search.set_msg(_('&Search Regular Expression')) 
self.opt_sr1_search.set_book_id(book_id) self.opt_sr1_search.set_db(db) - self.opt_sr2_search.set_msg(_('Search Regular Expression')) + self.opt_sr2_search.set_msg(_('&Search Regular Expression')) self.opt_sr2_search.set_book_id(book_id) self.opt_sr2_search.set_db(db) - self.opt_sr3_search.set_msg(_('Search Regular Expression')) + self.opt_sr3_search.set_msg(_('&Search Regular Expression')) self.opt_sr3_search.set_book_id(book_id) self.opt_sr3_search.set_db(db) - + def break_cycles(self): Widget.break_cycles(self) - + self.opt_sr1_search.break_cycles() self.opt_sr2_search.break_cycles() self.opt_sr3_search.break_cycles() @@ -49,6 +50,6 @@ class SearchAndReplaceWidget(Widget, Ui_Form): re.compile(pat) except Exception, err: error_dialog(self, _('Invalid regular expression'), - _('Invalid regular expression: %s')%err).exec_() + _('Invalid regular expression: %s')%err, show=True) return False return True diff --git a/src/calibre/gui2/convert/search_and_replace.ui b/src/calibre/gui2/convert/search_and_replace.ui index e0e9570f8c..b7447f8feb 100644 --- a/src/calibre/gui2/convert/search_and_replace.ui +++ b/src/calibre/gui2/convert/search_and_replace.ui @@ -6,8 +6,8 @@ 0 0 - 198 - 350 + 468 + 451 @@ -23,7 +23,7 @@ QLayout::SetDefaultConstraint - + @@ -32,7 +32,7 @@ - 1. + First expression @@ -57,7 +57,10 @@ - Replacement Text + &Replacement Text + + + opt_sr1_replace @@ -74,7 +77,7 @@ - + @@ -83,7 +86,7 @@ - 2. + Second Expression @@ -108,7 +111,10 @@ - Replacement Text + &Replacement Text + + + opt_sr2_replace @@ -125,7 +131,7 @@ - + @@ -134,7 +140,7 @@ - 3. + Third expression @@ -159,7 +165,10 @@ - Replacement Text + &Replacement Text + + + opt_sr3_replace @@ -176,6 +185,19 @@ + + + + <p>Search and replace uses <i>regular expressions</i>. See the <a href="http://calibre-ebook.com/user_manual/regexp.html">regular expressions tutorial</a> to get started with regular expressions. 
Also clicking the wizard buttons below will allow you to test your regular expression against the current input document. + + + true + + + true + + + diff --git a/src/calibre/gui2/convert/single.py b/src/calibre/gui2/convert/single.py index 8826d398f5..da58de545b 100644 --- a/src/calibre/gui2/convert/single.py +++ b/src/calibre/gui2/convert/single.py @@ -207,7 +207,7 @@ class Config(ResizableDialog, Ui_Dialog): if not c: break self.stack.removeWidget(c) - widgets = [self.mw, lf, hw, sr, ps, sd, toc] + widgets = [self.mw, lf, hw, ps, sd, toc, sr] if input_widget is not None: widgets.append(input_widget) if output_widget is not None: diff --git a/src/calibre/gui2/convert/single.ui b/src/calibre/gui2/convert/single.ui index ede548d8d7..bb447104d8 100644 --- a/src/calibre/gui2/convert/single.ui +++ b/src/calibre/gui2/convert/single.ui @@ -100,7 +100,7 @@ - 20 + 10 true @@ -129,8 +129,8 @@ 0 0 - 805 - 484 + 810 + 494 diff --git a/src/calibre/gui2/convert/structure_detection.py b/src/calibre/gui2/convert/structure_detection.py index 2c64303ee7..d8e2f4f122 100644 --- a/src/calibre/gui2/convert/structure_detection.py +++ b/src/calibre/gui2/convert/structure_detection.py @@ -31,7 +31,7 @@ class StructureDetectionWidget(Widget, Ui_Form): self.opt_chapter.set_msg(_('Detect chapters at (XPath expression):')) self.opt_page_breaks_before.set_msg(_('Insert page breaks before ' '(XPath expression):')) - + def break_cycles(self): Widget.break_cycles(self) diff --git a/src/calibre/gui2/convert/txt_output.py b/src/calibre/gui2/convert/txt_output.py index a16dd68014..0e6a6b9574 100644 --- a/src/calibre/gui2/convert/txt_output.py +++ b/src/calibre/gui2/convert/txt_output.py @@ -23,9 +23,9 @@ class PluginWidget(Widget, Ui_Form): ['newline', 'max_line_length', 'force_max_line_length', 'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references', 'txt_output_encoding']) - self.db, self.book_id = db, book_id + self.db, self.book_id = db, book_id for x in 
get_option('newline').option.choices: - self.opt_newline.addItem(x) + self.opt_newline.addItem(x) self.initialize_options(get_option, get_help, db, book_id) self.opt_markdown_format.stateChanged.connect(self.enable_markdown_format) @@ -33,17 +33,14 @@ class PluginWidget(Widget, Ui_Form): def break_cycles(self): Widget.break_cycles(self) - + try: self.opt_markdown_format.stateChanged.disconnect() except: pass - + def enable_markdown_format(self, state): - if state == Qt.Checked: - state = True - else: - state = False + state = state == Qt.Checked self.opt_keep_links.setEnabled(state) self.opt_keep_image_references.setEnabled(state) - \ No newline at end of file + diff --git a/src/calibre/gui2/convert/xexp_edit.ui b/src/calibre/gui2/convert/xexp_edit.ui index 4b26eb8dcf..18b7c39b52 100644 --- a/src/calibre/gui2/convert/xexp_edit.ui +++ b/src/calibre/gui2/convert/xexp_edit.ui @@ -6,7 +6,7 @@ 0 0 - 434 + 430 74 @@ -59,7 +59,7 @@ ... - + :/images/wizard.png:/images/wizard.png diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 734d8cd56c..28b5e178ac 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -19,7 +19,7 @@ from calibre.devices.scanner import DeviceScanner from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \ warning_dialog, info_dialog, choose_dir from calibre.ebooks.metadata import authors_to_string -from calibre import preferred_encoding, prints, force_unicode +from calibre import preferred_encoding, prints, force_unicode, as_unicode from calibre.utils.filenames import ascii_filename from calibre.devices.errors import FreeSpaceError from calibre.devices.apple.driver import ITUNES_ASYNC @@ -68,13 +68,7 @@ class DeviceJob(BaseJob): # {{{ if self._aborted: return self.failed = True - try: - ex = unicode(err) - except: - try: - ex = str(err).decode(preferred_encoding, 'replace') - except: - ex = repr(err) + ex = as_unicode(err) self._details = ex + '\n\n' + \ traceback.format_exc() self.exception = 
err diff --git a/src/calibre/gui2/device_drivers/configwidget.ui b/src/calibre/gui2/device_drivers/configwidget.ui index f4902a7387..619d7052e8 100644 --- a/src/calibre/gui2/device_drivers/configwidget.ui +++ b/src/calibre/gui2/device_drivers/configwidget.ui @@ -85,6 +85,9 @@ + + If checked, books are placed into sub directories based on their metadata on the device. If unchecked, books are all put into the top level directory. + Use sub directories diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py index ea614aa817..ae9d6e2f71 100644 --- a/src/calibre/gui2/library/delegates.py +++ b/src/calibre/gui2/library/delegates.py @@ -292,7 +292,7 @@ class CcEnumDelegate(QStyledItemDelegate): # {{{ def createEditor(self, parent, option, index): m = index.model() col = m.column_map[index.column()] - editor = QComboBox(parent) + editor = DelegateCB(parent) editor.addItem('') for v in m.custom_columns[col]['display']['enum_values']: editor.addItem(v) @@ -353,6 +353,17 @@ class CcCommentsDelegate(QStyledItemDelegate): # {{{ model.setData(index, QVariant(editor.textbox.html), Qt.EditRole) # }}} +class DelegateCB(QComboBox): # {{{ + + def __init__(self, parent): + QComboBox.__init__(self, parent) + + def event(self, e): + if e.type() == e.ShortcutOverride: + e.accept() + return QComboBox.event(self, e) +# }}} + class CcBoolDelegate(QStyledItemDelegate): # {{{ def __init__(self, parent): ''' @@ -361,7 +372,7 @@ class CcBoolDelegate(QStyledItemDelegate): # {{{ QStyledItemDelegate.__init__(self, parent) def createEditor(self, parent, option, index): - editor = QComboBox(parent) + editor = DelegateCB(parent) items = [_('Y'), _('N'), ' '] icons = [I('ok.png'), I('list_remove.png'), I('blank.png')] if tweaks['bool_custom_columns_are_tristate'] == 'no': diff --git a/src/calibre/gui2/preferences/conversion.py b/src/calibre/gui2/preferences/conversion.py index 0a8fc375ea..8de9ee1661 100644 --- a/src/calibre/gui2/preferences/conversion.py +++ 
b/src/calibre/gui2/preferences/conversion.py @@ -85,8 +85,8 @@ class CommonOptions(Base): def load_conversion_widgets(self): self.conversion_widgets = [LookAndFeelWidget, HeuristicsWidget, - SearchAndReplaceWidget, PageSetupWidget, - StructureDetectionWidget, TOCWidget] + PageSetupWidget, + StructureDetectionWidget, TOCWidget, SearchAndReplaceWidget,] class InputOptions(Base): diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index 9275797cbc..dd1121c725 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -453,10 +453,10 @@ class CompleteLineEdit(EnLineEdit): def update_items_cache(self, complete_items): self.completer.update_items_cache(complete_items) - + def set_separator(self, sep): self.separator = sep - + def set_space_before_sep(self, space_before): self.space_before_sep = space_before @@ -501,7 +501,7 @@ class EnComboBox(QComboBox): def __init__(self, *args): QComboBox.__init__(self, *args) self.setLineEdit(EnLineEdit(self)) - self.setAutoCompletionCaseSensitivity(Qt.CaseSensitive) + self.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive) self.setMinimumContentsLength(20) def text(self): @@ -515,17 +515,17 @@ class EnComboBox(QComboBox): self.setCurrentIndex(idx) class CompleteComboBox(EnComboBox): - + def __init__(self, *args): EnComboBox.__init__(self, *args) self.setLineEdit(CompleteLineEdit(self)) def update_items_cache(self, complete_items): self.lineEdit().update_items_cache(complete_items) - + def set_separator(self, sep): self.lineEdit().set_separator(sep) - + def set_space_before_sep(self, space_before): self.lineEdit().set_space_before_sep(space_before) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 8edf266cfb..ae600a29f9 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -18,7 +18,8 @@ from calibre.ebooks.chardet import substitute_entites from calibre.ebooks.oeb.base import XHTML_NS from calibre.ptempfile import 
PersistentTemporaryDirectory from calibre.utils.config import config_dir -from calibre.utils.date import format_date, isoformat, now as nowf +from calibre.utils.date import format_date, isoformat, is_date_undefined, now as nowf +from calibre.utils.icu import capitalize from calibre.utils.logging import default_log as log from calibre.utils.zipfile import ZipFile, ZipInfo from calibre.utils.magick.draw import thumbnail @@ -1026,17 +1027,12 @@ class EPUB_MOBI(CatalogPlugin): self.__totalSteps += 3 # Load section list templates - templates = ['by_authors_normal_title_template', - 'by_authors_series_title_template', - 'by_titles_normal_title_template', - 'by_titles_series_title_template', - 'by_series_title_template', - 'by_genres_normal_title_template', - 'by_genres_series_title_template', - 'by_recently_added_normal_title_template', - 'by_recently_added_series_title_template', - 'by_month_added_normal_title_template', - 'by_month_added_series_title_template'] + templates = [] + with open(P('catalog/section_list_templates.py'), 'r') as f: + for line in f: + t = re.match("(by_.+_template)",line) + if t: + templates.append(t.group(1)) execfile(P('catalog/section_list_templates.py'), locals()) for t in templates: setattr(self,t,eval(t)) @@ -1440,7 +1436,9 @@ class EPUB_MOBI(CatalogPlugin): # Exit if author matches previous, but author_sort doesn't match if author[0] == current_author[0]: error_msg = _(''' -Inconsistent Author Sort values for Author '{0}' ('{1}' <> '{2}'), unable to build catalog.\n +Inconsistent Author Sort values for Author '{0}': +'{1}' <> '{2}', +unable to build catalog.\n Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog, then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) self.opts.log.warn('\n*** Metadata error ***') @@ -1449,17 +1447,13 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) self.error.append('Metadata error') self.error.append(error_msg) return 
False + current_author = author self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort) -# for book in self.booksByAuthor: -# print '{0:<10} {1:<5} {2:<20} {3:<20} {4:<20} {5:<20}'.format(book['series'], book['series_index'], book['title'], -# book['author'], book['authors'],book['author_sort']) -# print - # Build the unique_authors set from existing data - authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor] + authors = [(record['author'], capitalize(record['author_sort'])) for record in self.booksByAuthor] # authors[] contains a list of all book authors, with multiple entries for multiple books by author # authors[]: (([0]:friendly [1]:sort)) @@ -1565,7 +1559,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) this_title['rating'] = record['rating'] if record['rating'] else 0 - if re.match('0100-01-01',str(record['pubdate'].date())): + if is_date_undefined(record['pubdate']): this_title['date'] = None else: this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple()) @@ -2681,8 +2675,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) # Use series, series index if avail else just title #aTag.insert(0,'%d. 
%s · %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors']))) - # Reassert 'date' since this is the result of a new search - if re.match('0100-01-01',str(book['pubdate'].date())): + if is_date_undefined(book['pubdate']): book['date'] = None else: book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple()) @@ -2756,7 +2749,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) this_book = {} this_book['author'] = book['author'] this_book['title'] = book['title'] - this_book['author_sort'] = book['author_sort'].capitalize() + this_book['author_sort'] = capitalize(book['author_sort']) this_book['read'] = book['read'] this_book['tags'] = book['tags'] this_book['id'] = book['id'] @@ -3901,14 +3894,14 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) Sort non-series books before series books ''' if not book['series']: - key = '%s %s' % (book['author_sort'].capitalize(), - book['title_sort'].capitalize()) + key = '%s %s' % (capitalize(book['author_sort']), + capitalize(book['title_sort'])) else: index = book['series_index'] integer = int(index) fraction = index-integer series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0')) - key = '%s ~%s %s' % (book['author_sort'].capitalize(), + key = '%s ~%s %s' % (capitalize(book['author_sort']), self.generateSortTitle(book['series']), series_index) return key @@ -3919,7 +3912,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) ''' if not book['series']: key = '%s %s' % (self.author_to_author_sort(book['author']), - book['title_sort'].capitalize()) + capitalize(book['title_sort'])) else: index = book['series_index'] integer = int(index) @@ -4313,10 +4306,11 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) formats = ' · '.join(formats) # Date of publication - pubdate = book['date'] - pubmonth, pubyear = pubdate.split() - if pubyear == '101': - pubdate = pubmonth = pubyear 
= '' + if book['date']: + pubdate = book['date'] + pubmonth, pubyear = pubdate.split() + else: + pubdate = pubyear = pubmonth = '' # Thumb _soup = BeautifulSoup('',selfClosingTags=['img']) @@ -4570,7 +4564,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) if self.letter_or_symbol(word[0]) != word[0]: if word[0] > 'A' or (ord('9') < ord(word[0]) < ord('A')) : translated.append('/') - translated.append(word.capitalize()) + translated.append(capitalize(word)) else: if re.search('[0-9]+',word[0]): diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index 2bc5687262..de27a5f5bb 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -266,14 +266,14 @@ from bad formatting. Because these functions rely on common patterns, be aware t option may lead to worse results, so use with care. As an example, several of these options will remove all non-breaking-space entities. -:guilabel:`Preprocess input` - This option activates various activates |app|'s Heuristic Processing stage of the conversion pipeline. +:guilabel:`Enable heuristic processing` + This option activates |app|'s Heuristic Processing stage of the conversion pipeline. This must be enabled in order for various sub-functions to be applied :guilabel:`Unwrap lines` Enabling this option will cause |app| to attempt to detect and correct hard line breaks that exist - within a document using punctuation clues and line length. |app| will first attempt to detect whether - hard line breaks exist, if they do not appear to exist |app| will not attempt to unwrap lines. The + within a document using punctuation clues and line length. |app| will first attempt to detect whether + hard line breaks exist, if they do not appear to exist |app| will not attempt to unwrap lines. The line-unwrap factor can be reduced if you want to 'force' |app| to unwrap lines. 
:guilabel:`Line-unwrap factor` @@ -284,21 +284,21 @@ remove all non-breaking-space entities. :guilabel:`Detect and markup unformatted chapter headings and sub headings` If your document does not have Chapter Markers and titles formatted differently from the rest of the text, - |app| can use this option to attempt detection them and surround them with heading tags. <h2> tags are used - for chapter headings; <h3> tags are used for any titles that are detected. + |app| can use this option to attempt detection them and surround them with heading tags.

<h2> tags are used + for chapter headings; <h3>
tags are used for any titles that are detected. This function will not create a TOC, but in many cases it will cause |app|'s default chapter detection settings - to correctly detect chapters and build a TOC. Adjust the Xpath under Structure Detection if a TOC is not automatically + to correctly detect chapters and build a TOC. Adjust the XPath under Structure Detection if a TOC is not automatically created. If there are no other headings used in the document then setting "//h:h2" under Structure Detection would be the easiest way to create a TOC for the document. - The inserted headings are not formatted, to apply formatting use the 'extra_css' option under + The inserted headings are not formatted, to apply formatting use the :guilabel:`Extra CSS` option under the Look and Feel conversion settings. For example, to center heading tags, use the following:: h2, h3 { text-align: center } -:guilabel:`Renumber sequences of <h1> or <h2> tags` - Some publishers format chapter headings using multiple <h1> or <h2> tags sequentially. +:guilabel:`Renumber sequences of

<h1> or <h2> tags` + Some publishers format chapter headings using multiple <h1> or <h2>
tags sequentially. |app|'s default conversion settings will cause such titles to be split into two pieces. This option will re-number the heading tags to prevent splitting. @@ -345,7 +345,7 @@ specifying a replacement expression. The search works by using a python regular expression. All matched text is simply removed from the document or replaced using the replacement pattern. You can learn more about regular expressions and -their syntax at http://docs.python.org/library/re.html. +their syntax at :ref:`regexptutorial`. .. _structure-detection: diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index b473893673..37d18ea329 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -107,10 +107,10 @@ My device is not being detected by |app|? Follow these steps to find the problem: * Make sure that you are connecting only a single device to your computer at a time. Do not have another |app| supported device like an iPhone/iPad etc. at the same time. - * Make sure you are running the latest version of |app|. The latest version can always be downloaded from `http://calibre-ebook.com/download`_. + * Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website `_. * Ensure your operating system is seeing the device. That is, the device should be mounted as a disk that you can access using Windows explorer or whatever the file management program on your computer is * In calibre, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled. - * If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `http://bugs.calibre-ebook.com`_. + * If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker `_. 
How does |app| manage collections on my SONY reader? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/manual/regexp.rst b/src/calibre/manual/regexp.rst index 5cd9a8b097..776141b113 100644 --- a/src/calibre/manual/regexp.rst +++ b/src/calibre/manual/regexp.rst @@ -21,7 +21,7 @@ This is, inevitably, going to be somewhat technical- after all, regular expressi Where in |app| can you use regular expressions? --------------------------------------------------- -There are a few places |app| uses regular expressions. There's the header/footer removal in conversion options, metadata detection from filenames in the import settings and, since last version, there's the option to use regular expressions to search and replace in metadata of multiple books. +There are a few places |app| uses regular expressions. There's the Search & Replace in conversion options, metadata detection from filenames in the import settings and Search & Replace when editing the metadata of books in bulk. What on earth *is* a regular expression? ------------------------------------------------ @@ -94,7 +94,7 @@ I think I'm beginning to understand these regular expressions now... how do I us Conversions ^^^^^^^^^^^^^^ -Let's begin with the conversion settings, which is really neat. In the structure detection part, you can input a regexp (short for regular expression) that describes the header or footer string that will be removed during the conversion. The neat part is the wizard. Click on the wizard staff and you get a preview of what |app| "sees" during the conversion process. Scroll down to the header or footer you want to remove, select and copy it, paste it into the regexp field on top of the window. If there are variable parts, like page numbers or so, use sets and quantifiers to cover those, and while you're at it, remember to escape special characters, if there are some. 
Hit the button labeled :guilabel:`Test` and |app| highlights the parts it would remove were you to use the regexp. Once you're satisfied, hit OK and convert. Be careful if your conversion source has tags like this example:: +Let's begin with the conversion settings, which is really neat. In the Search and Replace part, you can input a regexp (short for regular expression) that describes the string that will be replaced during the conversion. The neat part is the wizard. Click on the wizard staff and you get a preview of what |app| "sees" during the conversion process. Scroll down to the string you want to remove, select and copy it, paste it into the regexp field on top of the window. If there are variable parts, like page numbers or so, use sets and quantifiers to cover those, and while you're at it, remember to escape special characters, if there are some. Hit the button labeled :guilabel:`Test` and |app| highlights the parts it would replace were you to use the regexp. Once you're satisfied, hit OK and convert. Be careful if your conversion source has tags like this example:: Maybe, but the cops feel like you do, Anita. What's one more dead vampire? New laws don't change that.

@@ -104,7 +104,7 @@ Let's begin with the conversion settings, which is really neat. In the structure

It had only been two years since Addison v. Clark. The court case gave us a revised version of what life was -(shamelessly ripped out of `this thread `_). You'd have to remove some of the tags as well. In this example, I'd recommend beginning with the tag ``<b class="calibre2">``, now you have to end with the corresponding closing tag (opening tags are ``<tag>``, closing tags are ``</tag>``), which is simply the next ``</b>`` in this case. (Refer to a good HTML manual or ask in the forum if you are unclear on this point.) The opening tag can be described using ``<b.*?>``, the closing tag using ``</b>``, thus we could remove everything between those tags using ``<b.*?>.*?</b>``. But using this expression would be a bad idea, because it removes everything enclosed by <b>- tags (which, by the way, render the enclosed text in bold print), and it's a fair bet that we'll remove portions of the book in this way. Instead, include the beginning of the enclosed string as well, making the regular expression ``<b.*?>\s*Generated\s+by\s+ABC\s+Amber\s+LIT.*?</b>`` The ``\s`` with quantifiers are included here instead of explicitly using the spaces as seen in the string to catch any variations of the string that might occur. Remember to check what |app| will remove to make sure you don't remove any portions you want to keep if you test a new expression. If you only check one occurrence, you might miss a mismatch somewhere else in the text. Also note that should you accidentally remove more or fewer tags than you actually wanted to, |app| tries to repair the damaged code after doing the header/footer removal. +(shamelessly ripped out of `this thread `_). You'd have to remove some of the tags as well. In this example, I'd recommend beginning with the tag ``<b class="calibre2">``, now you have to end with the corresponding closing tag (opening tags are ``<tag>``, closing tags are ``</tag>``), which is simply the next ``</b>`` in this case. (Refer to a good HTML manual or ask in the forum if you are unclear on this point.)
The opening tag can be described using ``<b.*?>``, the closing tag using ``</b>``, thus we could remove everything between those tags using ``<b.*?>.*?</b>``. But using this expression would be a bad idea, because it removes everything enclosed by <b>- tags (which, by the way, render the enclosed text in bold print), and it's a fair bet that we'll remove portions of the book in this way. Instead, include the beginning of the enclosed string as well, making the regular expression ``<b.*?>\s*Generated\s+by\s+ABC\s+Amber\s+LIT.*?</b>`` The ``\s`` with quantifiers are included here instead of explicitly using the spaces as seen in the string to catch any variations of the string that might occur. Remember to check what |app| will remove to make sure you don't remove any portions you want to keep if you test a new expression. If you only check one occurrence, you might miss a mismatch somewhere else in the text. Also note that should you accidentally remove more or fewer tags than you actually wanted to, |app| tries to repair the damaged code after doing the removal. Adding books ^^^^^^^^^^^^^^^^ diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py index 2551b90788..31c770bea5 100644 --- a/src/calibre/utils/date.py +++ b/src/calibre/utils/date.py @@ -46,6 +46,17 @@ local_tz = _local_tz = SafeLocalTimeZone() UNDEFINED_DATE = datetime(101,1,1, tzinfo=utc_tz) +def is_date_undefined(qt_or_dt): + d = qt_or_dt + if d is None: + return True + if hasattr(d, 'toString'): + d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz) + return d.year < UNDEFINED_DATE.year or ( + d.year == UNDEFINED_DATE.year and + d.month == UNDEFINED_DATE.month and + d.day == UNDEFINED_DATE.day) + def parse_date(date_string, assume_utc=False, as_utc=True, default=None): ''' Parse a date/time string into a timezone aware datetime object.
The timezone diff --git a/src/calibre/utils/smtplib.py b/src/calibre/utils/smtplib.py index d6f3fb0b69..9992039d00 100755 --- a/src/calibre/utils/smtplib.py +++ b/src/calibre/utils/smtplib.py @@ -554,6 +554,8 @@ class SMTP: def encode_cram_md5(challenge, user, password): challenge = base64.decodestring(challenge) + if isinstance(password, unicode): # Added by Kovid, see http://bugs.python.org/issue5285 + password = password.encode('utf-8') response = user + " " + hmac.HMAC(password, challenge).hexdigest() return encode_base64(response, eol="")